From: Matt McCutchen <matt@mattmccutchen.net>
Date: Sat, 2 Aug 2008 20:11:15 +0000 (-0400)
Subject: Make the instance generator select proposal topics from a Zipf distribution, and
X-Git-Url: https://mattmccutchen.net/match/match.git/commitdiff_plain/8723ed6adebc35a25ca240acdb587444c85fb44f

Make the instance generator select proposal topics from a Zipf distribution, and
make the Zipf distribution exponent and the number of topics configurable.
---

diff --git a/program/PMConfig.hs b/program/PMConfig.hs
index 20a380f..ecbe1cb 100644
--- a/program/PMConfig.hs
+++ b/program/PMConfig.hs
@@ -3,6 +3,12 @@ module PMConfig where
 import PMInstance
 import IMinCostFlow
 
+-- A PMConfig structure has a bunch of configuration values used by
+-- many functions in PMInstanceGenerator, ProposalMatcher, and Evaluation, which
+-- take a PMConfig as their first argument.  Module PMDefaults defines a sane
+-- default configuration `pmDefaults'; it can be used as-is or individual fields
+-- can be overridden, e.g., pmDefaults{loadTolerance = 2} .  See module
+-- PMDefaults for documentation of each field.
 data PMConfig = PMConfig {
 	minCostFlow :: MinCostFlowImpl,
 	reviewsEachProposal :: Int,
@@ -17,5 +23,7 @@ data PMConfig = PMConfig {
 	marginalVeryBoringCost :: Wt -> Wt,
 	assignmentCost :: Wt -> Wt,
 	knowledgeableBonus :: Wt,
-	expertBonus :: Wt
+	expertBonus :: Wt,
+	numTopics :: Int,
+	topicZipfExponent :: Wt
 }
diff --git a/program/PMDefaults.hs b/program/PMDefaults.hs
index aa68aac..a68d182 100644
--- a/program/PMDefaults.hs
+++ b/program/PMDefaults.hs
@@ -5,6 +5,7 @@ import PMConfig
 import qualified NaiveMinCostFlow
 import qualified CS2MinCostFlow
 
+-- A default set of configuration values; see module PMConfig.
 pmDefaults = PMConfig {
 
 -- === Choose a min-cost flow implementation (timings on mattlaptop2) ===
@@ -58,6 +59,14 @@ assignmentCost = \pref -> (numAsWt 10 + pref) ^ 2,
 knowledgeableBonus = 1000,
 
 -- Bonus for an additional expert review.
-expertBonus = 1000
+expertBonus = 1000,
+
+-- === Parameters for the random-instance generator ===
+
+-- Number of topics.
+numTopics = 20,
+
+-- Exponent of the Zipf distribution used to choose topics for each proposal.
+topicZipfExponent = -0.5
 
 }
diff --git a/program/PMInstanceGenerator.hs b/program/PMInstanceGenerator.hs
index 97a546a..0f78b02 100644
--- a/program/PMInstanceGenerator.hs
+++ b/program/PMInstanceGenerator.hs
@@ -1,22 +1,21 @@
 module PMInstanceGenerator where
 import PMInstance
+import PMConfig
 import System.Random
 import RandomizedMonad
 import Data.Array.IArray
 import ArrayStuff
 
-numTopics = 20
-
 -- Expertise on each of the topics
 data ReviewerInfo = ReviewerInfo {
 	riTopicExpertness :: Array Int Double,
 	riConflicts       :: [Int]
 }
 
-randomReviewerInfo numProps = do
+randomReviewerInfo cfg numProps = do
 	-- "Older" reviewers are more likely to be expert on topics.
 	age <- mrandomR (0.5, 1.0)
-	expns <- indRandomArray (0, numTopics-1) $
+	expns <- indRandomArray (0, numTopics cfg - 1) $
 		withProb [(0.15 * age, return 2), (0.4 * age, return 1)] (return 0)
 	-- Samir: "Its often the case that each reviewer has a COI with say
 	-- one proposal submitted either by their University (different faculty)
@@ -35,11 +34,15 @@ data ProposalInfo = ProposalInfo {
 	piDifficulty :: Wt
 }
 
-randomProposalInfo = do
+randomTopic cfg = withWeight $
+	map (\i -> (numAsWt (i+1) ** topicZipfExponent cfg, return i))
+		[0 .. numTopics cfg - 1]
+
+randomProposalInfo cfg = do
 	topics <- do
-		t1 <- mrandomR (0, numTopics-1)
+		t1 <- randomTopic cfg
 		withProb [(0.5, return $ PTopic1 t1)] (do
-			t2 <- filterRandomized (/= t1) $ mrandomR (0, numTopics-1)
+			t2 <- filterRandomized (/= t1) $ randomTopic cfg
 			return $ PTopic2 t1 t2
 			)
 	diff <- mrandomR (3, 5)
@@ -49,11 +52,11 @@ expertnessToPref expertness = if expertness == 0 then 7
 	else if expertness == 1 then 5
 	else 3
 
-randomInstance :: Int -> Int -> Randomized PMInstance
-randomInstance numRvrs numProps = do
-	reviewerInfos <- indRandomArray (0, numRvrs-1) $ randomReviewerInfo numProps
+randomInstance :: PMConfig -> Int -> Int -> Randomized PMInstance
+randomInstance cfg numRvrs numProps = do
+	reviewerInfos <- indRandomArray (0, numRvrs-1) $ randomReviewerInfo cfg numProps
 		:: Randomized (Array Int ReviewerInfo)
-	proposalInfos <- indRandomArray (0, numProps-1) $ randomProposalInfo
+	proposalInfos <- indRandomArray (0, numProps-1) $ randomProposalInfo cfg
 		:: Randomized (Array Int ProposalInfo)
 	let loadA = constArray (0, numRvrs-1) 1
 	let prefA = funcArray ((0, 0), (numRvrs-1, numProps-1)) (\(i,j) ->
diff --git a/program/RandomizedMonad.hs b/program/RandomizedMonad.hs
index 62e8808..22b98aa 100644
--- a/program/RandomizedMonad.hs
+++ b/program/RandomizedMonad.hs
@@ -3,7 +3,7 @@ module RandomizedMonad (
 	msplit,
 	runRandom1, runRandom, runRandomStd, runRandomNewStd,
 	mrandomR, mrandom,
-	withProb,
+	withProb, withWeight,
 	filterRandomized,
 	indReplicateRandom, indRepeatRandom, indRandomArray
 ) where
@@ -70,6 +70,13 @@ withProb ifCs elseR = do
 	val <- mrandom
 	chooseCase val ifCs elseR
 
+-- Like withProb, but without an else case and with the "probabilities" scaled
+-- so that they sum to 1.
+withWeight :: [(Double, Randomized a)] -> Randomized a
+withWeight ifCs = do
+	val <- mrandomR (0, sum (map fst ifCs))
+	chooseCase val (tail ifCs) (snd (head ifCs))
+
 -- Keep trying until we get what we want.
 filterRandomized :: (a -> Bool) -> Randomized a -> Randomized a
 filterRandomized f ra = do
diff --git a/program/TestUtils.hs b/program/TestUtils.hs
index acee1c7..96a170c 100644
--- a/program/TestUtils.hs
+++ b/program/TestUtils.hs
@@ -88,17 +88,17 @@ goGraph theGraph =
 l1 /\ l2 = (l1 \\ l2, l2 \\ l1)
 
 -- Evaluation!
-runEvaluation nr np = do
-	let inst = runRandom myGen $ randomInstance nr np
+runEvaluation cfg nr np = do
+	let inst = runRandom myGen $ randomInstance cfg nr np
 	putStr (show inst ++ "\n")
-	let m0 = doMatching pmDefaults{loadTolerance = 0} inst
+	let m0 = doMatching cfg{loadTolerance = 0} inst
 	putStr ("Matching with load tolerance 0:\n" ++ show m0 ++ "\n")
-	let m1 = doMatching pmDefaults{loadTolerance = 1} inst
+	let m1 = doMatching cfg{loadTolerance = 1} inst
 	putStr ("Matching with load tolerance 1:\n" ++ show m1 ++ "\n")
 	putStr ("Differences:\n" ++ show (m0 /\ m1) ++ "\n")
-	let e0 = evaluateMatching pmDefaults{loadTolerance = 0} inst m0
+	let e0 = evaluateMatching cfg{loadTolerance = 0} inst m0
 	putStr ("Evaluation of first matching:\n" ++ show e0 ++ "\n")
-	let e1 = evaluateMatching pmDefaults{loadTolerance = 1} inst m1
+	let e1 = evaluateMatching cfg{loadTolerance = 1} inst m1
 	putStr ("Evaluation of second matching:\n" ++ show e1 ++ "\n")
 	putStr ("Evaluation differences:\n" ++
 		show (sortedDiffEvaluations e0 e1) ++ "\n")