- Finish revising the paper.

author Matt McCutchen <matt@mattmccutchen.net>

Sun, 12 Feb 2012 06:39:59 +0000 (22:39 -0800)

committer Matt McCutchen <matt@mattmccutchen.net>

Sun, 12 Feb 2012 06:39:59 +0000 (22:39 -0800)
author Matt McCutchen <matt@mattmccutchen.net>
Sun, 12 Feb 2012 06:39:59 +0000 (22:39 -0800)
committer Matt McCutchen <matt@mattmccutchen.net>
Sun, 12 Feb 2012 06:39:59 +0000 (22:39 -0800)
diff --git a/paper/flow.fig b/paper/flow.fig

index 3b3696e..ef3359a 100644 (file)
--- a/paper/flow.fig
+++ b/paper/flow.fig
@@ -1,4 +1,4 @@
-#FIG 3.2  Produced by xfig version 3.2.5
+#FIG 3.2  Produced by xfig version 3.2.5b
  Landscape
  Center
  Inches
  Landscape
  Center
  Inches
@@ -68,12 +68,15 @@ Single
         1 1 3.00 45.00 90.00
          2325 5925 2325 6375
  -6
         1 1 3.00 45.00 90.00
          2325 5925 2325 6375
  -6
-6 5925 300 8925 1425
-4 0 0 50 -1 0 12 0.0000 4 180 2625 5925 450 Reviewer 1 is an expert reviewer\001
-4 0 0 50 -1 0 12 0.0000 4 180 2715 5925 675 for paper 1, but reviewer 2 is only\001
-4 0 0 50 -1 0 12 0.0000 4 180 2985 5925 900 a knowledgable reviewer for paper 1.\001
-4 0 0 50 -1 0 12 0.0000 4 135 2880 5925 1125 Reviewer 3 does not want to review\001
-4 0 0 50 -1 0 12 0.0000 4 180 645 5925 1350 paper 1.\001
+6 5625 6000 8325 6750
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
+       0 0 1.00 60.00 120.00
+        6150 6450 7425 6450
+2 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
+       1 1 3.00 45.00 90.00
+        6150 6075 7350 6075
+4 0 0 50 -1 0 12 0.0000 6 180 2685 5625 6300 Parallel edges with total capacity\001
+4 0 0 50 -1 0 12 0.0000 6 180 1230 6150 6675 (capacity, cost)\001
  -6
  1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 525 3975 75 75 525 3975 525 4050
  1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 8422 3922 75 75 8422 3922 8422 3997
  -6
  1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 525 3975 75 75 525 3975 525 4050
  1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 8422 3922 75 75 8422 3922 8422 3997
@@ -89,12 +92,6 @@ Single
  2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
         0 0 1.00 60.00 120.00
          525 4050 2250 5175
  2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
         0 0 1.00 60.00 120.00
          525 4050 2250 5175
-2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
-       0 0 1.00 60.00 120.00
-        5850 3000 8325 3900
-2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
-       0 0 1.00 60.00 120.00
-        5850 5250 8325 3975
  2 2 2 1 0 7 50 -1 -1 3.000 0 0 -1 0 0 5
          5400 1425 6150 1425 6150 3150 5400 3150 5400 1425
  2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
  2 2 2 1 0 7 50 -1 -1 3.000 0 0 -1 0 0 5
          5400 1425 6150 1425 6150 3150 5400 3150 5400 1425
  2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
@@ -107,59 +104,71 @@ Single
          5775 4050 5775 4575
  2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
         0 0 1.00 60.00 120.00
          5775 4050 5775 4575
  2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
         0 0 1.00 60.00 120.00
-        2400 600 5700 1650
+        525 3900 2250 675
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+        2775 1350 2400 1650
  2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
         0 0 1.00 60.00 120.00
  2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
         0 0 1.00 60.00 120.00
-        2400 2925 5700 2325
+        2396 3582 5700 1650
  2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
         0 0 1.00 60.00 120.00
  2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
         0 0 1.00 60.00 120.00
-        5025 6525 6300 6525
+        2365 626 5700 2325
  2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
         0 0 1.00 60.00 120.00
  2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
         0 0 1.00 60.00 120.00
-        525 3900 2250 675
+        2400 5850 5658 2355
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+        2775 1350 2400 900
  2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
         0 0 1.00 60.00 120.00
  2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
         0 0 1.00 60.00 120.00
-        2400 6450 5700 3000
-2 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
-       1 1 3.00 45.00 90.00
-        5025 6150 6225 6150
+        5850 5250 8325 3975
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
+       0 0 1.00 60.00 120.00
+        5787 2966 8333 3878
  2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
  2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
-        2775 1575 2400 900
+        1275 1350 1500 1500
  2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
  2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
-        2775 1575 2400 1650
-4 0 0 50 -1 0 12 0.0000 4 135 1740 1950 2400 Nodes for Reviewer 1\001
-4 0 0 50 -1 0 12 0.0000 4 135 1740 1800 4725 Nodes for Reviewer 2\001
-4 0 0 50 -1 0 12 0.0000 4 135 1740 1725 6900 Nodes for Reviewer 3\001
-4 0 0 50 -1 0 12 0.0000 4 180 1425 4950 5775 Nodes for paper 2\001
-4 0 0 50 -1 0 12 0.0000 4 180 1185 5175 6825 (capacity,cost)\001
-4 0 0 50 -1 0 12 0.0000 4 180 1425 4950 3450 Nodes for paper 1\001
-4 0 0 50 -1 0 12 0.0000 6 195 720 6300 4050 $p^1_2$\001
-4 0 0 50 -1 0 12 0.0000 6 195 720 6375 5400 $p^3_2$\001
-4 0 0 50 -1 0 12 0.0000 6 195 720 6300 4725 $p^2_2$\001
-4 0 0 50 -1 0 12 0.0000 4 165 330 7200 3375 (r,0)\001
-4 0 0 50 -1 0 12 0.0000 4 165 330 7200 4350 (r,0)\001
-4 0 0 50 -1 0 12 0.0000 6 180 675 2850 600 $r^1_1$\001
-4 0 0 50 -1 0 12 0.0000 6 180 675 2850 1950 $r^3_1$\001
-4 0 0 50 -1 0 12 0.0000 6 180 675 2850 1275 $r^2_1$\001
-4 0 0 50 -1 0 12 0.0000 6 180 675 2850 3000 $r^1_2$\001
-4 0 0 50 -1 0 12 0.0000 6 180 675 2850 5250 $r^1_3$\001
-4 0 0 50 -1 0 12 0.0000 6 180 675 2850 3675 $r^2_2$\001
-4 0 0 50 -1 0 12 0.0000 6 180 675 2850 5925 $r^2_3$\001
-4 0 0 50 -1 0 12 0.0000 6 180 675 2850 4275 $r^3_2$\001
-4 0 0 50 -1 0 12 0.0000 6 180 675 2850 6525 $r^3_3$\001
-4 0 0 50 -1 0 12 0.0000 4 165 390 975 3450 (L,0)\001
-4 0 0 50 -1 0 12 0.0000 4 165 390 975 4275 (L,0)\001
-4 0 0 50 -1 0 12 0.0000 6 195 1710 3825 900 $(1,(10+p_{11})^2)$\001
-4 0 0 50 -1 0 12 0.0000 6 195 1710 3825 2850 $(1,(10+p_{21})^2)$\001
-4 0 0 50 -1 0 12 0.0000 4 180 1125 5100 6375 Parallel edges\001
-4 0 0 50 -1 0 12 0.0000 6 180 840 4800 2025 $(1,-c_1)$\001
-4 0 0 50 -1 0 12 0.0000 6 195 930 5850 2025 $(\\infty,0)$\001
-4 0 0 50 -1 0 12 0.0000 6 195 2205 5925 2700 $(1,-c_2)$ and $(\\infty, 0)$\001
-4 0 0 50 -1 0 12 0.0000 6 195 720 6225 3000 $p^3_1$\001
-4 0 0 50 -1 0 12 0.0000 6 195 720 6225 2400 $p^2_1$\001
-4 0 0 50 -1 0 12 0.0000 6 195 720 6225 1725 $p^1_1$\001
-4 0 0 50 -1 0 12 0.0000 4 135 135 1050 4950 C\001
-4 0 0 50 -1 0 12 0.0000 4 135 135 900 2175 C\001
-4 0 0 50 -1 0 12 0.0000 4 135 135 1425 3825 C\001
-4 0 0 50 -1 0 12 0.0000 4 165 390 1350 2550 (L,0)\001
-4 0 0 50 -1 0 12 0.0000 4 135 360 2850 1650 L+C\001
+        1275 1350 1650 1725
+4 0 0 50 -1 0 12 0.0000 6 180 675 2475 1950 $r^3_1$\001
+4 0 0 50 -1 0 12 0.0000 6 180 675 2475 1350 $r^2_1$\001
+4 0 0 50 -1 0 12 0.0000 6 180 675 2475 3000 $r^1_2$\001
+4 0 0 50 -1 0 12 0.0000 6 180 675 2475 4275 $r^3_2$\001
+4 0 0 50 -1 0 12 0.0000 6 180 675 2475 600 $r^1_1$\001
+4 0 0 50 -1 0 12 0.0000 6 180 675 2475 5250 $r^1_3$\001
+4 0 0 50 -1 0 12 0.0000 6 180 675 2475 5925 $r^2_3$\001
+4 0 0 50 -1 0 12 0.0000 6 180 675 2475 6525 $r^3_3$\001
+4 0 0 50 -1 0 12 0.0000 6 195 720 5925 4050 $p^1_2$\001
+4 0 0 50 -1 0 12 0.0000 6 195 720 5925 4725 $p^2_2$\001
+4 0 0 50 -1 0 12 0.0000 6 195 720 5925 5400 $p^3_2$\001
+4 0 0 50 -1 0 12 0.0000 6 165 570 2850 1425 $L+C$\001
+4 0 0 50 -1 0 12 0.0000 6 135 1665 1650 6900 Nodes for reviewer 3\001
+4 0 0 50 -1 0 12 0.0000 6 135 1665 1650 4650 Nodes for reviewer 2\001
+4 0 0 50 -1 0 12 0.0000 6 180 1425 5100 5700 Nodes for paper 2\001
+4 0 0 50 -1 0 12 0.0000 6 180 600 975 4275 $(L,0)$\001
+4 0 0 50 -1 0 12 0.0000 6 165 345 1050 4950 $C$\001
+4 0 0 50 -1 0 12 0.0000 6 165 345 1425 3825 $C$\001
+4 0 0 50 -1 0 12 0.0000 2 165 285 300 4050 $s$\001
+4 0 0 50 -1 0 12 0.0000 6 135 1665 1650 2325 Nodes for reviewer 1\001
+4 0 0 50 -1 0 12 0.0000 6 180 1425 5100 3450 Nodes for paper 1\001
+4 0 0 50 -1 0 12 0.0000 6 180 600 975 3375 $(L,0)$\001
+4 0 0 50 -1 0 12 0.0000 6 180 600 1275 2625 $(L,0)$\001
+4 0 0 50 -1 0 12 0.0000 2 165 270 8550 3975 $t$\001
+4 0 0 50 -1 0 12 0.0000 6 195 720 5925 1725 $p^1_1$\001
+4 0 0 50 -1 0 12 0.0000 6 195 720 5925 2400 $p^2_1$\001
+4 0 0 50 -1 0 12 0.0000 6 195 720 5925 3000 $p^3_1$\001
+4 0 0 50 -1 0 12 0.0000 6 180 540 7200 4275 $(r,0)$\001
+4 0 0 50 -1 0 12 0.5236 6 180 1710 3375 3225 $(1,(10+d_{21})^2)$\001
+4 0 0 50 -1 0 12 0.7854 6 180 1710 3525 4950 $(1,(10+d_{31})^2)$\001
+4 0 0 50 -1 0 12 0.0000 2 180 570 2400 3975 \\eg{C}\001
+4 0 0 50 -1 0 12 0.0000 6 165 345 825 2175 $C$\001
+4 0 0 50 -1 0 12 0.0000 6 180 675 2475 3720 $r^2_2$\001
+4 0 0 50 -1 0 12 0.0000 2 180 570 2400 3255 \\eg{B}\001
+4 0 0 50 -1 0 12 0.0000 2 180 570 975 1350 \\eg{A}\001
+4 0 0 50 -1 0 12 0.0000 6 195 1515 5850 2025 \\eg{F} $(\\infty,0)$\001
+4 0 0 50 -1 0 12 0.0000 6 195 2820 5925 2700 \\eg{G} $(1,-c_2)$ and $(\\infty, 0)$\001
+4 0 0 50 -1 0 12 0.0000 6 195 1155 7200 3375 \\eg{H} $(r,0)$\001
+4 0 0 50 -1 0 12 0.0000 6 180 840 4725 4875 $(1,-c_1)$\001
+4 0 0 50 -1 0 12 0.0000 2 180 555 4950 4650 \\eg{E}\001
+4 0 0 50 -1 0 12 5.8469 6 195 2325 3225 975 \\eg{D} $(1,(10+d_{11})^2)$\001
+4 0 0 50 -1 0 12 0.0000 2 180 1995 5925 7275 with the implementation\001
+4 0 0 50 -1 0 12 0.0000 2 180 2370 5925 7050 Edge groups cross-referenced\001
+4 0 0 50 -1 0 12 0.0000 2 180 570 5550 7050 \\eg{A}\001
diff --git a/paper/paper.tex b/paper/paper.tex

index 2882ac9..7ce6770 100644 (file)
--- a/paper/paper.tex
+++ b/paper/paper.tex
@@ -25,7 +25,9 @@
  \usepackage{pstricks}
  \usepackage{delarray}
  
  \usepackage{pstricks}
  \usepackage{delarray}
  
-\title{Assigning Reviewers to Proposals}
+\def\code#1{\texttt{#1}}
+
+\title{Assigning Papers to Reviewers}
  
  \author{
  Samir Khuller\thanks{Research currently supported by CCF-0728839.
  
  \author{
  Samir Khuller\thanks{Research currently supported by CCF-0728839.
@@ -74,7 +76,7 @@ score indicating his/her level of interest in reviewing the paper and an
  (Some organizations may choose to use a single set of scores for both
  desirability and expertise. We believe that making this distinction may better
  model the real-world objective.)
  (Some organizations may choose to use a single set of scores for both
  desirability and expertise. We believe that making this distinction may better
  model the real-world objective.)
-A reviewer may also report a conflict of interest with a particular paper,
+A reviewer may also declare a conflict of interest with a particular paper,
  meaning that he/she is forbidden to review the paper.
  
  We do not consider stable marriage type preference lists,
  meaning that he/she is forbidden to review the paper.
  
  We do not consider stable marriage type preference lists,
@@ -83,16 +85,16 @@ to produce. In this scheme, the papers are essentially grouped
  into a few categories.
  
  Let $N$ be the number of papers and $P$ be the number of reviewers.
  into a few categories.
  
  Let $N$ be the number of papers and $P$ be the number of reviewers.
-Suppose that each paper needs $r$ reviews, so a total of $rN$
+Suppose that each paper needs $q$ reviews, so a total of $qN$
  reviews need to be generated.
  Ideally, from the perspective of the papers, we would like to
  reviews need to be generated.
  Ideally, from the perspective of the papers, we would like to
-assign each paper the $r$ most qualified reviewers for the paper.
+assign each paper the $q$ most qualified reviewers for the paper.
  Of course, this could lead to a load imbalanced solution where
  the load on some program committee members is very high, and the
  load on others is low. On the other hand, we could insist 
  on a perfectly load balanced solution in which the number
  of papers assigned to each program committee member does not
  Of course, this could lead to a load imbalanced solution where
  the load on some program committee members is very high, and the
  load on others is low. On the other hand, we could insist 
  on a perfectly load balanced solution in which the number
  of papers assigned to each program committee member does not
-exceed $L= \lceil rN/P  \rceil$. However, this may lead to a solution
+exceed $L= \lceil qN/P  \rceil$. However, this may lead to a solution
  which is not optimal from the perspective of the papers.
  
  One of our goals is to study precisely this tradeoff, and allow each
  which is not optimal from the perspective of the papers.
  
  One of our goals is to study precisely this tradeoff, and allow each
@@ -107,11 +109,21 @@ have most liked to review.
  We would like to spread the load of the stinkers as evenly as possible.
   
  
  We would like to spread the load of the stinkers as evenly as possible.
   
  
-\section{Formulation as a Min-Cost Flow Problem}
+\section{Formulation as a Min-Cost Max-Flow Problem}
+
+\begin{figure*}
+\begin{center}
+\def\eg#1{\fbox{#1}}
+\centerline{\includexfig{flow}}
+\caption{Flow Construction.}
+\label{flow-fig}
+\end{center}
+\end{figure*}
  
  
-Our main approach is to formulate this as a min-cost flow problem.
+We formulate the assignment problem as a min-cost max-flow problem,
+where each unit of flow represents one review.
  The construction is somewhat involved in order to incorporate all
  The construction is somewhat involved in order to incorporate all
-the appropriate incentives.  It makes
+the desired incentives.  In several places, it makes
  use of sets of ``parallel edges'' of different costs connecting a
  single pair of nodes $(x, y)$ to allow flow to be sent from $x$ to $y$
  at a cost that grows faster than linear in the amount of the flow.  For
  use of sets of ``parallel edges'' of different costs connecting a
  single pair of nodes $(x, y)$ to allow flow to be sent from $x$ to $y$
  at a cost that grows faster than linear in the amount of the flow.  For
@@ -119,11 +131,12 @@ example, if there are five unit-capacity edges from $x$ to $y$ of costs
  1, 3, 5, 7, and 9, then any integer amount $f \le 5$ of flow can be
  sent from $x$ to $y$ at a cost of $f^2$.
  
  1, 3, 5, 7, and 9, then any integer amount $f \le 5$ of flow can be
  sent from $x$ to $y$ at a cost of $f^2$.
  
-The construction is done as follows: we have a source $s$ and a sink $t$.
+An example of the graph is shown in Figure~\ref{flow-fig}.
+We have a source $s$ and a sink $t$.
  For each paper $j$ we create a set of nodes $p^1_j, p^2_j,p^3_j$, and for each
  reviewer $i$ we create a set of nodes $r^1_i, r^2_i, r^3_i$.  (The rationale
  For each paper $j$ we create a set of nodes $p^1_j, p^2_j,p^3_j$, and for each
  reviewer $i$ we create a set of nodes $r^1_i, r^2_i, r^3_i$.  (The rationale
-for these sets is discussed below.)  See Figure~\ref{flow-fig} for an
-example.  Flow can pass from $s$ through one or more of the nodes $r^t_i$ and
+for these sets is discussed below.)
+Flow can pass from $s$ through one or more of the nodes $r^t_i$ and
  one or more of the nodes $p^t_j$ to the sink to represent a review
  by reviewer $i$ of paper $j$.
  
  one or more of the nodes $p^t_j$ to the sink to represent a review
  by reviewer $i$ of paper $j$.
  
@@ -148,33 +161,41 @@ unless the load imbalance in the second solution is outweighed by
  other benefits.
  
  For each reviewer $i$ and proposal $j$, there is a unit-capacity edge from $i$
  other benefits.
  
  For each reviewer $i$ and proposal $j$, there is a unit-capacity edge from $i$
-to $j$ allowing that assignment to be made, unless the reviewer declared a
-conflict of interest, in which case the edge is not present.  The edge has
-cost $(10 + p_{ij})^2$, where $p_{ij}$ is the preference value
-expressed by reviewer $i$ for proposal $j$.
-We assume $p_{ij}$ is a value between 1 and 40 (as used by NSF).
-The cost function was chosen to provide an incentive to avoid really bad
-assignments without completely masking the difference between a good assignment
-and an excellent assignment.
-
-These purely additive assignment costs make the algorithm prefer better
-assignments in general but do nothing to promote fairness among reviewers or
-among papers.  To do that, we introduce additional reviewer-side costs and
-paper-side bonuses.  With respect to a
-reviewer $i$, we classify papers as interesting (preference value 1 to 15), boring
-(16 to 25), or very boring (26 to 40).  The edge for reviewer $i$ and paper
+to $j$ allowing that pair to be assigned, unless the reviewer declared a
+conflict of interest, in which case the edge is not present.  The edge cost is
+based on the desirability value $d_{ij}$ stated by reviewer $i$ for proposal
+$j$.  For values on the NSF scale of 1 (best) to 40 (worst), we chose the cost
+function $(10 + d_{ij})^2$, in an attempt to provide an incentive to avoid
+really bad matched pairs without completely masking the difference between a
+good matched pair and an excellent one.  This choice seeks only to achieve a
+natural relationship between a linear preference scale as normally interpreted
+and the costs to be used in the optimization.  We realize that strategic
+reviewers will take the cost function into account in choosing what desirability
+values to submit, in which case its form matters little.
+
+Alongside these purely additive per-review costs,
+we want to avoid an individual reviewer
+getting too many papers he/she does not like.
+With respect to a reviewer $i$, we classify papers as ``interesting'',
+``boring'', or ``very boring'' based on their desirability values;
+the thresholds for these classes are currently
+the same for all reviewers.  The edge for reviewer $i$ and paper
  $j$ leaves from $r^1_i$ if $j$ is interesting, $r^2_i$ if $j$ is boring, or
  $r^3_i$ if $j$ is very boring.  Since all edges from the source enter $r^1_i$,
  $j$ leaves from $r^1_i$ if $j$ is interesting, $r^2_i$ if $j$ is boring, or
  $r^3_i$ if $j$ is very boring.  Since all edges from the source enter $r^1_i$,
-flow for boring and very boring assignments is forced to pass through a set of
-parallel edges from $r^1_i$ to $r^2_i$, and flow for very boring assignments
+flow for boring and very boring papers is forced to pass through a set of
+parallel edges from $r^1_i$ to $r^2_i$, and flow for very boring papers
  must pass through an additional set of parallel edges from $r^2_i$ to $r^3_i$.
  In each of these sets, we make the cost of the $l$th edge an increasing
  function of $l$ to provide an incentive to balance the load of boring
  must pass through an additional set of parallel edges from $r^2_i$ to $r^3_i$.
  In each of these sets, we make the cost of the $l$th edge an increasing
  function of $l$ to provide an incentive to balance the load of boring
-assignments in the same way as for overload.
-
-Similarly, we wish to guarantee each paper at least one or two good reviews.
-With respect to a paper $j$, we classify reviewers as expert (preference value
-1 to 10), knowledgeable (11 to 20), or general (21 to 40).  Edges representing
+papers in the same way as for overload.
+
+Finally, we would like at least one or two of each paper's reviews to be well
+qualified, if possible.  The method is the same as that for reviewer fairness.
+With respect to a paper $j$, we classify reviewers as ``expert'',
+``knowledgeable'', or ``general'' by comparing the expertise values to uniform
+thresholds.  (Since this is the only place the expertise values are used, we
+effectively assume expertise takes on only these three values.)
+Edges representing
  expert reviews enter $p^1_j$, edges for knowledgeable reviews enter $p^2_j$,
  and edges for general reviews enter $p^3_j$; the edge to the sink leaves
  $p^3_j$.  A paper's first knowledgeable (or expert) review scores a bonus $c_2$
  expert reviews enter $p^1_j$, edges for knowledgeable reviews enter $p^2_j$,
  and edges for general reviews enter $p^3_j$; the edge to the sink leaves
  $p^3_j$.  A paper's first knowledgeable (or expert) review scores a bonus $c_2$
@@ -187,33 +208,75 @@ from $p^1_j$ to $p^2_j$ and from $p^2_j$ to $p^3_j$ in order to reach the sink.
  The choice to offer bonuses for two reviews was based on the value $r = 3$;
  this would be easy to change for other values of $r$.
  
  The choice to offer bonuses for two reviews was based on the value $r = 3$;
  this would be easy to change for other values of $r$.
  
-The cost of a flow is the sum of its reviewer overload costs,
-assignment costs, and reviewer boring / very boring load costs,
-minus paper bonuses.  Any one of those components can be traded off against
-the others.  We attempted to assign reasonable weights to each component,
-but it is difficult to know without testing the algorithm on real data.
-In any event, all the parameters are easy to tune to realize the priorities
-of a particular application.
-
-\begin{figure*}
-\begin{center}
-\centerline{\includexfig{flow}}
-\caption{Flow Construction.}
-\label{flow-fig}
-\end{center}
-\end{figure*}
-
-
-\section{Experimental Results} 
-Waiting for data from NSF. 
-
-Synthetic Data.
-
-\section{Conclusions}
-
-The source code for the current version of the proposal matcher may be
-browsed or downloaded at:
-\[\hbox{\url{TODO}}\]
+In the example in Figure~\ref{flow-fig},
+paper 1 is interesting to reviewer 1 and boring to reviewers 2 and 3.
+Reviewer 2 is expert on paper 1, with reviewers 1 and 3 merely knowledgeable.
+(Reviewer edges for paper 2 are not shown.)
+This illustrates how, in principle,
+the desirability and expertise relations might differ.
+Each is taken into account at a different stage of the construction.
+
+The cost of a flow (assignment) is the sum of its reviewer overload costs,
+per-review costs, and reviewer boring / very boring load costs,
+minus paper bonuses.  Any one of these components can be traded off against
+the others.
+
+\section{Evaluation}
+
+We have implemented the matching algorithm, based on the construction above, in
+Haskell.
+The construction is intended to illustrate
+ways to model real concerns that we find reasonable a priori.
+We do not have enough experience with real-world
+instances to be confident that each part of the construction serves its intended
+purpose or that the parameter values we have chosen are suitable.
+Fortunately, the parameter values are easy to change in the source code of our
+implementation, and even substantive changes to the graph structure are not too
+hard to make.  At a minimum, we believe that min-cost max-flow provides a
+reasonable framework for global optimization of an assignment.
+
+We have worked with Michael Hicks, program chair of POPL 2012, to apply our
+method to the paper assignment problem for that conference.
+The set of POPL 2012 reviewers consisted of the
+program committee (PC) and an external review committee (ERC),
+where the ERC served two purposes:
+\begin{itemize}
+\item To provide up to one knowledgeable (or expert) review per paper if
+prior knowledge of the topic was hard to find on the PC.
+\item To provide all reviews of papers with an author on the PC (``PC papers''),
+which were considered to
+pose an implicit conflict of interest to all PC members.
+\end{itemize}
+The special policy for ERC reviews of non-PC papers was realized via a more
+complex paper-side gadget, not described here.
+Based on an evaluation tool he wrote as well as manual inspection,
+Dr.~Hicks was satisfied that the decisions made by the matching tool
+closely reflected the best he could have done manually.
+
+We are looking for additional opportunities to apply the matching tool.
+Anyone interested is invited to contact us so we can help adapt it
+to the scenario and document the experience gained.
+
+%Waiting for data from NSF. 
+
+%Synthetic Data.
+
+\section{Getting the Tool}
+
+A distribution containing the source code for the matching tool as well as this
+document may be browsed or downloaded at (NOT YET):
+\[\hbox{\url{https://mattmccutchen.net/match/}}\]
+There are currently two branches:
+\begin{itemize}
+\item \code{master} has the tool as originally designed for NSF, with no
+distinction between desirability and expertise.
+\item \code{popl2012} is the basis of the version used for POPL 2012.  The main
+differences are that it has separate desirability and expertise, support for
+``fixing'' previously chosen reviewer-paper pairs (buggy, however),
+and the special ERC gadget.
+\end{itemize}
+We regret that we do not currently have a single well-maintained version of the
+tool to recommend.
  
  \begin{thebibliography}{99}
  
  
  \begin{thebibliography}{99}
  
diff --git a/program/ProposalMatcher.hs b/program/ProposalMatcher.hs

index dcc757a..c2a955f 100644 (file)
--- a/program/ProposalMatcher.hs
+++ b/program/ProposalMatcher.hs
@@ -55,7 +55,7 @@ doReduction cfg (PMInstance numRvrs numProps rloadA prefA) =
                 totalReviews = (reviewsEachProposal cfg) * numProps
                 totalRelativeLoad = foldl (+) 0 (map (rloadA !) [0 .. numRvrs - 1])
                 targetLoad i = ceiling (widenInteger totalReviews * (rloadA ! i) / totalRelativeLoad)
                 totalReviews = (reviewsEachProposal cfg) * numProps
                 totalRelativeLoad = foldl (+) 0 (map (rloadA !) [0 .. numRvrs - 1])
                 targetLoad i = ceiling (widenInteger totalReviews * (rloadA ! i) / totalRelativeLoad)
-               -- A...H refer to idea book p.429
+               -- Edge groups A through H are indicated in the figure in the paper.
                 edgesABC = do
                         i <- [0 .. numRvrs - 1]
                         let tl = targetLoad i
                 edgesABC = do
                         i <- [0 .. numRvrs - 1]
                         let tl = targetLoad i
author	Matt McCutchen <matt@mattmccutchen.net>
	Sun, 12 Feb 2012 06:39:59 +0000 (22:39 -0800)
committer	Matt McCutchen <matt@mattmccutchen.net>
	Sun, 12 Feb 2012 06:39:59 +0000 (22:39 -0800)
paper/flow.fig		patch \| blob \| blame \| history
paper/paper.tex		patch \| blob \| blame \| history
program/ProposalMatcher.hs		patch \| blob \| blame \| history