From: Matt McCutchen Date: Sat, 12 Jul 2008 05:58:57 +0000 (-0400) Subject: - Implement CS2 min-cost-flow adaptor and generalize common min-cost-flow stuff X-Git-Url: https://mattmccutchen.net/match/match.git/commitdiff_plain/fd0d2377785ca843a46b0050a7351dac82c84777 - Implement CS2 min-cost-flow adaptor and generalize common min-cost-flow stuff accordingly. - Create conflict-of-interest edges (with zero capacity) so doMatching doesn't crash when it queries their flow values. - Factor out TestUtils. - Add a function "goGraph" to gnome-open a visualization of a graph from within GHCi! Remove old "show-graph". --- diff --git a/program/ArrayStuff.hs b/program/ArrayStuff.hs index abf14c7..8fcced9 100644 --- a/program/ArrayStuff.hs +++ b/program/ArrayStuff.hs @@ -4,6 +4,8 @@ import Data.Array.IArray funcArray lohi f = listArray lohi $ map f $ range lohi +constArray lohi v = listArray lohi $ repeat v + transposeArray arr = let swap (x, y) = (y, x) in let (lo, hi) = bounds arr in diff --git a/program/CS2MinCostFlow.hs b/program/CS2MinCostFlow.hs new file mode 100644 index 0000000..47a73f5 --- /dev/null +++ b/program/CS2MinCostFlow.hs @@ -0,0 +1,89 @@ +module CS2MinCostFlow (minCostFlow) where +import IMinCostFlow +import IOStuff +import System.IO.Unsafe +import Data.Graph.Inductive.Graph +import Data.Array.IArray +import Data.List +import Data.Function + +-- Configure the path to cs2.exe relative to the program/ directory here. +cs2cmd = "./cs2.exe" + +runCS2 :: String -> String +-- Using unsafePerformIO is non-ideal, but it gives a consistent interface +-- for the min-cost flow function. +runCS2 inData = unsafePerformIO (interactWithCommand cs2cmd inData) + +data MCFEdge i f c = MCFEdge { + eFrom :: Node, + eTo :: Node, + eCost :: c, + eMIdx :: Maybe i, + eCap :: f +} deriving (Eq, Ord) + +round2 :: Real a => a -> Int +round2 x = fromInteger (round (toRational x)) + +minCostFlow :: MinCostFlowImpl +minCostFlow idxBounds edgeIdx edgeCap edgeCost theGraph (source, sink) = + let + (nLo, nHi) = nodeRange theGraph + theEdges = labEdges theGraph + -- HACK: Add a highly negative-cost edge from sink to + -- source to get CS2 to compute a max flow. + edges2 = MCFEdge sink source (-100000) Nothing 10000 : + map (\(n1, n2, l) -> MCFEdge n1 n2 (edgeCost l) (Just (edgeIdx l)) (edgeCap l)) + theEdges + -- HACK: Round capacities and costs to integers so CS2 can + -- handle them. The proposal matcher's capacities are integers, + -- and its costs are so large that the error should be insignificant. + inData = "p min " ++ show (nHi + 1 - nLo) ++ " " ++ show (length edges2) ++ "\n" + ++ "n 1 0\n" -- Dummy node description to make CS2 parser happy. + ++ concatMap (\(MCFEdge n1 n2 cost _ cap) -> + "a " ++ show (n1 - nLo + 1) ++ " " ++ show (n2 - nLo + 1) + ++ " 0 " ++ show (round2 cap) + ++ " " ++ show (round2 cost) ++ "\n") + edges2 + outData = runCS2 inData + -- Unfortunately CS2 doesn't support edge ID numbers, so we + -- have to manually apply the "flow items" it produced to the + -- appropriate edges in order of increasing cost. + -- Extract ((n1, n2), f) tuples from the output. + flowItems = concatMap (\l -> let w:ws = words l in + if w == "f" + then let + [n1s, n2s, fs] = ws + n1 = (read n1s :: Int) - 1 + nLo + n2 = (read n2s :: Int) - 1 + nLo + fv = fromInteger (toInteger (read fs :: Int)) + in [((n1, n2), fv)] + else [] + ) (lines outData) + -- Total the flow for each node pair (n1, n2) to simplify matters. + flowGroups = groupBy ((==) `on` fst) (sort flowItems) + npFlows = map (\l@((n12, _):_) -> + (n12, sum $ map snd l)) flowGroups + applyFlows fis [] = case fis of + [] -> [] + _ -> error "CS2MinCostFlow: some flow items could not be applied" + applyFlows fis es@(e@(MCFEdge n1 n2 _ mi cap):moreEs) = + let (ef, fisLeft) = case fis of + -- Note to self: One can't test equality in a + -- pattern by reusing a variable name. Use a + -- guard instead. + ((fn1, fn2), fv):moreFis | fn1 == n1 && fn2 == n2 -> + -- This edge gets (min f cap) flow. + (min fv cap, if fv > cap + then ((n1, n2), fv - cap) : moreFis + else moreFis) + _ -> (0, fis) -- No flow for this edge. + in (mi, ef) : applyFlows fisLeft moreEs + theEdgeFlows = applyFlows npFlows (sort edges2) + -- Get rid of the flow on our hack edge. + realEdgeFlows = concatMap (\(mi, ef) -> case mi of + Just i -> [(i, ef)] + Nothing -> [] + ) theEdgeFlows + in array idxBounds realEdgeFlows diff --git a/program/IMinCostFlow.hs b/program/IMinCostFlow.hs new file mode 100644 index 0000000..edd43a2 --- /dev/null +++ b/program/IMinCostFlow.hs @@ -0,0 +1,16 @@ +module IMinCostFlow where +import Data.Array.IArray +import Data.Graph.Inductive.Graph + +type MinCostFlowImpl1 gr a b i f c = + (i, i) -> -- Range of edge indices + (b -> i) -> -- Edge label -> unique edge index + (b -> f) -> -- Edge label -> flow capacity + (b -> c) -> -- Edge label -> cost per unit of flow + gr a b -> -- Graph + (Node, Node) -> -- (source, sink) + Array i f -- ! edge index -> flow value + +type MinCostFlowImpl = + forall gr a b i f c. (Graph gr, Ix i, Real f, Real c) => + MinCostFlowImpl1 gr a b i f c diff --git a/program/IOStuff.hs b/program/IOStuff.hs new file mode 100644 index 0000000..fcebc48 --- /dev/null +++ b/program/IOStuff.hs @@ -0,0 +1,38 @@ +module IOStuff (interactWithCommand) where +import System.IO +import System.Process +import System.Exit +import Control.Concurrent +import Control.Concurrent.MVar + +-- Like hGetContents but reads all the input before returning, resulting in +-- more predictable behavior. +hStrictGetContents :: Handle -> IO String +hStrictGetContents h = do + c <- hGetContents h + -- Note to self: The seq has to be *outside* the return. Otherwise the + -- seqified thunk will just be returned to the caller, defeating the + -- purpose. + seq (length c) $ return c + +interactWithCommand :: String -> String -> IO String +interactWithCommand cmd inData = do + (inH, outH, errH, pid) <- runInteractiveCommand cmd + forkIO (do + hPutStr inH inData + hClose inH) + outDataMV <- newEmptyMVar + forkIO (do + outData <- hStrictGetContents outH + putMVar outDataMV outData) + errDataMV <- newEmptyMVar + forkIO (do + errData <- hStrictGetContents errH + putMVar errDataMV errData) + outData <- takeMVar outDataMV + errData <- takeMVar errDataMV + ex <- waitForProcess pid + if ex == ExitSuccess && length errData == 0 + then return outData + else error $ "Command " ++ show cmd ++ " failed: " + ++ "stderr " ++ show errData ++ ", exit " ++ show ex diff --git a/program/Instance.hs b/program/Instance.hs index 07841e3..384666f 100644 --- a/program/Instance.hs +++ b/program/Instance.hs @@ -1,11 +1,10 @@ -module Instance where +module Instance (module Instance, Wt) where +import ProposalMatcherConfig (Wt) import Data.Array.IArray import Data.Array.Unboxed import ArrayStuff import Formatter -type Wt = Double -- must implement RealFrac - data Instance = Instance Int -- numReviewers Int -- numProposals diff --git a/program/InstanceGenerator.hs b/program/InstanceGenerator.hs index 23b20a1..ab56d5f 100644 --- a/program/InstanceGenerator.hs +++ b/program/InstanceGenerator.hs @@ -65,6 +65,7 @@ randomInstance numRvrs numProps = do jj = proposalInfos ! j topicPref = case fst jj of PTopic1 t1 -> expertnessToPref (ii ! t1) - PTopic2 t1 t2 -> (expertnessToPref (ii ! t1) + expertnessToPref (ii ! t2)) / 2 + PTopic2 t1 t2 -> (expertnessToPref (ii ! t1) + + expertnessToPref (ii ! t2)) / 2 in topicPref * snd jj - 4) return $ Instance numRvrs numProps loadA prefA diff --git a/program/NaiveMinCostFlow.hs b/program/NaiveMinCostFlow.hs index 1347837..94ee20d 100644 --- a/program/NaiveMinCostFlow.hs +++ b/program/NaiveMinCostFlow.hs @@ -1,4 +1,5 @@ module NaiveMinCostFlow (minCostFlow) where +import IMinCostFlow import BellmanFord import MonadStuff import Data.Array.IArray @@ -75,14 +76,8 @@ doFlow state = do augment state augAmt augPath doFlow state -minCostFlow :: forall s gr a b i f c. (Graph gr, Ix i, Real f, Real c) => - (i, i) -> -- Range of edge indices - (b -> i) -> -- Edge label -> unique edge index - (b -> f) -> -- Edge label -> flow capacity - (b -> c) -> -- Edge label -> cost per unit of flow - gr a b -> -- Graph - (Node, Node) -> -- (source, sink) - Array i f -- ! edge index -> flow value +-- We need to put the type parameters in scope for the mkGraph call. +minCostFlow :: forall gr a b i f c. (Graph gr, Ix i, Real f, Real c) => MinCostFlowImpl1 gr a b i f c minCostFlow idxBounds edgeIdx edgeCap edgeCost theGraph (source, sink) = runSTArray (do let ourFlipF isRev l = MCFEdge (edgeIdx l) (edgeCap l) @@ -93,7 +88,12 @@ minCostFlow idxBounds edgeIdx edgeCap edgeCost theGraph (source, sink) = runSTAr (n1, n2, MCFEdge (edgeIdx l) (edgeCap l) ( edgeCost l ) False), (n2, n1, MCFEdge (edgeIdx l) undefined (-(edgeCost l)) True ) ]) $ labEdges theGraph) :: gr a (MCFEdge i f c) - flow <- newArray idxBounds 0 + -- Initialize only the slots of the flow array corresponding to + -- existing edges to zero to catch buggy callers that query + -- nonexistent edges. + flow <- newArray idxBounds undefined + sequence $ map (\(_, _, l) -> writeArray flow (edgeIdx l) 0) + (labEdges theGraph) let state = MCFState graph2 source sink flow doFlow state return (mcfFlow state) diff --git a/program/ProposalMatcher.hs b/program/ProposalMatcher.hs index 46a693c..3720fab 100644 --- a/program/ProposalMatcher.hs +++ b/program/ProposalMatcher.hs @@ -1,12 +1,11 @@ module ProposalMatcher where -import NaiveMinCostFlow import Data.Array.IArray import Data.Graph.Inductive.Graph import Data.Graph.Inductive.Tree import Data.List import Instance -import ProposalMatcherConfig +import ProposalMatcherConfig -- gives us minCostFlow prefBoringness p = if prefIsVeryBoring p then 2 else if prefIsBoring p then 1 else 0 @@ -77,11 +76,14 @@ doReduction (Instance numRvrs numProps rloadA prefA) = i <- [0 .. numRvrs - 1] j <- [0 .. numProps - 1] let pref = prefA ! (i, j) - if prefIsConflict pref - then [] - else [(rvrNode i (prefBoringness pref), - propNode j (prefExpertness pref), - REdge (edIdx (i, j)) 1 (assignmentCost pref))] + -- We must generate an edge even if there is a conflict + -- of interest; otherwise we'll fail to read its flow + -- value in doMatching. + [(rvrNode i (prefBoringness pref), + propNode j (prefExpertness pref), + REdge (edIdx (i, j)) + (if prefIsConflict pref then 0 else 1) + (assignmentCost pref))] edgesEFGH = do j <- [0 .. numProps - 1] let edgeE = (propNode j 2, propNode j 0, REdge undefined 1 (-expertBonus)) diff --git a/program/ProposalMatcherConfig.hs b/program/ProposalMatcherConfig.hs index 750cf1f..90bc9b1 100644 --- a/program/ProposalMatcherConfig.hs +++ b/program/ProposalMatcherConfig.hs @@ -1,5 +1,18 @@ -module ProposalMatcherConfig where -import Instance +module ProposalMatcherConfig + (module ProposalMatcherConfig, minCostFlow) where + +-- Choose a min-cost flow implementation (timings on mattlaptop2): + +-- A naive implementation that is slow for all but the smallest instances +-- (30s on a 20x50 example). +import NaiveMinCostFlow + +-- Uses CS2 (http://www.igsystems.com/cs2/), which requires a license for +-- non-research use but is faster (<1s on a 20x50 example, 64s on a 60x500 +-- example). Configure the path to cs2.exe in CS2MinCostFlow.hs. +--import CS2MinCostFlow + +type Wt = Double -- Can be any RealFrac. type Pref = Int diff --git a/program/Test.hs b/program/Test.hs index cef7a53..e684859 100644 --- a/program/Test.hs +++ b/program/Test.hs @@ -1,8 +1,7 @@ module Test ( -- Export everything we need to have fun in GHCi: - - -- See the results of examples. module Test, + module TestUtils, -- Generate instances. module Instance, @@ -14,31 +13,24 @@ module Test ( -- Run randomized things. module System.Random, - module RandomizedMonad, - - -- Visualize graphs. - module Data.Graph.Inductive.Graphviz + module RandomizedMonad ) where +import TestUtils import Instance import InstanceGenerator import ProposalMatcher -import ProposalMatcherConfig +import ProposalMatcherConfig hiding (Wt) import System.Random import RandomizedMonad -import Data.Graph.Inductive.Graphviz -- Other imports we need import BellmanFord -import NaiveMinCostFlow import Data.Array.IArray import Data.Array.Unboxed import Data.Graph.Inductive.Graph import Data.Graph.Inductive.Tree import ArrayStuff --- A fixed-seeded random number generator for reproducible experimentation. -myGen = read "314159265 1" :: StdGen - -- TESTING GRAPH ALGORITHMS myGraph = mkGraph [(0, ()), (1, ()), (2, ())] [(0, 1, (0, 2)), (0, 2, (1, 3)), (2, 1, (2, -2))] :: Gr () (Int, Int) @@ -50,33 +42,6 @@ flowArray = minCostFlow (0, 2) fst (const 1) snd myGraph (0, 1) myNCGraph = mkGraph [(0, ())] [(0, 0, -1)] :: Gr () Int bfNCResult = bellmanFord id 0 myNCGraph --- VISUALIZATION STUFF -data REdgeF = REdgeF Int Int Int Wt -instance Show REdgeF where - show (REdgeF idx cap flow cost) = "#" ++ (show idx) ++ ": " - ++ (show flow) ++ " of " ++ (show cap) ++ " @ " ++ (show cost) -flowAnnotate g fa = - mkGraph (labNodes g) (map (\(n1, n2, REdge i ca co) -> - (n1, n2, REdgeF i ca (fa ! i) co)) $ labEdges g) :: Gr () REdgeF - -showInstanceAsGraph :: Instance -> [(Int, Int)] -> Gr String String -showInstanceAsGraph (Instance numRvrs numProps rloadA prefA) matchedPairs = - let - rvrNode i = i - propNode j = numRvrs + j - numNodes = numRvrs + numProps - theNodes = map (\i -> (rvrNode i, "R#" ++ show i ++ - " (RLoad " ++ show (rloadA ! i) ++ ")")) [0..numRvrs-1] ++ - map (\j -> (propNode j, "P#" ++ show j)) [0..numProps-1] - parenthesizeIf False s = s - parenthesizeIf True s = "(" ++ s ++ ")" - theEdges = do - i <- [0..numRvrs-1] - j <- [0..numProps-1] - return (rvrNode i, propNode j, - parenthesizeIf (elem (i, j) matchedPairs) $ show (prefA ! (i, j))) - in mkGraph theNodes theEdges - -- PROPOSAL-MATCHING EXAMPLES -- Example from idea book p. 425 {- @@ -97,7 +62,7 @@ myPrefs = transposeArray $ listArray ((0,0), (myNumProps-1,myNumRvrs-1)) [ 15, 25, 20, 20, 15 ] :: UArray (Int, Int) Wt -myInst = Instance myNumRvrs myNumProps (funcArray (0, myNumRvrs-1) $ const 1) myPrefs +myInst = Instance myNumRvrs myNumProps (constArray (0, myNumRvrs-1) 1) myPrefs rdnResult = doReduction myInst ReductionResult rrg rrso rrsi rreib rredi = rdnResult diff --git a/program/TestUtils.hs b/program/TestUtils.hs new file mode 100644 index 0000000..f2dbe11 --- /dev/null +++ b/program/TestUtils.hs @@ -0,0 +1,78 @@ +module TestUtils where +import Control.Concurrent +import Data.Array.IArray +import Data.Graph.Inductive.Graph +import Data.Graph.Inductive.Graphviz +import Data.Graph.Inductive.Tree +import System.IO +import System.Random +import System.Posix.IO +import System.Posix.Time +import System.Process +import Instance +import ProposalMatcher +import MonadStuff + +-- This module has stuff that is helpful for testing but isn't itself an example. + +-- A fixed-seeded random number generator for reproducible experimentation. +myGen = read "314159265 1" :: StdGen + +-- Visualization stuff. +data REdgeF = REdgeF Int Int Int Wt +instance Show REdgeF where + show (REdgeF idx cap flow cost) = "#" ++ (show idx) ++ ": " + ++ (show flow) ++ " of " ++ (show cap) ++ " @ " ++ (show cost) +flowAnnotate g fa = + mkGraph (labNodes g) (map (\(n1, n2, REdge i ca co) -> + (n1, n2, REdgeF i ca (fa ! i) co)) $ labEdges g) :: Gr () REdgeF + +showInstanceAsGraph :: Instance -> [(Int, Int)] -> Gr String String +showInstanceAsGraph (Instance numRvrs numProps rloadA prefA) matchedPairs = + let + rvrNode i = i + propNode j = numRvrs + j + numNodes = numRvrs + numProps + theNodes = map (\i -> (rvrNode i, "R#" ++ show i ++ + " (RLoad " ++ show (rloadA ! i) ++ ")")) [0..numRvrs-1] ++ + map (\j -> (propNode j, "P#" ++ show j)) [0..numProps-1] + parenthesizeIf False s = s + parenthesizeIf True s = "(" ++ s ++ ")" + theEdges = do + i <- [0..numRvrs-1] + j <- [0..numProps-1] + return (rvrNode i, propNode j, + parenthesizeIf (elem (i, j) matchedPairs) $ show (prefA ! (i, j))) + in mkGraph theNodes theEdges + +goFile :: String -> IO () +goFile fname = do + pid <- runCommand ("gnome-open " ++ fname) + waitForProcess pid -- gnome-open exits immediately + nop + +createHandlePipe :: IO (Handle, Handle) +createHandlePipe = do + (rFd, wFd) <- createPipe + rH <- fdToHandle rFd + wH <- fdToHandle wFd + return (rH, wH) + +-- GHCi seems to crash if I call this on a "showInstanceAsGraph" result without +-- having previously forced evaluation of the matching. +goGraph :: (Show a, Show b, Graph gr) => gr a b -> IO () +goGraph theGraph = + -- First generate graphviz code. + let gvCode = graphviz' theGraph in do + -- Then have `dot' convert it to postscript in a file. + (rH, wH) <- createHandlePipe + pt <- epochTime + let fname = "graph-" ++ show pt ++ ".ps" + dotPid <- runProcess "dot" ["-Tps", "-o", fname] + Nothing Nothing (Just rH) Nothing Nothing + forkIO (do + hPutStr wH gvCode + hClose wH) + waitForProcess dotPid + -- Then open the file. + goFile fname diff --git a/program/show-graph b/program/show-graph deleted file mode 100755 index 09133e5..0000000 --- a/program/show-graph +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -set -e -echo "Paste the escaped graphviz string from ghci." -IFS='' read -r egvinput -eval "echo \$'$egvinput'" | dot -Tps -o thegraph.ps -go thegraph.ps