-
-
Save rblaze/4563612 to your computer and use it in GitHub Desktop.
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
const int infinity = 2147483647; | |
struct edge_t { | |
int v1; | |
int v2; | |
int cost; | |
}; | |
int | |
main(void) { | |
int nvertex, nedge; | |
scanf("%d %d\n", &nvertex, &nedge); | |
int nalledges = nedge + nvertex; | |
struct edge_t *edges = malloc(nalledges * sizeof(struct edge_t)); | |
// read input | |
for (int i = 0; i < nedge; i++) { | |
scanf("%d %d %d\n", &edges[i].v1, &edges[i].v2, &edges[i].cost); | |
} | |
// add edges from fake vertex | |
for (int i = 0; i < nvertex; i++) { | |
int idx = nedge + i; | |
edges[idx].v1 = 0; | |
edges[idx].v2 = i + 1; | |
edges[idx].cost = 0; | |
} | |
int *c1 = malloc((nvertex + 1) * sizeof(int)); | |
int *c2 = malloc((nvertex + 1) * sizeof(int)); | |
c1[0] = 0; | |
for (int i = 1; i < nvertex + 1; i++) { | |
c1[i] = infinity; | |
} | |
for (int n = 0; n < nvertex + 2; n++) { | |
for (int i = 0; i < nalledges; i++) { | |
int target = edges[i].v2; | |
int newcost = c1[edges[i].v1]; | |
if (newcost != infinity) { | |
newcost += edges[i].cost; | |
} | |
if (newcost < c1[target]) { | |
c1[target] = newcost; | |
} | |
} | |
if (n == nvertex) { | |
memcpy(c2, c1, (nvertex + 1) * sizeof(int)); | |
} | |
} | |
/* | |
for (int i = 0; i < nvertex + 1; i++) { | |
printf("%d\n", c1[i]); | |
} | |
*/ | |
if (memcmp(c1, c2, (nvertex + 1) * sizeof(int))) { | |
printf("Cycle\n"); | |
} else { | |
int v = c1[0]; | |
for (int i = 1; i < nvertex + 1; i++) { | |
if (c1[i] < v) { | |
v = c1[i]; | |
} | |
} | |
printf("Shortest path %d\n", v); | |
} | |
return 0; | |
} |
{-# LANGUAGE BangPatterns #-} | |
module Main where | |
import Control.DeepSeq | |
import Data.Functor | |
import Data.Int | |
import Data.Vector.Unboxed ((//)) | |
import qualified Data.Vector.Unboxed as V | |
--import Debug.Trace | |
type Vertex = Int | |
type Dist = Int32 | |
type Edge = (Vertex, Vertex, Dist) | |
type EdgeVec = V.Vector Edge | |
type CostVec = V.Vector Dist | |
readEdge :: String -> Edge | |
readEdge s = let [v1, v2, w] = words s | |
in (read v1, read v2, read w) | |
bfStep :: EdgeVec -> CostVec -> CostVec | |
bfStep edges !prev = V.unsafeAccumulate min prev $ V.map mincost edges | |
where | |
mincost :: Edge -> (Int, Int32) | |
mincost (s, h, c) = (h, cost s c) | |
cost w c = let precost = prev `V.unsafeIndex` w | |
in if precost == maxBound then maxBound else precost + c | |
mkEdgeVec :: Int -> [String] -> EdgeVec | |
mkEdgeVec nvert inp = V.unfoldr step (nvert, inp) | |
where | |
step (n, s:xs) = Just (readEdge s, (n, xs)) | |
step (0, []) = Nothing | |
step (!n, []) = Just ((0, n, 0), (n - 1, [])) | |
main :: IO() | |
main = do | |
header:body <- lines <$> getContents | |
let nvert = read $ head $ words header | |
let edgelist = mkEdgeVec nvert body | |
let bfbase = V.replicate (nvert + 1) maxBound // [(0, 0)] | |
print $ edgelist `deepseq` "running" | |
let bfout = iterate (bfStep edgelist) bfbase !! nvert | |
let bfcheck = bfStep edgelist bfout | |
let hasCycle = V.any id $ V.zipWith (/=) bfout bfcheck | |
putStrLn $ if hasCycle then "Cycle" else show $ V.minimum bfout |
In-place updates in bfStep:
{-# LANGUAGE BangPatterns #-}
module Main where
import Data.Functor
import Data.Function
import Data.List (sortBy, groupBy)
import Data.Vector.Unboxed ((//), (!))
import Control.DeepSeq
import qualified Data.Vector.Unboxed as V
import qualified Data.Vector.Unboxed.Mutable as VM
import qualified Data.Vector as A
import Control.Monad
import Data.Maybe
import Debug.Trace
type Vertex = Int
type Dist = Int
data Edge = Edge Vertex Vertex Dist
type Vec = V.Vector Dist
type Edges = [(Vertex, Dist)]
type EdgeList = A.Vector Edges
readEdge :: String -> Edge
readEdge s = let [v1, v2, w] = map read $ words s
in Edge (v1 - 1) (v2 - 1) w
edgeHead :: Edge -> Vertex
edgeHead (Edge _ v _) = v
bfStep :: EdgeList -> Vec -> Vec
bfStep edges prev =
trace "." $ V.modify (\v -> mapM_ (setmin v) [0..(VM.length v - 1)]) prev
where
setmin v i = do
oldcost <- VM.unsafeRead v i
costs <- catMaybes <$> mapM (cost oldcost v) (A.unsafeIndex edges i)
let newcost = if null costs then oldcost else minimum costs
if newcost<oldcost
then VM.unsafeWrite v i newcost
else return ()
cost oldcost v (w, c) = do
precost <- VM.unsafeRead v w
let cost = precost + c
return $ if precost == maxBound
then Nothing else if cost < oldcost
then Just cost else Nothing
mkEdges :: [Edge] -> (Vertex, Edges)
mkEdges edges = (edgeHead (head edges), map (\(Edge v _ c) -> (v, c)) edges)
main :: IO()
main = do
header:body <- lines <$> getContents
let nvert = read $ head $ words header
let edges = sortBy (compare `on` edgeHead) $ map readEdge body
let edgelist = A.accum (++) (A.replicate (nvert + 1) [(nvert, 0)])
(map mkEdges $ groupBy ((==) `on` edgeHead) edges)
let bfbase = V.replicate (nvert + 1) maxBound // [(nvert, 0)]
let bfout = iterate (bfStep edgelist) bfbase !! nvert
let bfcheck = bfStep edgelist bfout
let hasCycle = V.any id $ V.zipWith (/=) bfout bfcheck
putStrLn $ if hasCycle then "Cycle" else "No cycle" -- show $ V.minimum s1
getContents
Дальше не читал.
IO в этой задаче никого не колышет, при времени работы в час-то.
Третья хаскельная версия - 19 минут, проигрыш Си всего вдесятеро!
accumulate и итерация по vector гораздо быстрее, чем update и списки.
Ускоряем сишечку больше чем в три раза (и хаскель опять отстает в 30 раз) почти ничего не меняя:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
struct edge_t {
short int v1;
short int v2;
short int cost;
};
int
main(void) {
int nvertex, nedge;
scanf("%d %d\n", &nvertex, &nedge);
int nalledges = nedge;
struct edge_t *edges = malloc(nalledges * sizeof(struct edge_t));
// read input
for (int i = 0; i < nedge; i++) {
scanf("%hd %hd %hd\n", &edges[i].v1, &edges[i].v2, &edges[i].cost);
}
int *c1 = malloc((nvertex + 1) * sizeof(int));
int *c2 = malloc((nvertex + 1) * sizeof(int));
for (int i = 0; i < nvertex + 1; i++) {
c1[i] = 0;
}
for (int n = 0; n < nvertex + 2; n++) {
for (int i = 0; i < nalledges; i++) {
int newcost = c1[edges[i].v1]+edges[i].cost;
if (newcost >= 0 ) continue;
int target = edges[i].v2;
if (newcost < c1[target]) c1[target] = newcost;
}
if (n == nvertex) {
memcpy(c2, c1, (nvertex + 1) * sizeof(int));
}
}
if (memcmp(c1, c2, (nvertex + 1) * sizeof(int))) {
printf("Cycle\n");
} else {
int v = c1[0];
for (int i = 1; i < nvertex + 1; i++) {
if (c1[i] < v) {
v = c1[i];
}
}
printf("Shortest path %d\n", v);
}
return 0;
}
Финальные версии - C 1:30, Хаскель 3:30, из них загрузка данных 20 секунд. Основное ускорение - на замене Int на Int32. Если бы еще и Vector можно было не через Int индексировать... Видимо, 32-битная версия программы в данном случае будет оптимальнее.
Второй серьезный выигрыш - llvm. Его оптимизатор умнее ghc.
Третий, не очень серьезный - unsafe* для векторов.
@adept, практически то же самое, что pure версия, 3:15. От монад мало пользы в данном случае.
У меня 1.30 на large.txt с -O2 -fllvm и вектором, собранным с llvm:
dmz@zen:/special-olympic$ ^C/special-olympic$ time ./bf < ./large.txt
dmz@zen:
"running"
-6
real 1m30.123s
user 1m29.810s
sys 0m0.116s
{-# LANGUAGE BangPatterns #-}
module Main where
import Control.DeepSeq
import Data.Functor
import Data.Int
import Data.Either (rights)
import Data.Vector.Unboxed ((//))
import qualified Data.Vector.Unboxed as V
import qualified Data.Text as T
import qualified Data.Text.Read as R
import qualified Data.Text.Encoding as E
import qualified Data.ByteString as BS
type Vertex = Int
type Dist = Int32
type Edge = (Vertex, Vertex, Dist)
type EdgeVec = V.Vector Edge
type CostVec = V.Vector Dist
readEdge :: T.Text -> Edge
readEdge s = let [v1, v2, w] = ww
in ((fromIntegral.fst) v1, (fromIntegral.fst) v2, (fromIntegral.fst) w)
where ww = rights $! map (R.signed $ R.decimal) $ take 3 $ T.words s
bfStep :: EdgeVec -> CostVec -> CostVec
bfStep edges !prev = V.unsafeAccumulate min prev $ V.map mincost edges
where
mincost :: Edge -> (Int, Int32)
mincost (s, h, c) = (h, cost s c)
cost w c = let precost = prev `V.unsafeIndex` w
in if precost == maxBound then maxBound else precost + c
mkEdgeVec :: Int -> [T.Text] -> EdgeVec
mkEdgeVec nvert inp = V.unfoldr step (nvert, inp)
where
step (n, s:xs) = Just (readEdge s, (n, xs))
step (0, []) = Nothing
step (!n, []) = Just ((0, n, 0), (n - 1, []))
main :: IO()
main = do
header:body <- (T.lines . E.decodeASCII) <$> BS.getContents
let nvert = (read . T.unpack) $ head $ T.words header
let edgelist = mkEdgeVec nvert body
let bfbase = V.replicate (nvert + 1) maxBound // [(0, 0)]
print $ edgelist `deepseq` "running"
let bfout = iterate (bfStep edgelist) bfbase !! nvert
let bfcheck = bfStep edgelist bfout
let hasCycle = V.any id $ V.zipWith (/=) bfout bfcheck
putStrLn $ if hasCycle then "Cycle" else show $ V.minimum bfout
---
dmz@zen:~/special-olympic$ time ./bf < ./large.txt
"running"
-6
real 1m17.621s
user 1m17.369s
sys 0m0.076s
@rblaze было бы странно, если бы была существенная разница. accumulate пишет в мутабельный массив и фьюжн отрабатывает нормально:
$wbfStep :: EdgeVec -> CostVec -> Vector Dist
$wbfStep =
\ (w :: EdgeVec) (w1 :: CostVec) ->
let { Vector ipv ipv1 ipv2 ~ _ <- w1 `cast` ... } in
let { V_3 ipv3 ipv4 ipv5 ipv6 ~ _ <- w `cast` ... } in
let { Vector rb _ rb2 ~ _ <- ipv4 `cast` ... } in
let { Vector rb3 _ rb5 ~ _ <- ipv5 `cast` ... } in
let { Vector rb6 _ rb8 ~ _ <- ipv6 `cast` ... } in
runSTRep
(\ (@ s) (s :: State# s) ->
case >=# ipv1 0 of _ {
False -> (lvl3 ipv1) `cast` ...;
True ->
let { (# s'#, arr# #) ~ _
<- newByteArray# (*# ipv1 4) (s `cast` ...)
} in
letrec {
$s$wa :: Int# -> State# s -> (# State# s, () #)
$s$wa =
\ (sc :: Int#) (sc1 :: State# s) ->
case >=# sc ipv3 of _ {
False ->
let { __DEFAULT ~ wild6 <- indexIntArray# rb5 (+# rb3 sc) } in
let { (# s1#, x# #) ~ _
<- readIntArray# arr# wild6 (sc1 `cast` ...)
} in
let { __DEFAULT ~ wild8 <- indexIntArray# rb2 (+# rb sc) } in
case indexIntArray# ipv2 (+# ipv wild8) of wild9 {
__DEFAULT ->
let { __DEFAULT ~ wild10 <- indexIntArray# rb8 (+# rb6 sc) } in
let {
y1 :: Int#
y1 = +# wild9 wild10 } in
case <=# x# y1 of _ {
False ->
let { __DEFAULT ~ s1
<- (writeIntArray# arr# wild6 y1 s1#) `cast` ...
} in
$s$wa (+# sc 1) s1;
True ->
let { __DEFAULT ~ s1
<- (writeIntArray# arr# wild6 x# s1#) `cast` ...
} in
$s$wa (+# sc 1) s1
};
2147483647 ->
let { __DEFAULT ~ s1
<- (writeIntArray# arr# wild6 x# s1#) `cast` ...
} in
$s$wa (+# sc 1) s1
};
True -> (# sc1, () #)
}; } in
let { (# new_s1, _ #) ~ _
<- $s$wa
0
((copyByteArray# ipv2 (*# ipv 4) arr# 0 (*# ipv1 4) s'#)
`cast` ...)
} in
let { (# s'#1, arr'# #) ~ _
<- unsafeFreezeByteArray# arr# (new_s1 `cast` ...)
} in
(# s'#1 `cast` ..., (Vector 0 ipv1 arr'#) `cast` ... #)
})
Во второй версии - упрощенные версии алгоритмов, без сортировок и т.д. Сишная версия практически не ускорилась, хаскельная притормозилась: 66 минут.