Skip to content

Instantly share code, notes, and snippets.

@mwotton
Created July 25, 2013 06:01
Show Gist options
  • Star 8 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save mwotton/6077238 to your computer and use it in GitHub Desktop.
Save mwotton/6077238 to your computer and use it in GitHub Desktop.
{-# LANGUAGE OverloadedStrings #-}
import qualified Network.HTTP.Conduit as H
import Control.Exception
import Web.Scotty
import System.Environment
import System.IO
import System.Directory
import Control.Monad
import Control.Monad.IO.Class
import qualified Data.ByteString.Lazy.Char8 as BS
-- | Entry point: serve @GET /:domain/favicon.ico@ on the port named by the
-- @PORT@ environment variable, caching icons as files inside the directory
-- named by the @cachedir@ environment variable.
main :: IO ()
main = do
  portStr <- getEnv "PORT"
  cachedir <- getEnv "cachedir"
  -- Parse the port totally instead of with 'read', so a bad value gives a
  -- readable error rather than "Prelude.read: no parse".
  port <- case reads portStr of
    [(p, rest)] | null (words rest) -> return p
    _ -> ioError (userError ("PORT is not a valid port number: " ++ show portStr))
  scotty port $ do
    get "/:domain/favicon.ico" $ do
      domain <- param "domain"
      let filename = cachedir ++ "/" ++ domain
      -- Fill the on-disk cache first; 'proxy' does nothing if the file exists.
      liftIO (proxy domain filename)
      -- NOTE(review): "binary/data" does not look like a Content-Disposition
      -- value; a Content-Type header was probably intended -- confirm.
      header "Content-Disposition" "binary/data"
      file filename
-- | Ensure @filename@ exists by downloading @http://<domain>/favicon.ico@
-- into it when it is missing.
--
-- Download failures are printed to stderr and otherwise ignored, so the file
-- may still be absent when this returns.
proxy :: String -> String -> IO ()
proxy domain filename = do
  exists <- doesFileExist filename
  unless exists fetch
  where
    -- Download the icon and write it to the cache file in one step.
    fetch = handle handler $ H.simpleHttp url >>= BS.writeFile filename
    url = "http://" ++ domain ++ "/favicon.ico"
    -- Best effort: log whatever went wrong and carry on.
    handler :: SomeException -> IO ()
    handler e = hPrint stderr e
@mwotton
Copy link
Author

mwotton commented Jul 28, 2013

-- Download the icon, then write it to the cache file; any exception raised by
-- either step is passed to 'handler'.
fetch = handle handler (do
  response <- H.simpleHttp url
  BS.writeFile filename response)

@rvagg
Copy link

rvagg commented Jul 28, 2013

const http = require('http')
    , path = require('path')
    , fs = require('fs')
    , hyperquest = require('hyperquest')
    , paramify = require('paramify')
    // process.env (lower case) is the environment object; process.ENV is undefined
    , port = process.env.PORT
    , cacheDir = process.env.cachedir

// Route GET /:domain/favicon.ico to proxy(); everything else gets a 404
// instead of being left without a response.
function handler (request, response) {
  var match = paramify(request.url)
  if (match('/:domain/favicon.ico'))
    return proxy(match.params.domain, response)

  response.statusCode = 404
  response.end()
}

// Stream the favicon of `domain` to `response`, downloading it into the
// cache directory first when it is not cached yet.
function proxy (domain, response) {
  // the cache file is named after the domain; the domain itself is still
  // needed below to build the upstream URL
  var filename = path.join(cacheDir, domain)
  fs.exists(filename, function (exists) {
    if (exists)
      return fs.createReadStream(filename).pipe(response)
    hyperquest('http://' + domain + '/favicon.ico')
      .pipe(fs.createWriteStream(filename))
      // a write stream never emits 'end'; 'close' fires once the file is
      // fully written and its descriptor closed
      .on('close', function () {
        fs.createReadStream(filename).pipe(response)
      })
  })
}

http.createServer(handler).listen(port)

@rvagg
Copy link

rvagg commented Jul 28, 2013

With concurrent request coalescing (requests for a domain that is already being fetched wait for that single fetch):

/*
npm install request paramify
export PORT=1337
export cachedir=/tmp/
node ./proxy.js
*/

const http = require('http')
    , path = require('path')
    , fs = require('fs')
    , request = require('request')
    , paramify = require('paramify')
    , port = process.env.PORT
    , cacheDir = process.env.cachedir

// In-flight downloads: maps a domain to the list of responses waiting for its
// icon; proxy() adds entries and send() resets them to null once served.
var fetching = {}

// Route GET /:domain/favicon.ico to proxy(); any other URL is answered with
// a 404 so the client is not left waiting on a response that never comes.
function handler (request, response) {
  var match = paramify(request.url)
  if (match('/:domain/favicon.ico'))
    return proxy(match.params.domain, response)

  response.statusCode = 404
  response.end()
}

// Send the favicon of `domain` to `response`. Serves the cached file when
// there is one; otherwise starts a download, or joins the download already
// in progress for that domain.
function proxy (domain, response) {
  var filename = path.join(cacheDir, domain)
  response.setHeader('content-type', 'image/vnd.microsoft.icon')
  fs.exists(filename, function (exists) {
    // Check for an in-flight fetch BEFORE trusting `exists`: the write stream
    // creates the file as soon as the download starts, so the file can exist
    // while it is still incomplete.
    if (fetching[domain])
      return fetching[domain].push(response) // this domain is being fetched

    if (exists)
      return fs.createReadStream(filename).pipe(response)

    fetching[domain] = [ response ] // first fetch for this domain

    request('http://' + domain + '/favicon.ico')
      .pipe(fs.createWriteStream(filename))
      .on('close', send.bind(null, domain, filename)) // simple function curry with 'domain' and 'filename' args
  })
}

// Stream the freshly cached file to every response queued for `domain`,
// then clear the queue so later requests go straight to the cache file.
function send (domain, filename) {
  var waiting = fetching[domain]
  for (var i = 0; i < waiting.length; i++)
    fs.createReadStream(filename).pipe(waiting[i])
  fetching[domain] = null
}

// Start the HTTP server on the port taken from the PORT environment variable.
http.createServer(handler).listen(port)

@mwotton
Copy link
Author

mwotton commented Jul 28, 2013

Similarly for Haskell:

{-# LANGUAGE OverloadedStrings #-}
import qualified Network.HTTP.Conduit as H
import Control.Exception
import Web.Scotty
import System.Environment
import System.IO
import System.Directory
import Control.Monad
import Control.Monad.IO.Class
import Data.Map(Map)
import qualified Data.Map as Map
import qualified  Data.ByteString.Lazy.Char8 as BS
import Control.Concurrent.MVar

-- | Entry point: serve @GET /:domain/favicon.ico@ on the port named by the
-- @PORT@ environment variable, caching icons as files inside the directory
-- named by the @cachedir@ environment variable.
main :: IO ()
main = do
  portStr <- getEnv "PORT"
  cachedir <- getEnv "cachedir"
  -- Parse the port totally instead of with 'read', so a bad value gives a
  -- readable error rather than "Prelude.read: no parse".
  port <- case reads portStr of
    [(p, rest)] | null (words rest) -> return p
    _ -> ioError (userError ("PORT is not a valid port number: " ++ show portStr))
  -- Shared table of downloads currently in progress, keyed by domain.
  pending <- newMVar Map.empty
  scotty port $ do
    get "/:domain/favicon.ico" $ do
      domain <- param "domain"
      let filename = cachedir ++ "/" ++ domain
      -- Fill the on-disk cache first; 'proxy' does nothing if the file exists.
      liftIO (proxy pending domain filename)
      -- NOTE(review): "binary/data" does not look like a Content-Disposition
      -- value; a Content-Type header was probably intended -- confirm.
      header "Content-Disposition" "binary/data"
      file filename

-- | Ensure @filename@ exists by downloading @http://<domain>/favicon.ico@
-- into it when it is missing.
--
-- @pending@ maps each domain that is currently being downloaded to an 'MVar'
-- that is filled when that download ends. The first caller for a domain does
-- the download; callers arriving meanwhile just wait for the signal.
--
-- Failures are printed to stderr and otherwise ignored, so the file may still
-- be absent when this returns.
proxy :: MVar (Map String (MVar ()) ) -> String -> String -> IO ()
proxy pending domain filename = do
  exists <- doesFileExist filename
  -- 'modifyMVar' decides "fetch or wait" atomically and restores @pending@
  -- even if an exception strikes; the chosen action runs outside the lock.
  unless exists $ handle handler $ join $
    modifyMVar pending $ \inFlight ->
      case Map.lookup domain inFlight of
        Just done -> return (inFlight, readMVar done)
        Nothing -> do
          done <- newEmptyMVar
          return (Map.insert domain done inFlight, fetch done)
  where
    -- 'finally' guarantees waiters are released and the table entry removed
    -- even when the download throws; otherwise they would block forever.
    fetch done = download `finally` release done

    download = H.simpleHttp url >>= BS.writeFile filename

    release done = do
      modifyMVar_ pending (return . Map.delete domain)
      putMVar done ()

    url = "http://" ++ domain ++ "/favicon.ico"

    -- Best effort: log whatever went wrong and carry on.
    handler :: SomeException -> IO ()
    handler e = hPrint stderr e

@mwotton
Copy link
Author

mwotton commented Jul 28, 2013

Getting about 5400 req/s here, vs. 1300 on Node.

@rvagg
Copy link

rvagg commented Jul 28, 2013

A more competitive entry in Node:

/*
npm install request bl async-cache paramify
export PORT=1337
export cachedir=/tmp/
node ./proxy.js
*/

const http = require('http')
    , path = require('path')
    , fs = require('fs')
    , bl = require('bl')
    , request = require('request')
    , AsyncCache = require('async-cache')
    , paramify = require('paramify')
    , port = process.env.PORT
    , cacheDir = process.env.cachedir

// Cache looked up by domain in handler(); load() is passed as its `load` option.
// NOTE(review): `max` is presumably the maximum number of cached entries and
// `load` the function called on a miss -- confirm against the async-cache docs.
var cache = AsyncCache({
    max    : 1000
  , load   : load
})

// Answer GET /:domain/favicon.ico from the cache. Unknown URLs get a 404 and
// failed lookups a 502, so no request is left hanging or answered with an
// empty 200.
function handler (request, response) {
  var match = paramify(request.url)
  if (!match('/:domain/favicon.ico')) {
    response.statusCode = 404
    return response.end()
  }

  cache.get(match.params.domain, function (err, data) {
    if (err) {
      // the icon could not be loaded from disk or fetched upstream
      response.statusCode = 502
      return response.end()
    }
    response.setHeader('content-type', 'image/x-icon')
    response.end(data)
  })
}

// Produce the icon bytes for `domain`: read them from the cache directory
// when a file is there, otherwise fetch them and write them to disk before
// handing them to `callback`.
function load (domain, callback) {
  var cachePath = path.join(cacheDir, domain)

  // Persist a freshly fetched icon, then report it; the outcome of the write
  // itself is not inspected.
  function onFetched (err, data) {
    if (err) return callback(err)
    fs.writeFile(cachePath, data, function () {
      callback(null, data)
    })
  }

  fs.exists(cachePath, function (found) {
    if (!found) return fetch(domain, onFetched)
    fs.readFile(cachePath, callback)
  })
}

// Download http://<domain>/favicon.ico and pass the collected bytes to
// `callback(err, data)`; `callback` is invoked at most once.
function fetch (domain, callback) {
  var done = false
  function finish (err, data) {
    if (done) return
    done = true
    callback(err, data)
  }

  request('http://' + domain + '/favicon.ico')
    // pipe() does not forward source errors: without this listener a failed
    // request would never reach `callback` and the unhandled 'error' event
    // would be thrown
    .on('error', finish)
    .pipe(bl(function (err, data) {
      if (err) return finish(err)
      finish(null, data.slice())
    }))
}

// Start the HTTP server on the port taken from the PORT environment variable.
http.createServer(handler).listen(port)

@rvagg
Copy link

rvagg commented Jul 28, 2013

And squeezing a little bit more out with the cluster module:

/*
npm install request bl async-cache paramify
export PORT=1337
export cachedir=/tmp/
node ./proxy.js
*/

const http = require('http')
    , path = require('path')
    , cluster = require('cluster')
    , fs = require('fs')
    , bl = require('bl')
    , request = require('request')
    , AsyncCache = require('async-cache')
    , paramify = require('paramify')
    , port = process.env.PORT
    , cacheDir = process.env.cachedir
    , cpus = require('os').cpus().length

// Cache looked up by domain in handler(); load() is passed as its `load` option.
// NOTE(review): `max` is presumably the maximum number of cached entries and
// `load` the function called on a miss -- confirm against the async-cache docs.
var cache = AsyncCache({
    max    : 1000
  , load   : load
})

// Answer GET /:domain/favicon.ico from the cache. Unknown URLs get a 404 and
// failed lookups a 502, so no request is left hanging or answered with an
// empty 200.
function handler (request, response) {
  var match = paramify(request.url)
  if (!match('/:domain/favicon.ico')) {
    response.statusCode = 404
    return response.end()
  }

  cache.get(match.params.domain, function (err, data) {
    if (err) {
      // the icon could not be loaded from disk or fetched upstream
      response.statusCode = 502
      return response.end()
    }
    response.setHeader('content-type', 'image/x-icon')
    response.end(data)
  })
}

// Produce the icon bytes for `domain`: read them from the cache directory
// when a file is there, otherwise fetch them and write them to disk before
// handing them to `callback`.
function load (domain, callback) {
  var cachePath = path.join(cacheDir, domain)

  // Persist a freshly fetched icon, then report it; the outcome of the write
  // itself is not inspected.
  function onFetched (err, data) {
    if (err) return callback(err)
    fs.writeFile(cachePath, data, function () {
      callback(null, data)
    })
  }

  fs.exists(cachePath, function (found) {
    if (!found) return fetch(domain, onFetched)
    fs.readFile(cachePath, callback)
  })
}

// Download http://<domain>/favicon.ico and pass the collected bytes to
// `callback(err, data)`; `callback` is invoked at most once.
function fetch (domain, callback) {
  var done = false
  function finish (err, data) {
    if (done) return
    done = true
    callback(err, data)
  }

  request('http://' + domain + '/favicon.ico')
    // pipe() does not forward source errors: without this listener a failed
    // request would never reach `callback` and the unhandled 'error' event
    // would be thrown
    .on('error', finish)
    .pipe(bl(function (err, data) {
      if (err) return finish(err)
      finish(null, data.slice())
    }))
}

// Workers each run an HTTP server on the shared port; the master process
// only forks one worker per CPU.
if (!cluster.isMaster) {
  http.createServer(handler).listen(port)
  console.log('Listening on port', port)
} else {
  for (var remaining = cpus; remaining > 0; remaining--)
    cluster.fork()
}

@mwotton
Copy link
Author

mwotton commented Jul 29, 2013

Probably the final version I'll post.

{-# LANGUAGE OverloadedStrings #-}
import Network.HTTP
import Control.Exception
import Network.Wai
import Network.Wai.Handler.Warp
import System.Environment
import System.IO
import System.Directory
import Control.Monad.IO.Class
import Data.Map(Map)
import qualified Data.Map as Map
import qualified  Data.ByteString.Char8 as BS
import qualified  Data.ByteString.Lazy.Char8 as BL
import Control.Concurrent.MVar
import Blaze.ByteString.Builder (fromByteString)
import Network.HTTP.Types (status200,status404)
import Control.Applicative
import Data.IORef
import Data.Maybe
import Network.Browser

-- | Entry point: run a Warp server on the port named by the @PORT@
-- environment variable, answering @/<domain>/favicon.ico@ with that domain's
-- icon and everything else with a 404. Icons are cached in memory and as
-- files under the directory named by the @cachedir@ environment variable.
main :: IO ()
main = do
  portStr <- getEnv "PORT"
  cachedir <- BS.pack <$> getEnv "cachedir"
  -- Parse the port totally instead of with 'read', so a bad value gives a
  -- readable error rather than "Prelude.read: no parse".
  port <- case reads portStr of
    [(p, rest)] | null (words rest) -> return p
    _ -> ioError (userError ("PORT is not a valid port number: " ++ show portStr))
  pending <- newMVar Map.empty
  memCache <- newIORef []
  run port $ \req -> do
    let (prefix, path) = BS.breakSubstring "/favicon.ico" (rawPathInfo req)
        -- 'BS.drop' is total, unlike 'BS.tail', which throws when the request
        -- is just "/favicon.ico" and the prefix is empty.
        dom = BS.drop 1 prefix
        -- The domain becomes both a file name under cachedir and an upstream
        -- host name, so refuse anything that could escape the cache directory.
        validDomain = not (BS.null dom) && BS.notElem '/' dom && dom `notElem` [".", ".."]
    if path == "/favicon.ico" && validDomain
      then do
        -- prefix still carries its leading '/', which separates it from cachedir
        let filename = BS.concat [cachedir, prefix]
        f <- liftIO (proxy pending memCache dom (BS.unpack filename))
        -- NOTE(review): "binary/data" does not look like a Content-Disposition
        -- value; a Content-Type header was probably intended -- confirm.
        return $ ResponseBuilder status200 [("Content-Disposition", "binary/data")] $ fromByteString f
      else return $ ResponseBuilder status404 [] $ fromByteString ""

-- | Return the favicon bytes for @domain@.
--
-- Lookup order: the in-memory cache, then a download already in progress for
-- the same domain (wait for it), then the cache file @filename@, and finally
-- a fresh download of @http://<domain>/favicon.ico@ (following redirects),
-- which is written to @filename@ and added to the in-memory cache.
--
-- @pending@ maps each domain being downloaded to an 'MVar' that is filled
-- when that download ends. Any exception is printed to stderr and the literal
-- bytes @"error"@ are returned instead.
proxy :: MVar (Map BS.ByteString (MVar ()) ) -> IORef ([(BS.ByteString, BS.ByteString)]) -> BS.ByteString -> String -> IO BS.ByteString
proxy pending memcache domain filename = do
  cached <- readIORef memcache
  case lookup domain cached of
    Just resp -> return resp
    Nothing -> handle handler $ do
      -- 'modifyMVar' picks the next step atomically and restores @pending@
      -- even on exceptions; the chosen action runs outside the lock.
      action <- modifyMVar pending $ \inFlight ->
        case Map.lookup domain inFlight of
          Just done -> return (inFlight, waitFor done)
          Nothing -> do
            -- No download in flight, so an existing file is a finished one.
            onDisk <- doesFileExist filename
            if onDisk
              then return (inFlight, BS.readFile filename >>= remember)
              else do
                done <- newEmptyMVar
                return (Map.insert domain done inFlight, fetch done)
      action

  where
    -- Add the icon to the in-memory cache and hand it back.
    remember body = do
      atomicModifyIORef memcache (\entries -> ((domain, body) : entries, ()))
      return body

    -- 'finally' guarantees waiters are released and the table entry removed
    -- even when the download throws; otherwise they would block forever.
    fetch done = download `finally` release done

    download = do
      (_, rawResponse) <- Network.Browser.browse $ do
        setAllowRedirects True
        request (getRequest $ BS.unpack $ BS.concat ["http://", domain, "/favicon.ico"])
      let body = BS.pack (rspBody rawResponse)
      -- Write raw bytes: Prelude's 'writeFile' would re-encode the String in
      -- the locale's text encoding and corrupt binary icon data.
      BS.writeFile filename body
      remember body

    release done = do
      modifyMVar_ pending (return . Map.delete domain)
      putMVar done ()

    -- Block until the in-flight download ends, then read its result from the
    -- in-memory cache; a missing entry means that download failed.
    waitFor done = do
      readMVar done
      entries <- readIORef memcache
      case lookup domain entries of
        Just body -> return body
        Nothing -> ioError (userError ("favicon fetch failed for " ++ BS.unpack domain))

    handler :: SomeException -> IO BS.ByteString
    handler e = hPrint stderr e >> return "error"

@mwotton
Copy link
Author

mwotton commented Jul 29, 2013

This requires GHC HEAD.

weighttp -n 1000000 -c 100 -t 4 -k localhost:3001/google.com/favicon.ico

to warm it up, then

weighttp -n 10000000 -c 100 -t 4 -k localhost:3001/google.com/favicon.ico

for the actual test (4 threads simulating 25 concurrent connections apiece)

node: 44019 req/s
haskell: 112398 req/s

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment