Skip to content

Instantly share code, notes, and snippets.

@vodik
Created November 22, 2011 19:28
Show Gist options
  • Save vodik/1386651 to your computer and use it in GitHub Desktop.
Save vodik/1386651 to your computer and use it in GitHub Desktop.
module ItsNotWorking (
getLinks
) where
import Text.HTML.TagSoup
import Text.Regex.Posix
import Network.HTTP
isInteresting :: String -> Bool
isInteresting link = any (link =~) supported
where supported = [ "mediafire"
, "rapidshare" ]
cleanUp :: [String] -> [String]
cleanUp [] = []
cleanUp (x:xs) = x : cleanUp (filter (/= x) xs)
openURL :: String -> IO String
openURL url = getResponseBody =<< simpleHTTP (getRequest url)
getTagsFromUrl :: String -> IO [Tag String]
getTagsFromUrl = fmap parseTags . openURL
getLinks :: String -> IO [String]
getLinks url = do
tags <- getTagsFromUrl url
return $ cleanUp [ link | TagOpen "a" atts <- tags
, ("href", link) <- atts
, isInteresting link ]
module Main where
import System.Environment
import System.Directory
import ItsNotWorking
baseURL = "http://itsnotworking.mezoka.com/"
makeURL :: String -> String
makeURL x = baseURL ++ x ++ "text/index.html"
grab :: String -> IO ()
grab x = getLinks (makeURL x) >>= appendFile "links" . unlines
main :: IO ()
main = safeRemoveFile "links" >> getArgs >>= mapM_ grab
safeRemoveFile :: String -> IO ()
safeRemoveFile file = do
exists <- doesFileExist file
if exists
then removeFile file
else return ()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment