Created
March 16, 2010 12:43
-
-
Save devn/333921 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; Helpers for digging through a directory of IRC log files.
;; Pulls in every public name from the two clojure.contrib namespaces below.
(ns irc-parser
  (:use clojure.contrib.duck-streams
        clojure.contrib.str-utils))
(defn flatten
  "Takes any nested combination of sequential things (lists, vectors,
  etc.) and returns their contents as a single, flat, lazy sequence.
  Non-sequential leaves (including maps, sets and strings) are kept as-is.
  (flatten nil) and (flatten <non-sequential value>) return an empty
  sequence, not nil."
  [x]
  ;; tree-seq walks the nesting depth-first and emits the root first;
  ;; `rest` drops that root so only its descendants remain, and `remove`
  ;; then discards the intermediate collections, leaving just the leaves.
  (remove sequential?
          (rest (tree-seq sequential? seq x))))
;; Every entry under the log directory, as java.io.File objects. file-seq
;; returns the root directory itself first, followed by everything beneath
;; it, so this listing contains directories as well as files.
(def dates
  (file-seq (java.io.File. "/Users/defn/git/clojure-irc/")))
(defn parse-irc-log
  "Reads the IRC log at `logfile` (anything whose `str` is a readable path)
  and returns a lazy seq with one entry per line that carries message text.
  Each entry is itself a seq of the strings that follow the line's
  `<nick> ` prefix. Lines that have no such prefix, or nothing after it,
  are dropped."
  [logfile]
  ;; re-split cuts each line at the prefix match; the first piece is whatever
  ;; preceded the match, so `rest` keeps only the text after it. A line
  ;; without a match splits into a single piece, leaving an empty seq.
  ;;
  ;; The previous predicate `(or nil? "")` evaluated to plain `nil?`, and
  ;; `rest` never returns nil, so nothing was ever filtered out. `empty?`
  ;; removes the message-less entries as intended.
  ;;
  ;; NOTE(review): the pattern is greedy, so a message that itself contains
  ;; "> " loses everything up to that point -- confirm whether that matters
  ;; for these logs before tightening the regex.
  (remove empty?
          (map #(rest (re-split #".*<.*>\s" %))
               (read-lines (str logfile)))))
;; One flat, lazy seq of message strings per log file.
;; Only regular files are parsed: `dates` also lists directories (the root
;; and anything nested below it), and handing a directory to parse-irc-log
;; makes the underlying file read throw. Dropping just the first entry with
;; `rest` was not enough once the tree contains a subdirectory.
(def parsed
  (map (comp flatten parse-irc-log)
       (filter #(.isFile %) dates)))
;; Earlier attempt, kept for reference:
;;(filter #(re-matches #".*zipmap.*") parsed)

;; For each parsed log, yields "zipmap" if any of its lines mentions it and
;; nil otherwise. Lines are searched one at a time because each element of
;; `parsed` is a lazy seq, and `str` on a lazy seq produces its object name
;; (clojure.lang.LazySeq@...) rather than its contents, so matching against
;; `(str log)` could never succeed.
(map (fn [lines]
       (some #(re-find #"zipmap" %) lines))
     parsed)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment