Skip to content

Instantly share code, notes, and snippets.

@pedroteixeira
Created March 11, 2012 23:15
Show Gist options
  • Star 5 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pedroteixeira/2018592 to your computer and use it in GitHub Desktop.
Save pedroteixeira/2018592 to your computer and use it in GitHub Desktop.
coursera nlp pa1
;; Starter implementation in Clojure (use lein to create a new project with its project.clj)
;; Namespace holding the Clojure side of the e-mail extractor.
;; :gen-class asks the Clojure compiler to generate a Java class for this
;; namespace so that plain Java code can instantiate it and call into it.
(ns pa1.spamlord
(:gen-class
;; Declares one public instance method on the generated class:
;;   java.util.List process(java.io.BufferedReader)
;; With gen-class's default "-" prefix it is implemented by the -process
;; function defined later in this namespace.
:methods [[process [java.io.BufferedReader] java.util.List]]))
(defn extract-contacts
  "Finds every `user@host.edu` occurrence in `line` and returns a lazy
  sequence with one map per occurrence. Each map has the string keys
  \"type\" (always \"e\") and \"value\" (the rebuilt address). Returns an
  empty sequence when nothing matches."
  [line]
  ;; Each re-seq hit is [whole-match user host]; the whole match is ignored
  ;; and the address is rebuilt from the two captured groups.
  (for [[_ user host] (re-seq #"(\w+)@(\w+)\.edu" line)]
    {"type"  "e"
     "value" (str user "@" host ".edu")}))
(defn -process
  "Implementation of the generated class's `process` method. Reads `input`
  line by line, runs `extract-contacts` on every line and returns all the
  resulting contact maps as one flat, fully realized sequence."
  [this input]
  (->> (line-seq input)
       (mapcat extract-contacts)
       ;; Force the lazy sequence now, while the reader is still open;
       ;; the Java caller closes it right after this call returns.
       doall))
// Modified SpamLord.java to call the Clojure code
/**
 * Extracts the contacts found in one input file by delegating the scan to
 * the Clojure-generated {@code spamlord} class.
 *
 * <p>Each element returned by {@code spamlord.process} is treated as a map
 * with the string keys {@code "type"} and {@code "value"} and is converted
 * into a {@link Contact} tagged with {@code fileName}.
 *
 * <p>The reader is always closed before this method returns, including when
 * the Clojure call fails. If an {@link IOException} is raised, its stack
 * trace is printed and the JVM exits with status 1.
 *
 * @param fileName name recorded on every {@code Contact} produced
 * @param input    reader over the file's contents; closed by this method
 * @return the contacts found, in the order the Clojure code returned them
 */
public List<Contact> processFile(String fileName, BufferedReader input) {
    List<Contact> contacts = new ArrayList<Contact>();
    try {
        try {
            List<?> results = new spamlord().process(input);
            for (Object result : results) {
                java.util.Map<?, ?> contact = (java.util.Map<?, ?>) result;
                contacts.add(new Contact(fileName,
                        (String) contact.get("type"),
                        (String) contact.get("value")));
            }
        } finally {
            // Close in finally so the reader is released even if the
            // Clojure call or the conversion loop throws.
            input.close();
        }
    } catch (IOException e) {
        e.printStackTrace();
        System.exit(1);
    }
    return contacts;
}
//Compile & Run
//cd clj; lein compile; cd ..; javac -cp clj/classes/ -d java/classes/ java/*.java
//java -cp java/classes:clj/classes/:clj/lib/clojure-1.3.0.jar SpamLord data/dev/ data/devGOLD
@pedroteixeira
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment