Last active
August 29, 2015 14:17
-
-
Save loganmhb/5f0a8bba1bde20a377d9 to your computer and use it in GitHub Desktop.
Clojurepalooza homework - Sentiment scoring for words
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; Sample input for the functions in this file: each response is a map whose
;; :r entry is an integer rating and whose :c entry is the free-text content.
(def test-responses
  [{:r 5 :c "This is awesome!"}
   {:r 1 :c "This sucks!"}
   {:r 3 :c "This is all right."}])
(defn clean-contents
  "Returns `response` with its :c text replaced by the sequence of words found
  in it. The text is lower-cased first, and a word is any maximal run of the
  letters a-z, so punctuation, digits and whitespace act as separators. When
  the text contains no such run, :c becomes nil (the result of `re-seq`)."
  [response]
  (let [lowered (clojure.string/lower-case (:c response))
        tokens  (re-seq #"[a-z]+" lowered)]
    (assoc response :c tokens)))
(defn response->word-map
  "Reducing step that folds one cleaned response into an accumulator.

  `words` is the accumulator: a map from word to the collection of ratings
  recorded for that word so far. `response` is a map in the format
  {:r Integer :c [sequence of words]}.

  Returns `words` with the response's rating appended once to the entry of
  every distinct word in the response. A word repeated inside one response
  still contributes a single rating, because the per-response map is keyed by
  word."
  [words response]
  (let [rating (:r response)
        ;; Append into a vector rather than using `concat`: merging with
        ;; `concat` wraps one more lazy seq around the ratings for every
        ;; response, and realizing a deeply nested chain (a word shared by
        ;; thousands of responses) can overflow the stack.
        append (fn [seen added]
                 (into (if (vector? seen) seen (vec seen)) added))]
    (merge-with append
                words
                (zipmap (:c response) (repeat [rating])))))
(defn average-word-score
  "Turns the ratings collected for one word into a rating distribution.

  One-argument arity: takes a single key/value pair `[word scores]` (for
  example a map entry), where `scores` is a collection of ratings. Returns the
  pair `[word {rating proportion}]`, mapping each distinct rating to the
  fraction of `scores` equal to it. An empty `scores` yields an empty map.

  Two-argument arity `word [scores]`: the original calling convention, kept so
  existing callers continue to work. The second argument is a sequence whose
  first element is the collection of ratings."
  ;; Destructure the pair itself. Mapping this function over a map passes each
  ;; entry as ONE argument, which the former two-parameter signature rejected
  ;; with an ArityException.
  ([[word scores]]
   (let [total (count scores)]
     [word (->> (frequencies scores)
                (map (fn [[rating occurrences]]
                       [rating (/ occurrences total)]))
                (into {}))]))
  ([word [scores]]
   (average-word-score [word scores])))
(defn score-sentiments
  "Takes a collection of responses in the format {:r Integer :c String} and
  returns a map from each word found in the response contents to that word's
  rating distribution, as produced by `average-word-score`.

  Pipeline: each response is tokenized with `clean-contents`, the responses
  are folded into a word -> ratings map with `response->word-map`, and every
  [word ratings] entry of that map is then passed to `average-word-score`."
  [responses]
  (->> responses
       (map clean-contents)
       (reduce response->word-map {})
       (map average-word-score)
       (into {})))
;; Example: score the sample responses in `test-responses`.
(score-sentiments test-responses)

;; The value the call above is expected to produce:
{"this"    {1 1/3, 3 1/3, 5 1/3}
 "is"      {3 1/2, 5 1/2}
 "awesome" {5 1}
 "sucks"   {1 1}
 "all"     {3 1}
 "right"   {3 1}}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment