Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save haokaibo/a1a1362edf5c1bf99d265a23fe8b4361 to your computer and use it in GitHub Desktop.
Save haokaibo/a1a1362edf5c1bf99d265a23fe8b4361 to your computer and use it in GitHub Desktop.
Test code for Collective Intelligence.
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PackageRequirementsSettings">
<option name="requirementsPath" value="" />
</component>
<component name="TestRunnerService">
<option name="PROJECT_TEST_RUNNER" value="Unittests" />
</component>
</module>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.5.2 (~/anaconda/bin/python)" project-jdk-type="Python SDK" />
</project>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/CollectiveIntelligence.iml" filepath="$PROJECT_DIR$/.idea/CollectiveIntelligence.iml" />
</modules>
</component>
</project>
# A dictionary of movie critics and their ratings of a small
# set of movies
from math import sqrt
# Sample data set: critic name -> {movie title -> rating}.
# Not every critic has rated every movie.
critics = {
    "Lisa Rose": {
        "Lady in the Water": 2.5,
        "Snakes on a Plane": 3.5,
        "Just My Luck": 3.0,
        "Superman Returns": 3.5,
        "You, Me and Dupree": 2.5,
        "The Night Listener": 3.0,
    },
    "Gene Seymour": {
        "Lady in the Water": 3.0,
        "Snakes on a Plane": 3.5,
        "Just My Luck": 1.5,
        "Superman Returns": 5.0,
        "The Night Listener": 3.0,
        "You, Me and Dupree": 3.5,
    },
    "Michael Phillips": {
        "Lady in the Water": 2.5,
        "Snakes on a Plane": 3.0,
        "Superman Returns": 3.5,
        "The Night Listener": 4.0,
    },
    "Claudia Puig": {
        "Snakes on a Plane": 3.5,
        "Just My Luck": 3.0,
        "The Night Listener": 4.5,
        "Superman Returns": 4.0,
        "You, Me and Dupree": 2.5,
    },
    "Mick LaSalle": {
        "Lady in the Water": 3.0,
        "Snakes on a Plane": 4.0,
        "Just My Luck": 2.0,
        "Superman Returns": 3.0,
        "The Night Listener": 3.0,
        "You, Me and Dupree": 2.0,
    },
    "Jack Matthews": {
        "Lady in the Water": 3.0,
        "Snakes on a Plane": 4.0,
        "The Night Listener": 3.0,
        "Superman Returns": 5.0,
        "You, Me and Dupree": 3.5,
    },
    "Toby": {
        "Snakes on a Plane": 4.5,
        "You, Me and Dupree": 1.0,
        "Superman Returns": 4.0,
    },
}
def sim_distance(prefs, person1, person2):
    """Return a distance-based similarity score for person1 and person2.

    The score is ``1 / (1 + d)`` where ``d`` is the sum of squared rating
    differences over the items both people have rated, so identical ratings
    give 1 and larger disagreements approach 0.

    Args:
        prefs: Mapping of person -> {item -> numeric rating}.
        person1: Key of the first person in ``prefs``.
        person2: Key of the second person in ``prefs``.

    Returns:
        The similarity score, or 0 when the two people share no rated items.

    Raises:
        KeyError: If ``person1`` or ``person2`` is not a key of ``prefs``.
    """
    ratings1 = prefs[person1]
    ratings2 = prefs[person2]

    # Collect the shared items once; they drive both the emptiness check
    # and the distance computation.
    shared_items = [item for item in ratings1 if item in ratings2]

    # With nothing in common there is no basis for comparison.
    if not shared_items:
        return 0

    sum_of_squares = sum(
        pow(ratings1[item] - ratings2[item], 2) for item in shared_items
    )
    return 1 / (1 + sum_of_squares)
def sim_pearson(prefs, p1, p2):
    """Return the Pearson correlation coefficient for p1 and p2.

    Only items rated by both people are considered.

    Args:
        prefs: Mapping of person -> {item -> numeric rating}.
        p1: Key of the first person in ``prefs``.
        p2: Key of the second person in ``prefs``.

    Returns:
        The correlation coefficient, or 0 when the two people share no
        rated items or when either person's shared ratings have no
        variance (the coefficient is undefined in that case).

    Raises:
        KeyError: If ``p1`` or ``p2`` is not a key of ``prefs``.
    """
    ratings1 = prefs[p1]
    ratings2 = prefs[p2]

    # Items rated by both people, in the iteration order of p1's ratings.
    shared_items = [item for item in ratings1 if item in ratings2]
    n = len(shared_items)

    # If there are no ratings in common, return 0.
    if n == 0:
        return 0

    # Sums of the ratings, of their squares, and of their products.
    sum1 = sum(ratings1[it] for it in shared_items)
    sum2 = sum(ratings2[it] for it in shared_items)
    sum1_sq = sum(pow(ratings1[it], 2) for it in shared_items)
    sum2_sq = sum(pow(ratings2[it], 2) for it in shared_items)
    product_sum = sum(ratings1[it] * ratings2[it] for it in shared_items)

    num = product_sum - (sum1 * sum2 / n)
    spread1 = sum1_sq - pow(sum1, 2) / n
    spread2 = sum2_sq - pow(sum2, 2) / n

    # Each spread term is mathematically >= 0, but floating-point rounding
    # can leave a tiny negative value when the ratings are (nearly) constant.
    # Treat that as zero variance instead of letting sqrt() raise ValueError.
    if spread1 <= 0 or spread2 <= 0:
        return 0

    den = sqrt(spread1 * spread2)
    if den == 0:
        return 0
    return num / den
def topMatches(prefs, person, n=5, similarity=sim_pearson):
    """Return the best matches for person from the prefs dictionary.

    Every other key of ``prefs`` is scored against ``person`` with the
    ``similarity`` callable. Both the number of results ``n`` and the
    similarity function are optional.

    Returns:
        A list of ``(score, other_person)`` tuples sorted in descending
        order, sliced to the first ``n`` entries.
    """
    ranked = []
    for candidate in prefs:
        if candidate != person:
            ranked.append((similarity(prefs, person, candidate), candidate))

    # Highest scores first.
    best_first = sorted(ranked, reverse=True)
    return best_first[:n]
import os
import unittest
import logging
from recommend.recommendations import critics, sim_distance, sim_pearson, topMatches
class RecommendationsTest(unittest.TestCase):
    """Unit tests for the similarity helpers in recommend.recommendations.

    Float results are compared with ``assertAlmostEqual`` rather than exact
    equality or string comparison, so the tests do not depend on the last
    bits of floating-point rounding.
    """

    def setUp(self):
        """Configure logging and record the directory containing this file."""
        logging.basicConfig(level=logging.INFO)
        self.base_dir = os.path.dirname(os.path.realpath(__file__))
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(self.base_dir, exist_ok=True)

    def tearDown(self):
        """No per-test cleanup is required."""
        pass

    def test_sim_distance(self):
        """sim_distance returns the expected score for two known critics."""
        r = sim_distance(critics, 'Lisa Rose', 'Gene Seymour')
        self.assertAlmostEqual(0.14814814814814814, r)

    def test_sim_pearson(self):
        """sim_pearson returns the expected score for two known critics."""
        r = sim_pearson(critics, 'Lisa Rose', 'Gene Seymour')
        self.assertAlmostEqual(0.39605901719066977, r)

    def test_topMatches_with_sim_pearson_method(self):
        """topMatches ranks Toby's three closest critics by Pearson score."""
        r = topMatches(critics, 'Toby', n=3)
        expected = [
            (0.9912407071619299, 'Lisa Rose'),
            (0.9244734516419049, 'Mick LaSalle'),
            (0.8934051474415647, 'Claudia Puig'),
        ]
        # Names and ordering must match exactly; scores only approximately.
        self.assertEqual([name for _, name in expected],
                         [name for _, name in r])
        for (expected_score, _), (score, _) in zip(expected, r):
            self.assertAlmostEqual(expected_score, score)
# Run the test suite only when this file is executed directly as a script.
# warnings='ignore' is passed to unittest.main as the warning filter for the run.
if __name__ == '__main__':
    unittest.main(warnings='ignore')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment