Skip to content

Instantly share code, notes, and snippets.

@bbengfort
Last active December 21, 2015 06:29
Show Gist options
  • Save bbengfort/6264904 to your computer and use it in GitHub Desktop.
Save bbengfort/6264904 to your computer and use it in GitHub Desktop.
A data structure for five star ranking preference expressions wrapped in a reader for TSV data.
#!/usr/bin/env python
import csv
import json
class PreferenceReader(object):
    """
    A reader for the recommended format for storing book preferences in
    a tab-separated file. Each line holds three tab-separated fields:

        critic, title, rating

    The file is parsed lazily, the first time the data is accessed, into
    a nested dictionary of the form {critic: {title: rating}} where each
    rating is a float. The class only offers read access to that data;
    it provides no methods for modifying it.
    """

    def __init__(self, path):
        """
        Supply the path to the TSV file. The file is not opened here; it
        is read on the first access that needs the data.
        """
        self._path = path
        self._data = {}

    @property
    def data(self):
        """
        On access, loads the internal data structure from the file and
        returns it as a dictionary of {critic: {title: rating}}.

        If a (critic, title) pair occurs more than once, the last rating
        read wins. Any error raised while reading propagates to the
        caller and leaves the reader unloaded.
        """
        if not self._data:
            # Build into a local dictionary and publish it only once the
            # whole file has been read: if parsing fails half way, the
            # reader must not keep (and later serve) a truncated data set.
            loaded = {}
            for critic, title, rating in self.readlines():
                loaded.setdefault(critic, {})[title] = rating
            self._data = loaded
        return self._data

    @property
    def critics(self):
        """
        Returns the critics in the data: the keys of the underlying
        dictionary (a list on Python 2, a key view on Python 3).
        """
        return self.data.keys()

    @property
    def titles(self):
        """
        Returns the set of every title rated by at least one critic.
        """
        titles = set()
        for ratings in self.data.values():
            titles.update(ratings)
        return titles

    def _open(self):
        """
        Opens the TSV file in the mode the csv module expects on the
        running interpreter and returns the file object.
        """
        import sys

        if sys.version_info[0] < 3:
            # The Python 2 csv module wants a binary file object.
            return open(self._path, 'rb')
        # The Python 3 csv module wants text opened with newline=''.
        return open(self._path, 'r', newline='')

    def readlines(self):
        """
        Reads the TSV file and ensures that the rating is the correct
        `float` type. Completely empty lines are skipped.

        Yields a tuple- (critic, title, rating)

        Raises IndexError for a non-empty row with fewer than three
        fields and ValueError for a rating that is not a number.
        """
        with self._open() as data:
            for row in csv.reader(data, delimiter='\t'):
                if not row:
                    # csv yields an empty list for a blank line (for
                    # example a trailing newline at the end of the file).
                    continue
                yield row[0], row[1], float(row[2])

    def __str__(self):
        """
        Returns a JSON representation of the data structure.
        """
        return json.dumps(self.data, indent=2)

    def __getitem__(self, critic):
        """
        Quickly get access to the {title: rating} dictionary of a
        particular critic. Raises KeyError for an unknown critic.
        """
        return self.data[critic]

    def __contains__(self, critic):
        """
        Determine if the critic is in the data set.
        """
        return critic in self.data

    def __iter__(self):
        """
        Loop through all critics; this behaves similarly to the
        iteration context of a Python dictionary (it yields the keys).
        """
        return iter(self.data)

    def __len__(self):
        """
        Returns the number of critics in the data.
        """
        return len(self.data)
if __name__ == "__main__":
    # Small demo: dump the whole data set as JSON, then show the ratings
    # of a single critic. The path is relative to the working directory.
    reader = PreferenceReader('../data/book_ratings.tsv')
    # Single-argument print calls produce the same output on Python 2
    # (print statement with a parenthesized expression) and on Python 3.
    print(reader)
    print("")
    print(reader['Jenny Kim'])
# tests.reader_tests
# Tests the reader module in collabtut
#
# Author: Benjamin Bengfort <ben@cobrain.com>
# Created: Fri Aug 23 09:16:09 2013 -0400
#
# Copyright (C) 2013 Cobrain Company
# For license information, see LICENSE.txt
#
# ID: reader_tests.py.py [] ben@cobrain.com $
"""
Tests the preference reader module in collabtut.
"""
##########################################################################
## Imports
##########################################################################
import os
import unittest
from collabtut.reader import PreferenceReader
##########################################################################
## Test Cases
##########################################################################
class ReaderTest(unittest.TestCase):
    """
    Exercises PreferenceReader against a small two-critic TSV fixture
    that is written before, and removed after, every test.
    """

    FIXTURE_PATH = 'test_ratings_fixture.tsv'

    def setUp(self):
        """
        Write the ratings fixture to disk and point a reader at it.
        """
        ratings = (
            ("Lisa", "Lady in the Water", 2.5),
            ("Lisa", "Snakes on a Plane", 3.5),
            ("Lisa", "Just My Luck", 3.0),
            ("Lisa", "Superman Returns", 3.5),
            ("Lisa", "You, Me, and Dupree", 2.5),
            ("Lisa", "The Night Listener", 3.0),
            ("Gene", "Lady in the Water", 3.0),
            ("Gene", "Snakes on a Plane", 3.5),
            ("Gene", "Just My Luck", 1.5),
            ("Gene", "Superman Returns", 5.0),
            ("Gene", "The Night Listener", 3.0),
            ("Gene", "You, Me, and Dupree", 3.5),
        )

        with open(self.FIXTURE_PATH, 'w') as fixture:
            for record in ratings:
                # One tab-separated line per (critic, title, rating).
                fixture.write("\t".join(map(str, record)) + "\n")

        self.reader = PreferenceReader(self.FIXTURE_PATH)

    def tearDown(self):
        """
        Delete the fixture file and drop the reader.
        """
        os.remove(self.FIXTURE_PATH)
        self.reader = None

    def test_lazy_load(self):
        """
        The file must only be read once the data is accessed.
        """
        reader = self.reader
        self.assertEqual(reader._data, {}, "Data loaded before access?")
        self.assertTrue(reader.data, "No data loaded on access?")
        self.assertNotEqual(reader._data, {}, "Previously accessed data empty")

    def test_critic_inclusion(self):
        """
        Critics in the fixture are found; unknown critics are not.
        """
        for critic in ('Gene', 'Lisa'):
            self.assertIn(critic, self.reader)
        self.assertNotIn('Ben', self.reader)

    def test_critic_set(self):
        """
        Each critic is counted exactly once.
        """
        self.assertEqual(len(self.reader), 2, "Critic length mismatch")
        unique_critics = set(self.reader)
        self.assertEqual(len(unique_critics), len(self.reader),
                         "Duplicate critics exist!")

    def test_title_inclusion(self):
        """
        Every fixture title is captured; an unrated title is not.
        """
        expected_titles = (
            'Superman Returns',
            'Just My Luck',
            'Lady in the Water',
            'Snakes on a Plane',
            'You, Me, and Dupree',
            'The Night Listener',
        )
        for title in expected_titles:
            self.assertIn(title, self.reader.titles)
        self.assertNotIn('Transformers: Shadow of the Moon', self.reader.titles)

    def test_title_set(self):
        """
        Each title is counted exactly once.
        """
        self.assertEqual(len(self.reader.titles), 6, "Title length mismatch")
        unique_titles = set(self.reader.titles)
        self.assertEqual(len(unique_titles), len(self.reader.titles),
                         "Duplicate titles exist!")

    def test_getitem(self):
        """
        Indexing by critic returns their ratings or raises KeyError.
        """
        self.assertIn('Gene', self.reader, "Contains not correct or bad fixture.")
        gene_ratings = self.reader['Gene']
        self.assertTrue(bool(gene_ratings),
                        "getitem returned empty or False item.")
        with self.assertRaises(KeyError):
            self.reader['Ben']
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment