This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| truth = [ | |
| set(['DMDR1U2RA7VN', 'K29U1709EA5R', 'D3NAY0YYFO4P', '58D4CGTDM5VX', 'ZLRB9DMOYSM9', 'J27VW94YYJRP', '77FOA4UNWD8Y', 'W0JQH817T6IE', 'OTXGMC3STDZ7', 'F4R4MW6W1BO8']), | |
| set(['NY0XRPCQX2J6', '5B15T46T75XM', 'QKPLUGBHWX1S', '90BP7NQLOZI8', 'H3ETKWH70OZ0', 'BWWQDUXMWDTU', '0J5OWQRLV2ZF', 'D0K9L1DTG1EQ', 'SRXWGC3XXJJO', '148X2AS0P7MP']), | |
| set(['YS0M2FXHFUKK', 'KASAZL3RPKK6', 'ZILSSCBC40IR', 'NEFEWA5CEPMW', '8DGQWN7D24RW', 'G1FQA6E96794', 'XNP69S9V9849', 'X5YBR7LX367U', '7INXG6910I57', 'W6G19WDE9FBN']), | |
| set(['0TIBYZMOJD10', '3QBNSX4XCPSA', 'X3NC9RI7ZPUK', 'FRVXUX3X2S3R', 'V9GUVOSSR83H', '9ED47BUW3J9B', '1RY6YNAXRI7X', 'VWQTW530L7HU', 'MBA1GBU5A3MJ', 'FQR5NJPRAQ1T']), | |
| set(['27BMODQ3KSDY', '2WRJA9D9SEPC', 'Q6RVWKG553K7', '8S46FET9O2Y1', 'AG7PEPJHIALE', 'WJ9Y2OG0EKR7', 'PLXC6ZHQIVVA', 'YRTYMIDTOV1R', '2DM3J4TN9557', 'LBVFSL8OUUHG']), | |
| set(['L1EYAG4PN55N', 'WXA3PLRSG53G', '74SBBUUA94N3', 'AQ6XWF6SZZ3K', 'B45DHKLKJDYD', '5OM79AIPHX6W', 'ELVYERD2OSIT', '21USARENDKEH', 'VBEY9RLYA5IF', 'MZMYC75VUQCA']), | |
| set([' |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from __future__ import division | |
| import json | |
| import time | |
| import numpy as np | |
| from sklearn.feature_extraction.text import CountVectorizer | |
| from sklearn.cross_validation import train_test_split | |
| from sklearn.ensemble import RandomForestClassifier | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import cv2 # for this I needed to install opencv -> pip install opencv-python | |
| import numpy as np | |
| def getHashLshAlgorithm(img): | |
| # Resize to 9x8 pixels | |
| img = cv2.resize(img,(9,8)) | |
| # Compare adjacent values (x>y) | |
| img_compared = np.empty((8,8)) | |
| for i,row in enumerate(img): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from __future__ import division,print_function | |
| import sqlite3 | |
| import time | |
| import heapq | |
| from multiprocessing import Process, Pool | |
| # get connection to the database | |
| con = sqlite3.connect(r"C:\nice\simple\path\reddit.db") | |
| con.text_factory = str |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from __future__ import division,print_function | |
| import sqlite3 | |
| import time | |
| import heapq | |
| from multiprocessing import Process, Pool | |
| from itertools import combinations | |
| from collections import defaultdict | |
| from itertools import combinations | |
| import operator |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from __future__ import print_function | |
| import sqlite3 | |
| import time | |
| from multiprocessing import Process, Pool | |
| import string | |
| import heapq | |
| # Create a connection to the database | |
| con = sqlite3.connect('/Users/some/cool/path/reddit.db') |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| from mrjob.job import MRJob | |
| from mrjob.step import MRStep | |
| import re | |
| import sys | |
| # The procedure is quite simple. In the mapper we simply add a degree for each node that appears in every edge. | |
| # we then use one reducer to sum up the occurrences of each node, which is the same as the degree of the node. | |
| # we then add 0 to the degree_array if the node has an even degree number else we add 1 to the degree array. |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| from mrjob.job import MRJob | |
| import re | |
| import sys | |
| # https://docs.python.org/3/library/re.html#re.compile | |
| # explanations about re.compile can be found in the above link | |
| WORD_RE = re.compile(r"[\w']+") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from mrjob.job import MRJob | |
| from mrjob.step import MRStep | |
| class MinSale(MRJob): | |
| def mapping(self, _, line): | |
| data = line.strip().split(",") | |
| date, time, store, item, cost, payment = data | |
| yield (store,time), int(cost) |
NewerOlder