This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
A quadtree is a tree data structure in which each internal node has exactly four children. Quadtrees are most often used to
partition a two-dimensional space by recursively subdividing it into four quadrants or regions. The regions may be square or
rectangular, or may have arbitrary shapes.
More on quadtrees:
https://en.wikipedia.org/wiki/Quadtree
Thanks to Jim for such an excellent visual representation at http://jimkang.com/quadtreevis/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[{'sa': '10.0.2.15', | |
'da': '104.244.42.193', | |
'pr': 6, | |
'sp': 49188, | |
'dp': 443, | |
'bytes_out': 21166, | |
'num_pkts_out': 37, | |
'bytes_in': 44703, | |
'num_pkts_in': 71, | |
'time_start': 158.474052, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{"sa":"10.0.2.20","da":"195.113.214.245","pr":6,"sp":58178,"dp":443,"bytes_out":591,"num_pkts_out":10,"bytes_in":20706,"num_pkts_in":21,"time_start":2969087.103130,"time_end":2969087.310696,"packets":[{"b":88,"dir":">","ipt":1},{"b":1420,"dir":"<","ipt":23},{"b":1420,"dir":"<","ipt":0},{"b":1420,"dir":"<","ipt":0},{"b":1420,"dir":"<","ipt":0},{"b":1420,"dir":"<","ipt":0},{"b":1027,"dir":"<","ipt":0},{"b":208,"dir":">","ipt":1},{"b":71,"dir":"<","ipt":22},{"b":295,"dir":">","ipt":2},{"b":1420,"dir":"<","ipt":154},{"b":1420,"dir":"<","ipt":0},{"b":1216,"dir":"<","ipt":0},{"b":1420,"dir":"<","ipt":0},{"b":1420,"dir":"<","ipt":0},{"b":1232,"dir":"<","ipt":0},{"b":1420,"dir":"<","ipt":0},{"b":1420,"dir":"<","ipt":0},{"b":1420,"dir":"<","ipt":0},{"b":120,"dir":"<","ipt":0}],"ip":{"out":{"ttl":128,"id":[25129,25130,25131,25132,25133,25134,25135,25136,25137,25138]},"in":{"ttl":64,"id":[60447,60448,60449,60450,60451,60452,60453,60454,60455,60456,60457,60458,60459,60460,60461,60462,60463,60464,60465,60466,60467]}},"t |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
# Readable names for the integer class labels returned by `clf.predict`.
label = {0: 'negative', 1: 'positive'}
example = ['I love this movie']

# Transform the sample text with `vect` before handing it to the classifier.
X = vect.transform(example)

# Predicted class for the single sample, and the largest class probability
# expressed as a percentage.
predicted_class = clf.predict(X)[0]
confidence = np.max(clf.predict_proba(X)) * 100

print('Prediction: %s\nProbability: %.2f%%'
      % (label[predicted_class], confidence))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pickle | |
import re | |
import os | |
from vectorizer import vect | |
# Load the serialized classifier from pkl_objects/classifier.pkl (path is
# relative to the current working directory).
# NOTE: unpickling can execute arbitrary code; only load files you trust.
# The `with` block guarantees the file handle is closed after loading.
with open(os.path.join('pkl_objects', 'classifier.pkl'), 'rb') as clf_file:
    clf = pickle.load(clf_file)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.feature_extraction.text import HashingVectorizer | |
import re | |
import os | |
import pickle | |
# Directory containing this module, so the pickle is found regardless of the
# caller's working directory.
cur_dir = os.path.dirname(__file__)

# Load the pickled stop-word collection stored next to this module.
# NOTE: unpickling can execute arbitrary code; only load files you trust.
# The `with` block guarantees the file handle is closed after loading.
stopwords_path = os.path.join(cur_dir, 'pkl_objects', 'stopwords.pkl')
with open(stopwords_path, 'rb') as stop_file:
    stop = pickle.load(stop_file)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pickle | |
import os | |
# Target directory for the serialized objects.
dest = os.path.join('movieclassifier', 'pkl_objects')

# exist_ok=True creates the directory tree if needed and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(dest, exist_ok=True)

# Write the stop-word collection with pickle protocol 4. The `with` block
# closes the file, which guarantees the data is flushed to disk.
with open(os.path.join(dest, 'stopwords.pkl'), 'wb') as stop_file:
    pickle.dump(stop, stop_file, protocol=4)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fetch an evaluation batch from the document stream
# (presumably 5000 documents with their labels; confirm against get_minibatch).
X_test, y_test = get_minibatch(doc_stream, size=5000)

# Transform the raw documents with `vect` before scoring.
X_test = vect.transform(X_test)

accuracy = clf.score(X_test, y_test)
print('Accuracy: %.3f' % accuracy)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import re | |
from nltk.corpus import stopwords | |
from distutils.version import LooseVersion as Version | |
from sklearn import __version__ as sklearn_version | |
from sklearn.feature_extraction.text import HashingVectorizer | |
from sklearn.linear_model import SGDClassifier |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import re | |
from nltk.corpus import stopwords | |
stop = stopwords.words('english') | |
def tokenizer(text): | |
text = re.sub('<[^>]*>', '', text) | |
emoticons = re.findall('(?::|;|=)(?:-)?(?:\)|\(|D|P)', text.lower()) | |
text = re.sub('[\W]+', ' ', text.lower()) +\ |
NewerOlder