Balazs Horanyi (BalazsHoranyi) / GitHub gists
# getstream.io: connect a client and grab a user's feed.
import stream
import datetime

API_KEY = "XXX"
API_SECRET = "XXXXXX"
client = stream.connect(API_KEY, API_SECRET)

# Writing custom data to the activities.
user_feed = client.feed('user', '1')
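The preview stops before anything is written; a minimal sketch of adding an activity with custom fields (the activity values here are illustrative, not from the original gist):

user_feed.add_activity({
    'actor': 'user:1',
    'verb': 'commit',                    # hypothetical verb
    'object': 'repo:1',                  # hypothetical object
    'commit_message': 'fix pagination',  # any extra key is stored as custom data
    'time': datetime.datetime.utcnow().isoformat(),
})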
BalazsHoranyi / rating_decay.ipynb (last active September 10, 2018): a "rating_decay" notebook whose preview could not be rendered on the page.
# spaCy rules for spotting feed/follow mentions in app reviews.
import spacy
from spacy.matcher import Matcher

def update_prospects(self):
    self.df_es = self.get_reviews(gt=True)
    nlp = spacy.load('en')
    nlp_sent = spacy.load('appreviews/appclass')  # custom review-classification model
    matcher = Matcher(nlp.vocab)
    matcher.add("feednoun", None, [{'POS': 'NOUN', 'LOWER': 'feed'}])
    matcher.add("follow", None, [{'LOWER': 'follow'}, {'LEMMA': 'relation'}])
    # two patterns registered under one key match either surface form
    matcher.add("follows", None, [{'LOWER': 'follow'}, {'LOWER': 'relationships'}])
    matcher.add("follows", None, [{'LOWER': 'follow'}, {'LEMMA': 'relationships'}])
# spaCy text-classifier setup (mirrors spaCy's textcat example).
nlp = spacy.blank('en')  # create blank Language class
print("Created blank 'en' model")
# add the text classifier to the pipeline if it doesn't exist
# nlp.create_pipe works for built-ins that are registered with spaCy
if 'textcat' not in nlp.pipe_names:
    textcat = nlp.create_pipe('textcat')
    nlp.add_pipe(textcat, last=True)
# otherwise, get it, so we can add labels to it
else:
    textcat = nlp.get_pipe('textcat')
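The next step the comments point to is registering labels; a minimal sketch, assuming a binary positive/negative scheme (label names are illustrative):

textcat.add_label('POSITIVE')
textcat.add_label('NEGATIVE')
optimizer = nlp.begin_training()  # spaCy v2 training entry point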
# PyTorch setup: pick a device and move tensors to the GPU only on request.
import torch
import torch.nn as nn
import torch.nn.functional as F

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def gpu(tensor, gpu=False):
    if gpu:
        return tensor.cuda()
    else:
        return tensor  # leave on CPU
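Usage is a one-liner:

x = gpu(torch.randn(4, 8), gpu=torch.cuda.is_available())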
# Pull a user's public GitHub event pages from the REST API.
from github import Github
import itertools
import numpy as np

user_id = 'BalazsHoranyi'  # hardcoded for demo purposes

def get_github_events(user_name):
    github_token = "superdupersecret"
    urls = [f'https://api.github.com/users/{user_name}/events?page={i}&access_token={github_token}'
            for i in range(11)]
    headers = {}
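    # Assumed continuation (the preview is truncated here): fetch each page
    # and flatten the per-page event lists into one list.
    import requests  # local import keeps this sketch self-contained
    pages = [requests.get(url, headers=headers).json() for url in urls]
    return list(itertools.chain.from_iterable(p for p in pages if isinstance(p, list)))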
# Build an Annoy index over item embeddings for approximate nearest neighbors.
from annoy import AnnoyIndex

f = 32  # embedding dimensionality
t = AnnoyIndex(f)
for i in range(len(item_embeddings)):  # item_embeddings: one 32-d vector per item
    t.add_item(i, item_embeddings[i])
t.build(10)  # 10 trees
t.save('github.ann')
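The saved index can be memory-mapped and queried later; a short sketch:

u = AnnoyIndex(f)
u.load('github.ann')  # mmaps the file
print(u.get_nns_by_item(0, 10))  # 10 nearest neighbors of item 0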
# Load user/item id stacks and keep only items seen more than 50 times.
import dask.array as da
import numpy as np

print('users')
users = da.from_npy_stack('users', mmap_mode=None).compute().astype(np.int32)
print('items')
items = da.from_npy_stack('items', mmap_mode=None).compute().astype(np.int32)
print('getting unique')
unique_items, item_inverse, item_count = np.unique(items, return_inverse=True, return_counts=True)
print('creating mask')
good_items = unique_items[np.where(item_count > 50)[0]]
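A natural next step, assumed rather than shown in the preview, is masking the raw arrays down to those popular items:

mask = np.isin(items, good_items)
users, items = users[mask], items[mask]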
# Walk a large interaction matrix in 10M-row slices so the user set
# is built without materializing the whole array at once.
import math
import dask.array as da
from tqdm import tqdm

interactions = da.from_npy_stack('interactions')
users = interactions[:, 0]
items = interactions[:, 1]
slicer = 10000000
for i in tqdm(range(math.ceil(len(interactions) / slicer))):
    if i == 0:
        user_set = set(users[i*slicer: (i+1)*slicer].compute())
    else:
        # assumed continuation of the truncated preview: union in later slices
        user_set |= set(users[i*slicer: (i+1)*slicer].compute())
# Convert a dask DataFrame to a dask array, per the linked StackOverflow answer.
import dask.array as da
from dask import compute

def to_dask_array(df):
    # https://stackoverflow.com/questions/37444943/dask-array-from-dataframe?utm_medium=organic&utm_source=google_rich_qa&utm_campaign=google_rich_qa
    partitions = df.to_delayed()
    shapes = [part.values.shape for part in partitions]
    dtypes = partitions[0].dtypes
    results = compute(dtypes, *shapes)  # trigger computation to find shape
    dtypes, shapes = results[0], results[1:]
    # remainder follows the linked answer; the preview cut off mid-expression
    chunks = [da.from_delayed(part.values, shape, dtypes)
              for part, shape in zip(partitions, shapes)]
    return da.concatenate(chunks, axis=0)
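A quick usage sketch with a toy dataframe:

import pandas as pd
import dask.dataframe as dd

ddf = dd.from_pandas(pd.DataFrame({'user': [1, 2, 3], 'item': [9, 8, 7]}), npartitions=2)
arr = to_dask_array(ddf)
print(arr.compute())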