Balazs Horanyi (@BalazsHoranyi)

import stream
import datetime
API_KEY = "XXX"
API_SECRET = "XXXXXX"
client = stream.connect(API_KEY, API_SECRET)
# Writing custom data to the activities.
user_feed = client.feed('user', '1')
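A minimal sketch of writing such an activity through the feed handle above, using stream-python's add_activity call; the verb, object, and the custom popularity field are illustrative values, not from the original gist:
# add an activity carrying a custom field alongside the standard ones
user_feed.add_activity({
    'actor': '1',
    'verb': 'star',          # illustrative verb
    'object': 'repo:42',     # illustrative object
    'popularity': 5,         # custom data attached to the activity
})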
@BalazsHoranyi
BalazsHoranyi / rating_decay.ipynb
Last active September 10, 2018 17:22
rating_decay
(notebook not rendered)
from github import Github  # imported in the original gist, unused in this excerpt
import itertools
import numpy as np
import requests

user_id = 'BalazsHoranyi'  # hardcoded for demo purposes

def get_github_events(user_name):
    github_token = "superdupersecret"
    urls = [f'https://api.github.com/users/{user_name}/events?page={i}&access_token={github_token}'
            for i in range(11)]
    headers = {}
    # assumed completion: fetch each page and flatten the per-page event lists
    pages = [requests.get(url, headers=headers).json() for url in urls]
    return list(itertools.chain.from_iterable(pages))
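A short usage sketch for the helper above, assuming the completion returns a flat list of event dicts:
events = get_github_events(user_id)
print(f'fetched {len(events)} events for {user_id}')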
import torch
import torch.nn as nn
import torch.nn.functional as F
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
def gpu(tensor, gpu=False):
    # move the tensor to the GPU when requested, otherwise leave it on the CPU
    if gpu:
        return tensor.cuda()
    else:
        return tensor
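A quick usage sketch for the helper, placing a freshly created tensor wherever CUDA is available:
x = gpu(torch.randn(2, 3), gpu=torch.cuda.is_available())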
from distributed import Client, LocalCluster
import dask.dataframe as dd
import numpy as np

cluster = LocalCluster(ip='0.0.0.0', n_workers=32, threads_per_worker=1,
                       diagnostics_port=8787, memory_limit=2e9)
client = Client(cluster)
print(client)

df = dd.read_parquet('parquet/')
print(f'found {len(df)} interactions')
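With the frame loaded, a simple distributed aggregation might look like this; the 'user_id' column name is an assumption about the parquet schema:
n_users = df['user_id'].nunique().compute()  # 'user_id' is an assumed column name
print(f'found {n_users} unique users')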
@BalazsHoranyi
BalazsHoranyi / GH_Archive.py
Last active May 31, 2018 15:36
download github data
import pandas as pd
import numpy as np
import pyarrow as pa
import pyarrow.parquet as pq
import requests
import datetime
import os
import gzip
from joblib import Parallel, delayed
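The gist is described as downloading GitHub data; a minimal sketch of one download step under the assumption that it pulls GH Archive's hourly dumps (one gzipped, newline-delimited JSON file per hour at data.gharchive.org), with illustrative date values:
def download_hour(day, hour):
    # e.g. https://data.gharchive.org/2018-05-01-0.json.gz
    url = f'https://data.gharchive.org/{day}-{hour}.json.gz'
    raw = requests.get(url).content
    return pd.read_json(gzip.decompress(raw).decode('utf-8'), lines=True)

# fetch one day in parallel, keep a few flat columns, and write a parquet file
frames = Parallel(n_jobs=8)(delayed(download_hour)('2018-05-01', h) for h in range(24))
events = pd.concat(frames, ignore_index=True)[['id', 'type', 'created_at']]
pq.write_table(pa.Table.from_pandas(events), 'parquet/2018-05-01.parquet')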
import numpy as np
import dask.array as da

print('users')
users = da.from_npy_stack('users', mmap_mode=None).compute().astype(np.int32)
print('items')
items = da.from_npy_stack('items', mmap_mode=None).compute().astype(np.int32)
print('getting unique')
unique_items, item_inverse, item_count = np.unique(items, return_inverse=True, return_counts=True)
print('creating mask')
good_items = unique_items[np.where(item_count > 50)[0]]
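The mask can then filter the raw interactions down to items seen more than 50 times:
mask = np.isin(items, good_items)  # keep only interactions on sufficiently popular items
users, items = users[mask], items[mask]
print(f'kept {mask.sum()} of {len(mask)} interactions')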
import spacy

nlp = spacy.blank('en')  # create blank Language class
print("Created blank 'en' model")

# add the text classifier to the pipeline if it doesn't exist
# nlp.create_pipe works for built-ins that are registered with spaCy
if 'textcat' not in nlp.pipe_names:
    textcat = nlp.create_pipe('textcat')
    nlp.add_pipe(textcat, last=True)
# otherwise, get it, so we can add labels to it
else:
    textcat = nlp.get_pipe('textcat')
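From here, the usual spaCy v2 flow is to register labels on the classifier and run update steps; a minimal sketch with an invented label and example text:
textcat.add_label('POSITIVE')  # invented label for illustration
optimizer = nlp.begin_training()
losses = {}
nlp.update(['This app is great'], [{'cats': {'POSITIVE': 1.0}}], sgd=optimizer, losses=losses)
print(losses)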
# excerpt from a class method; assumes `import spacy` and `from spacy.matcher import Matcher`
def update_prospects(self):
    self.df_es = self.get_reviews(gt=True)
    nlp = spacy.load('en')
    nlp_sent = spacy.load('appreviews/appclass')
    matcher = Matcher(nlp.vocab)
    matcher.add("feednoun", None, [{'POS': 'NOUN', 'LOWER': 'feed'}])
    matcher.add("follow", None, [{'LOWER': 'follow'}, {'LEMMA': 'relation'}])
    matcher.add("follows", None, [{'LOWER': 'follow'}, {'LOWER': 'relationships'}])
    matcher.add("follows", None, [{'LOWER': 'follow'}, {'LEMMA': 'relationships'}])
from annoy import AnnoyIndex

f = 32  # dimensionality of the item embeddings
t = AnnoyIndex(f)  # angular (cosine-like) metric by default
for i in range(len(item_embeddings)):  # item_embeddings: array of 32-d item vectors
    t.add_item(i, item_embeddings[i])
t.build(10)  # 10 trees
t.save('github.ann')
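Reading the index back for queries is symmetric; a short sketch using Annoy's load and nearest-neighbour lookup:
u = AnnoyIndex(f)
u.load('github.ann')  # memory-maps the index file
print(u.get_nns_by_item(0, 10))  # the 10 items nearest to item 0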