This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import stream | |
import datetime | |
import os

# Credentials are taken from the environment when present so real keys never
# have to be committed; the placeholder literals remain as the fallback.
API_KEY = os.environ.get("STREAM_API_KEY", "XXX")
API_SECRET = os.environ.get("STREAM_API_SECRET", "XXXXXX")
client = stream.connect(API_KEY, API_SECRET)

# Writing custom data to the activities.
# Handle for the feed in group 'user' with id '1'.
user_feed = client.feed('user', '1')
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from github import Github | |
import itertools | |
import numpy as np | |
user_id='BalazsHoranyi' # hardcoded for demo purposes | |
def get_github_events(user_name): | |
github_token = "superdupersecret" | |
urls = [f'https://api.github.com/users/{user_name}/events?page={i}&access_token={github_token}' for i in range(11)] | |
headers = {} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import torch.nn as nn | |
import torch.nn.functional as F | |
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") | |
def gpu(tensor, gpu=False): | |
if gpu: | |
return tensor.cuda() | |
else: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from distributed import Client, LocalCluster | |
import dask.dataframe as dd | |
import numpy as np | |
# Start a local cluster of 32 single-threaded workers and attach a client to it.
# NOTE(review): newer `distributed` releases appear to spell `ip` /
# `diagnostics_port` as `host` / `dashboard_address` -- confirm against the
# installed version before renaming.
cluster = LocalCluster(
    ip='0.0.0.0',
    n_workers=32,
    threads_per_worker=1,
    diagnostics_port=8787,
    memory_limit=2e9,
)
client = Client(cluster)
print(client)

# Open the parquet dataset and report how many rows it holds.
df = dd.read_parquet('parquet/')
print(f'found {len(df)} interactions')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
import pyarrow as pa | |
import pyarrow.parquet as pq | |
import requests | |
import datetime | |
import os | |
import gzip | |
from joblib import Parallel, delayed |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def _load_int32_stack(dirname):
    """Load the npy stack stored under *dirname* into memory as an int32 array."""
    return da.from_npy_stack(dirname, mmap_mode=None).compute().astype(np.int32)


print('users')
users = _load_int32_stack('users')
print('items')
items = _load_int32_stack('items')

print('getting unique')
# np.unique always returns (values, inverse, counts) in that order, regardless
# of the order in which the return_* flags are written.
unique_items, item_inverse, item_count = np.unique(
    items, return_inverse=True, return_counts=True
)

print('creating mask')
# Keep only the item ids that occur more than 50 times.
good_items = unique_items[item_count > 50]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
nlp = spacy.blank('en') # create blank Language class | |
print("Created blank 'en' model") | |
# add the text classifier to the pipeline if it doesn't exist | |
# nlp.create_pipe works for built-ins that are registered with spaCy | |
if 'textcat' not in nlp.pipe_names: | |
textcat = nlp.create_pipe('textcat') | |
nlp.add_pipe(textcat, last=True) | |
# otherwise, get it, so we can add labels to it | |
else: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def update_prospects(self): | |
self.df_es = self.get_reviews(gt=True) | |
nlp = spacy.load('en') | |
nlp_sent = spacy.load('appreviews/appclass') | |
matcher = Matcher(nlp.vocab) | |
matcher.add("feednoun", None, [{POS: 'NOUN', 'LOWER': 'feed'}]) | |
matcher.add("follow", None, [{'LOWER': 'follow'}, {LEMMA: 'relation'}]) | |
matcher.add("follows", None, [{'LOWER': 'follow'}, {'LOWER': 'relationships'}]) | |
matcher.add("follows", None, [{'LOWER': 'follow'}, {LEMMA: 'relationships'}]) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from annoy import AnnoyIndex | |
f = 32  # length of each vector handed to add_item
# Name the metric explicitly: calling AnnoyIndex without one is deprecated.
# 'angular' is the metric Annoy used when none was given, so results match.
t = AnnoyIndex(f, 'angular')
for i, vector in enumerate(item_embeddings):
    t.add_item(i, vector)
t.build(10)  # 10 trees
t.save('github.ann')
NewerOlder