Balazs Horanyi (@BalazsHoranyi)

import stream
import datetime
API_KEY = "XXX"
API_SECRET = "XXXXXX"
client = stream.connect(API_KEY, API_SECRET)
# Writing custom data to the activities.
user_feed = client.feed('user', '1')
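A minimal sketch of writing such an activity through the feed handle above, using stream-python's add_activity call; the verb, object, and the custom popularity field are illustrative values, not from the original gist:
# add an activity carrying a custom field alongside the standard ones
user_feed.add_activity({
    'actor': '1',
    'verb': 'star',          # illustrative verb
    'object': 'repo:42',     # illustrative object
    'popularity': 5,         # custom data attached to the activity
})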
@BalazsHoranyi
BalazsHoranyi / rating_decay.ipynb
Last active September 10, 2018 17:22
rating_decay
(notebook not rendered)
from github import Github  # imported in the original gist, unused in this excerpt
import itertools
import numpy as np
import requests

user_id = 'BalazsHoranyi'  # hardcoded for demo purposes

def get_github_events(user_name):
    github_token = "superdupersecret"
    urls = [f'https://api.github.com/users/{user_name}/events?page={i}&access_token={github_token}'
            for i in range(11)]
    headers = {}
    # assumed completion: fetch each page and flatten the per-page event lists
    pages = [requests.get(url, headers=headers).json() for url in urls]
    return list(itertools.chain.from_iterable(pages))
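A short usage sketch for the helper above, assuming the completion returns a flat list of event dicts:
events = get_github_events(user_id)
print(f'fetched {len(events)} events for {user_id}')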
import torch
import torch.nn as nn
import torch.nn.functional as F
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
def gpu(tensor, gpu=False):
    # move the tensor to the GPU when requested, otherwise leave it on the CPU
    if gpu:
        return tensor.cuda()
    else:
        return tensor
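A quick usage sketch for the helper, placing a freshly created tensor wherever CUDA is available:
x = gpu(torch.randn(2, 3), gpu=torch.cuda.is_available())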
from distributed import Client, LocalCluster
import dask.dataframe as dd
import numpy as np

cluster = LocalCluster(ip='0.0.0.0', n_workers=32, threads_per_worker=1,
                       diagnostics_port=8787, memory_limit=2e9)
client = Client(cluster)
print(client)

df = dd.read_parquet('parquet/')
print(f'found {len(df)} interactions')
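With the frame loaded, a simple distributed aggregation might look like this; the 'user_id' column name is an assumption about the parquet schema:
n_users = df['user_id'].nunique().compute()  # 'user_id' is an assumed column name
print(f'found {n_users} unique users')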
@BalazsHoranyi
BalazsHoranyi / GH_Archive.py
Last active May 31, 2018 15:36
download github data
import pandas as pd
import numpy as np
import pyarrow as pa
import pyarrow.parquet as pq
import requests
import datetime
import os
import gzip
from joblib import Parallel, delayed
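The gist is described as downloading GitHub data; a minimal sketch of one download step under the assumption that it pulls GH Archive's hourly dumps (one gzipped, newline-delimited JSON file per hour at data.gharchive.org), with illustrative date values:
def download_hour(day, hour):
    # e.g. https://data.gharchive.org/2018-05-01-0.json.gz
    url = f'https://data.gharchive.org/{day}-{hour}.json.gz'
    raw = requests.get(url).content
    return pd.read_json(gzip.decompress(raw).decode('utf-8'), lines=True)

# fetch one day in parallel, keep a few flat columns, and write a parquet file
frames = Parallel(n_jobs=8)(delayed(download_hour)('2018-05-01', h) for h in range(24))
events = pd.concat(frames, ignore_index=True)[['id', 'type', 'created_at']]
pq.write_table(pa.Table.from_pandas(events), 'parquet/2018-05-01.parquet')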
import numpy as np
import dask.array as da

print('users')
users = da.from_npy_stack('users', mmap_mode=None).compute().astype(np.int32)
print('items')
items = da.from_npy_stack('items', mmap_mode=None).compute().astype(np.int32)
print('getting unique')
unique_items, item_inverse, item_count = np.unique(items, return_inverse=True, return_counts=True)
print('creating mask')
good_items = unique_items[np.where(item_count > 50)[0]]
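The mask can then filter the raw interactions down to items seen more than 50 times:
mask = np.isin(items, good_items)  # keep only interactions on sufficiently popular items
users, items = users[mask], items[mask]
print(f'kept {mask.sum()} of {len(mask)} interactions')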
import spacy

nlp = spacy.blank('en')  # create blank Language class
print("Created blank 'en' model")

# add the text classifier to the pipeline if it doesn't exist
# nlp.create_pipe works for built-ins that are registered with spaCy
if 'textcat' not in nlp.pipe_names:
    textcat = nlp.create_pipe('textcat')
    nlp.add_pipe(textcat, last=True)
# otherwise, get it, so we can add labels to it
else:
    textcat = nlp.get_pipe('textcat')
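From here, the usual spaCy v2 flow is to register labels on the classifier and run update steps; a minimal sketch with an invented label and example text:
textcat.add_label('POSITIVE')  # invented label for illustration
optimizer = nlp.begin_training()
losses = {}
nlp.update(['This app is great'], [{'cats': {'POSITIVE': 1.0}}], sgd=optimizer, losses=losses)
print(losses)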
# excerpt from a class method; assumes `import spacy` and `from spacy.matcher import Matcher`
def update_prospects(self):
    self.df_es = self.get_reviews(gt=True)
    nlp = spacy.load('en')
    nlp_sent = spacy.load('appreviews/appclass')
    matcher = Matcher(nlp.vocab)
    matcher.add("feednoun", None, [{'POS': 'NOUN', 'LOWER': 'feed'}])
    matcher.add("follow", None, [{'LOWER': 'follow'}, {'LEMMA': 'relation'}])
    matcher.add("follows", None, [{'LOWER': 'follow'}, {'LOWER': 'relationships'}])
    matcher.add("follows", None, [{'LOWER': 'follow'}, {'LEMMA': 'relationships'}])
from annoy import AnnoyIndex

f = 32  # dimensionality of the item embeddings
t = AnnoyIndex(f)  # angular (cosine-like) metric by default
for i in range(len(item_embeddings)):  # item_embeddings: array of 32-d item vectors
    t.add_item(i, item_embeddings[i])
t.build(10)  # 10 trees
t.save('github.ann')
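Reading the index back for queries is symmetric; a short sketch using Annoy's load and nearest-neighbour lookup:
u = AnnoyIndex(f)
u.load('github.ann')  # memory-maps the index file
print(u.get_nns_by_item(0, 10))  # the 10 items nearest to item 0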