Skip to content

Instantly share code, notes, and snippets.

View urigoren's full-sized avatar

Uri Goren urigoren

View GitHub Profile
@urigoren
urigoren / word2vec_train.py
Last active October 24, 2022 15:07
A command-line script that trains word2vec with Gensim on all text files in a directory, using a predefined vocabulary.
import sys, random, json
from gensim.models import Word2Vec
from argparse import ArgumentParser
from collections import Counter
from pathlib import Path
__dir__= Path(__file__).absolute().parent
class DirSentences(object):
import json
import boto3

# Handle to the S3 bucket that both helpers below read from and write to.
s3 = boto3.resource("s3").Bucket("bucket")


def _load_s3(f):
    """Fetch the object stored under key *f* and parse its body as JSON."""
    body = s3.Object(key=f).get()["Body"]
    return json.load(body)


def _dump_s3(obj, f):
    """Serialize *obj* to a JSON string and upload it under key *f*."""
    payload = json.dumps(obj)
    return s3.Object(key=f).put(Body=payload)


# Attach the helpers to the json module so callers can use
# json.load_s3(key) and json.dump_s3(obj, key).
json.load_s3 = _load_s3
json.dump_s3 = _dump_s3
@urigoren
urigoren / .vimrc
Last active November 28, 2021 12:27
" Open new horizontal split windows below the current window.
set splitbelow
" <F4> (normal mode): toggle the 'hlsearch' option; the trailing `?` query is
" meant to echo the option's state afterwards.
nnoremap <F4> :set hlsearch! nohlsearch?<CR>
" <F4> (insert mode): run the normal-mode <F4> mapping via <C-O>, then stay in
" insert mode.
imap <F4> <C-O><F4>
" <F2> (normal mode): overwrite the search register with a fixed string
" (presumably one unlikely to match anything, so highlights disappear), then
" toggle 'paste' and echo its new value.
nnoremap <F2> :let @/="qoXouQoz"<CR>:set invpaste paste?<CR>
" Let <F2> toggle 'paste' through the 'pastetoggle' option as well.
set pastetoggle=<F2>
" <F3> (normal mode): append the word under the cursor, wrapped in \< \>
" word-boundary atoms, to the current search pattern as an extra `\|`
" alternative, then jump to the next match.
nnoremap <silent> <F3> :let @/ .= '\\|\<'.expand('<cword>').'\>'<cr>n
" <F5> (normal mode, buffer-local): write the buffer, then run the current
" file with /usr/bin/python3, shell-escaping the file name.
nnoremap <buffer> <F5> :w<cr>:exec '!/usr/bin/python3' shellescape(@%, 1)<cr>
" Enable syntax highlighting.
syntax on
@urigoren
urigoren / nre.md
Last active September 14, 2019 18:20
Nice regular expressions
import numpy as np
from matplotlib import pyplot as plt
def polyfit_plot(x, y, p=1):
plt.scatter(x, y)
axes = plt.gca()
coeff = np.polyfit(x, y, p)
X_plot = np.linspace(axes.get_xlim()[0],axes.get_xlim()[1],100)
print (np.corrcoef(x,y)[0,1])
Y_plot = 0
@urigoren
urigoren / purity.py
Last active August 9, 2017 20:03
This module tests whether a function is pure
import ast, inspect, textwrap
whitelist = {'math', 'itertools', 'collections', 'functools', 'operator',
'json', 'pickle', 'string', 'types', 'statistics', 'fractions', 'decimal'}
def pure(f):
    """Mark *f* as pure and return it unchanged.

    Intended for use as a decorator. It sets the attribute ``pure = True``
    on the function object and returns that same object, so the decorated
    name still refers to the original callable.
    """
    f.pure = True
    return f
@urigoren
urigoren / mcl.py
Last active April 17, 2024 23:52
Markov clustering algorithm
import numpy as np
from scipy.sparse import linalg, eye, csr_matrix
from sklearn.preprocessing import normalize
from sklearn.metrics.pairwise import pairwise_distances
from collections import defaultdict
class MarkovClustering:
def __init__(self, matrix, metric="cosine", bias=1):
@urigoren
urigoren / hvim.sh
Created July 4, 2017 10:58
Run vim on hadoop files
# Edit a file stored on HDFS with vim and write the result back to the same path.
# Usage: hvim.sh <hdfs-path>
#
# Every step that the following steps depend on is checked. The remote file is
# only removed after a successful download and edit, and the local copy is only
# removed after a successful upload, so a failure never discards the last copy.
# "$1" is quoted so paths containing spaces or glob characters are passed intact.
hadoop fs -text "$1" > hvim.txt || exit 1
vim hvim.txt || exit 1
hadoop fs -rm -skipTrash "$1" || exit 1
hadoop fs -copyFromLocal hvim.txt "$1" || exit 1
rm hvim.txt
hadoop fs -chmod 777 "$1"
@urigoren
urigoren / rshift.py
Last active April 19, 2021 20:50
Use arrow notation (>>) like Haskell to make filter, map and reduce operations more readable.
from itertools import chain
from functools import reduce
import operator
"""
Usage of this module:
<iterable> >> function3 * function2 * function1 >> aggregation
for example:
@urigoren
urigoren / LSTM_Binary.py
Last active June 22, 2023 19:37
LSTM Binary classification with Keras
from keras.layers import Dense, Dropout, LSTM, Embedding
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
import pandas as pd
import numpy as np
input_file = 'input.csv'
def load_data(test_split = 0.2):
print ('Loading data...')