Skip to content

Instantly share code, notes, and snippets.

View urigoren's full-sized avatar

Uri Goren urigoren

View GitHub Profile
@urigoren
urigoren / mcl.py
Last active April 17, 2024 23:52
Markov clustering algorithm
import numpy as np
from scipy.sparse import linalg, eye, csr_matrix
from sklearn.preprocessing import normalize
from sklearn.metrics.pairwise import pairwise_distances
from collections import defaultdict
class MarkovClustering:
def __init__(self, matrix, metric="cosine", bias=1):
@urigoren
urigoren / heic2jpg.py
Created November 10, 2023 15:45
A python script to convert all HEIC photos in a folder to JPG format
from PIL import Image
from pathlib import Path
from pillow_heif import register_heif_opener
from tqdm import tqdm
from argparse import ArgumentParser
register_heif_opener()
def main(params):
print("Converting HEIC files to JPG")
files = list(Path(".").glob("*.heic")) + list(Path(".").glob("*.HEIC"))
import openai
from decouple import config
openai.api_key = config("OPENAI_KEY")
# NOTE(review): presumably tokenizer ids for spellings of "yes" / "no";
# how they are consumed is not visible in this excerpt -- confirm against
# the body of yes_or_no. Each id is listed once: the sets are unchanged,
# since a frozenset discards repeated members.
YES_TOKEN = frozenset({5297, 3763, 3363, 8505, 43335, 21560})
NO_TOKEN = frozenset({2949, 645, 1400, 3919, 15285, 8005})
def yes_or_no(txt: str)->bool:
response = openai.Completion.create(
model="text-davinci-003",
@urigoren
urigoren / LSTM_Binary.py
Last active June 22, 2023 19:37
LSTM Binary classification with Keras
from keras.layers import Dense, Dropout, LSTM, Embedding
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
import pandas as pd
import numpy as np
input_file = 'input.csv'
def load_data(test_split = 0.2):
print ('Loading data...')
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@urigoren
urigoren / wsl2_reclaim.txt
Created January 14, 2023 13:48
How to reclaim WSL2 space
wsl --shutdown
diskpart
# a separate DiskPart window opens; run the remaining commands there
select vdisk file="C:\Users\ugore\AppData\Local\Packages\CanonicalGroupLimited.Ubuntu20.04onWindows_79rhkp1fndgsc\LocalState\ext4.vhdx"
attach vdisk readonly
compact vdisk
detach vdisk
exit
@urigoren
urigoren / hvim.sh
Created July 4, 2017 10:58
Run vim on hadoop files
# Edit an HDFS file with vim: dump it to a local scratch file, edit it,
# then replace the HDFS original with the edited copy.
# Usage: hvim.sh <hdfs-path>

# Refuse to run without a target path instead of invoking hadoop with no path.
: "${1:?usage: hvim.sh <hdfs-path>}"

# Abort at the first failing step, so the HDFS original is only removed
# after both the dump and the editing session have succeeded.
set -e

# "$1" is quoted everywhere so the local shell neither word-splits nor
# glob-expands the path before hadoop sees it.
hadoop fs -text "$1" > hvim.txt
vim hvim.txt
# -skipTrash deletes permanently; the edited copy is uploaded right after.
hadoop fs -rm -skipTrash "$1"
hadoop fs -copyFromLocal hvim.txt "$1"
rm hvim.txt
hadoop fs -chmod 777 "$1"
@urigoren
urigoren / word2vec_train.py
Last active October 24, 2022 15:07
A command-line script to train word2vec on all text files in a directory using Gensim, and a predefined vocabulary.
import sys, random, json
from gensim.models import Word2Vec
from argparse import ArgumentParser
from collections import Counter
from pathlib import Path
__dir__= Path(__file__).absolute().parent
class DirSentences(object):
from collections import defaultdict
from itertools import product
from scipy import sparse
from sklearn.base import TransformerMixin
class InteractionBySplit(TransformerMixin):
"""
Takes a sparse matrix as input, and an index to split by, and returns all possible interactions before and after that index.
"""
def __init__(self, split_index,*args,**kwargs):
<?php
define('SLACK_WEBHOOK', 'https://hooks.slack.com/services/xxx/yyy/zzz');
define('TELEGRAM_BOT_TOKEN', '...');
define('TELEGRAM_CHAT_ID', '12345');
function slack($txt) {
$msg = array('text' => $txt);
$c = curl_init(SLACK_WEBHOOK);
curl_setopt($c, CURLOPT_RETURNTRANSFER, true);
curl_setopt($c, CURLOPT_SSL_VERIFYPEER, false);