Skip to content

Instantly share code, notes, and snippets.

View urigoren's full-sized avatar

Uri Goren urigoren

View GitHub Profile
import re
from html import unescape
def html2text(htm):
ret = unescape(htm)
ret = ret.translate({
8209: ord('-'),
ord('`'): ord("'"),
ord('’'): ord("'"),
8220: ord('"'),
8221: ord('"'),
@urigoren
urigoren / bgprocess.py
Last active February 3, 2022 09:51
Run a python process in the background
from pathlib import Path
import subprocess, sys
def bgprocess(p:Path, *args) -> subprocess.Popen:
    """Start a Python script as a child process without waiting for it.

    The script is run with the interpreter that is executing the caller
    (``sys.executable``), with its working directory set to the directory
    that contains the script. The child's stdout and stderr are discarded.

    Args:
        p: Path to the script, as a ``Path`` or anything ``Path()`` accepts.
        *args: Command-line arguments passed to the script. Values that are
            not ``str``, ``bytes`` or path-like (for example ints) are
            converted with ``str()`` so that ``Popen`` accepts them.

    Returns:
        The ``subprocess.Popen`` handle of the started process.
    """
    # Local import keeps this block self-contained; the surrounding file only
    # imports pathlib, subprocess and sys.
    import os

    python = sys.executable
    if not isinstance(p, Path):
        p = Path(p)
    p = p.absolute()
    # Popen rejects argv entries such as ints; stringify anything it cannot
    # take as-is while leaving str/bytes/path-like values untouched.
    argv = [
        arg if isinstance(arg, (str, bytes, os.PathLike)) else str(arg)
        for arg in args
    ]
    # The script is addressed by bare file name because cwd is its directory.
    return subprocess.Popen(
        [python, p.name] + argv,
        cwd=str(p.parent),
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
@urigoren
urigoren / .vimrc
Last active November 28, 2021 12:27
" Open new horizontal splits below the current window.
set splitbelow
" <F4> (normal mode): toggle search highlighting and echo the new state.
nnoremap <F4> :set hlsearch! nohlsearch?<CR>
" <F4> (insert mode): run the normal-mode <F4> mapping via CTRL-O.
imap <F4> <C-O><F4>
" <F2> (normal mode): overwrite the search register with the string
" "qoXouQoz", then toggle 'paste' and echo its state.
" NOTE(review): presumably the odd string is meant to match nothing so that
" existing search highlights disappear -- confirm.
nnoremap <F2> :let @/="qoXouQoz"<CR>:set invpaste paste?<CR>
" Make <F2> the 'pastetoggle' key as well.
set pastetoggle=<F2>
" <F3> (normal mode): append the word under the cursor, wrapped in \< \>
" word boundaries, to the search register as an extra alternative, then
" jump to the next match.
nnoremap <silent> <F3> :let @/ .= '\\|\<'.expand('<cword>').'\>'<cr>n
" <F5> (normal mode): write the file, then run it with /usr/bin/python3
" (file name shell-escaped).
" NOTE(review): <buffer> makes this mapping local to whichever buffer is
" current when this line is sourced -- confirm that is intended.
nnoremap <buffer> <F5> :w<cr>:exec '!/usr/bin/python3' shellescape(@%, 1)<cr>
" Enable syntax highlighting.
syntax on
@urigoren
urigoren / ConditionedTextClassifier.py
Last active July 10, 2021 14:05
Bag-of-words baseline for conditional text classification
from copy import deepcopy as clone
from sklearn.base import ClassifierMixin
from sklearn.pipeline import Pipeline
class ConditionedTextClassifier(ClassifierMixin):
def __init__(self, conditions, model, condition_sep=' <s> '):
self.condition_sep=condition_sep
self.conditions = {}
for c in conditions:
self.conditions[c] = clone(model)
@urigoren
urigoren / config_reader.py
Created May 1, 2021 19:41
A simple cascading config reader
import os, sys, json
from pathlib import Path
class ConfigReader:
def __init__(self, default=None, **kwargs):
self.default=default
self.py_file = Path(os.path.join(os.getcwd(), sys.argv[0])).absolute()
p = self.py_file.parent
found_config_json = []
while p!=Path('/'):
@urigoren
urigoren / rshift.py
Last active April 19, 2021 20:50
Use arrow notation (>>) like Haskell to make filter, map and reduce operations more readable.
from itertools import chain
from functools import reduce
import operator
"""
Usage of this module:
<iterable> >> function3 * function2 * function1 >> aggregation
for example:
import numpy as np
from scipy import sparse
import collections
"""
#SPARK Co-Occurrence Matrix
#Format: (vertex, vertex) : count
import json, operator, itertools
def cooccur_matrix(srcHdfs, product2vertex):
ret = sc.textFile(srcHdfs)\
@urigoren
urigoren / .htaccess
Last active February 7, 2021 13:00
Call Python via the command line from PHP
# Follow symbolic links only when the link and its target have the same owner.
Options +SymLinksIfOwnerMatch
# Turn on mod_rewrite processing for this directory.
RewriteEngine on
# Only rewrite when the request does not map to an existing file ...
RewriteCond %{REQUEST_FILENAME} !-f
# ... and does not map to an existing directory.
RewriteCond %{REQUEST_FILENAME} !-d
# Send any path that does not start with "index.php" to /index.php, passing
# the original path in the "py" query parameter.
# NC = case-insensitive match, L = stop processing further rules,
# QSA = append the original query string to the rewritten one.
RewriteRule ^((?!index\.php).+)$ /index.php?py=$1 [NC,L,QSA]
import sys, os
import streamlit as st
def file2page_name(fname):
    """Turn a page file name such as ``"1_home.py"`` into a title (``"Home"``).

    Only a trailing ``.py`` extension is removed; a ``.py`` that occurs
    elsewhere in the name is kept. Everything up to and including the first
    underscore is dropped and the remainder is title-cased.

    Args:
        fname: File name of the form ``<prefix>_<name>.py``.

    Returns:
        The title-cased ``<name>`` part.

    Raises:
        IndexError: If ``fname`` contains no underscore.
    """
    # Strip the extension only at the end; str.replace would also delete a
    # ".py" in the middle of the name.
    stem = fname[:-3] if fname.endswith('.py') else fname
    return stem.split("_", 1)[1].title()
sys.path.append("..")
page_files = dict()
"""
A python wrapper for the icount.co.il api
https://www.icount.co.il/api-v3/
"""
import json
from urllib import request, parse
def post(url, data):
req = request.Request(url, data=parse.urlencode(data).encode())