Skip to content

Instantly share code, notes, and snippets.


Uri Goren urigoren

View GitHub Profile
urigoren /
Last active Jul 10, 2021
Bag-of-words baseline for conditional text classification
from copy import deepcopy as clone
from sklearn.base import ClassifierMixin
from sklearn.pipeline import Pipeline
class ConditionedTextClassifier(ClassifierMixin):
def __init__(self, conditions, model, condition_sep=' <s> '):
self.conditions = {}
for c in conditions:
self.conditions[c] = clone(model)
urigoren /
Created May 1, 2021
A simple cascading config reader
import os, sys, json
from pathlib import Path
class ConfigReader:
def __init__(self, default=None, **kwargs):
self.py_file = Path(os.path.join(os.getcwd(), sys.argv[0])).absolute()
p = self.py_file.parent
found_config_json = []
while p!=Path('/'):
View google_drive2youtube.ipynb
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
urigoren / .htaccess
Last active Feb 7, 2021
Call python via command line from php
View .htaccess
# Front-controller rewrite: route every request for a non-existent file or
# directory to index.php, passing the original path as the `py` parameter
# (index.php then dispatches it — per the gist title, to a Python CLI call).
# Follow symlinks only when the link and target share an owner (required for
# mod_rewrite on many shared hosts).
Options +SymLinksIfOwnerMatch
RewriteEngine on
# Only rewrite when the request does not match an existing file (-f) ...
RewriteCond %{REQUEST_FILENAME} !-f
# ... or an existing directory (-d).
RewriteCond %{REQUEST_FILENAME} !-d
# Rewrite everything except index.php itself to index.php?py=<path>.
# NC = case-insensitive, L = last rule, QSA = keep the original query string.
RewriteRule ^((?!index\.php).+)$ /index.php?py=$1 [NC,L,QSA]
import sys, os
import streamlit as st
def file2page_name(fname):
    """Derive a human-readable page name from a Streamlit page filename.

    Strips a trailing '.py' extension, drops the ordering prefix before the
    first underscore (e.g. '01_home.py' -> 'Home'), and title-cases the rest.

    Fixes two defects of the original one-liner:
    - ``replace('.py', '')`` removed '.py' anywhere in the name, not just
      the extension; now only a trailing '.py' is stripped.
    - ``split("_", 1)[1]`` raised IndexError for filenames without an
      underscore; now the whole stem is used as a fallback.
    """
    # Strip only the trailing extension, not embedded '.py' substrings.
    stem = fname[:-3] if fname.endswith('.py') else fname
    # partition never raises: sep is '' when no underscore is present.
    _prefix, sep, page = stem.partition('_')
    return (page if sep else stem).title()
page_files = dict()
from collections import namedtuple
from datetime import datetime
date_pattern = "%Y-%m-%dT%H:%M:%S.%fZ"
Point = namedtuple("Point", ("x", "y"))
def serialize_datetime(nt):
assert hasattr(nt, '_asdict')
View pdf2grid.py
import numpy as np
import pdfplumber
import itertools, collections, sys, os, re, json
from pprint import pprint as pr
from copy import deepcopy
from operator import itemgetter as at
class CartesianText:
__slots__ = ["text", "x0", "x1", "y0", "y1", "page_height"]
import collections
import wikipedia
from bs4 import BeautifulSoup
def infobox(wiki_page):
"""Returns the infobox of a given wikipedia page"""
if isinstance(wiki_page, str):
wiki_page =
soup = BeautifulSoup(wiki_page.html()).find_all("table", {"class": "infobox"})[0]
import numpy as np
import collections, itertools, string
from scipy.cluster import hierarchy
from scipy.spatial import distance
from sklearn.feature_extraction import text
from editdistance import distance as editdistance
def edit_pdist(toks, normalize=False):
"""Return pairwise editdistance matrix"""
n = len(toks)
A Python wrapper for the API
import json
from urllib import request, parse
def post(url, data):
req = request.Request(url, data=parse.urlencode(data).encode())