Skip to content

Instantly share code, notes, and snippets.

Avatar

Uri Goren urigoren

View GitHub Profile
@urigoren
urigoren / ConditionedTextClassifier.py
Last active Jul 10, 2021
Bag-of-words baseline for conditional text classification
View ConditionedTextClassifier.py
from copy import deepcopy as clone
from sklearn.base import ClassifierMixin
from sklearn.pipeline import Pipeline
class ConditionedTextClassifier(ClassifierMixin):
def __init__(self, conditions, model, condition_sep=' <s> '):
self.condition_sep=condition_sep
self.conditions = {}
for c in conditions:
self.conditions[c] = clone(model)
@urigoren
urigoren / config_reader.py
Created May 1, 2021
A simple cascading config reader
View config_reader.py
import os, sys, json
from pathlib import Path
class ConfigReader:
def __init__(self, default=None, **kwargs):
self.default=default
self.py_file = Path(os.path.join(os.getcwd(), sys.argv[0])).absolute()
p = self.py_file.parent
found_config_json = []
while p!=Path('/'):
View google_drive2youtube.ipynb
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@urigoren
urigoren / .htaccess
Last active Feb 7, 2021
Call python via command line from php
View .htaccess
# Front-controller rewrite: send every request that does not map to a real
# file or directory to /index.php, passing the requested path in the `py`
# query parameter. (What index.php does with `py` is not shown here.)

# Follow symbolic links only when the link and its target share an owner.
Options +SymLinksIfOwnerMatch
# Turn on mod_rewrite processing for this directory.
RewriteEngine on
# Apply the rule only if the request is not an existing regular file ...
RewriteCond %{REQUEST_FILENAME} !-f
# ... and not an existing directory.
RewriteCond %{REQUEST_FILENAME} !-d
# Match any non-empty path that does not start with "index.php" (negative
# lookahead) and capture it as $1.
# Flags: NC = case-insensitive match, L = stop processing further rules,
# QSA = append the original query string to the rewritten URL.
RewriteRule ^((?!index\.php).+)$ /index.php?py=$1 [NC,L,QSA]
View streamlit_multipage.py
import sys, os
import streamlit as st
def file2page_name(fname):
    """Derive a page title from a page script's file name.

    The name is expected to look like ``<prefix>_<label>.py``. The trailing
    ``.py`` extension is dropped, everything up to and including the first
    underscore is discarded, and the remainder is title-cased with
    ``str.title()`` (remaining underscores are kept as-is).

    Args:
        fname: File name of the page script, e.g. ``"01_data_explorer.py"``.

    Returns:
        The title-cased label, e.g. ``"Data_Explorer"``. If the name has no
        underscore, the whole stem is used instead of raising ``IndexError``.
    """
    extension = ".py"
    # Strip the extension only when it is a real suffix; a blanket
    # str.replace would also eat ".py" occurring in the middle of the name.
    stem = fname[:-len(extension)] if fname.endswith(extension) else fname
    _prefix, separator, label = stem.partition("_")
    # No underscore means there is no prefix to drop, so keep the full stem.
    return (label if separator else stem).title()
sys.path.append("..")
page_files = dict()
View namedtuples.py
from collections import namedtuple
from datetime import datetime
date_pattern = "%Y-%m-%dT%H:%M:%S.%fZ"
Point = namedtuple("Point", ("x", "y"))
def serialize_datetime(nt):
assert hasattr(nt, '_asdict')
View pdf2grid.py
import numpy as np
import pdfplumber
import itertools, collections, sys, os, re, json
from pprint import pprint as pr
from copy import deepcopy
from operator import itemgetter as at
class CartesianText:
__slots__ = ["text", "x0", "x1", "y0", "y1", "page_height"]
View wikipedia_infobox.py
import collections
import wikipedia
from bs4 import BeautifulSoup
def infobox(wiki_page):
"""Returns the infobox of a given wikipedia page"""
if isinstance(wiki_page, str):
wiki_page = wikipedia.page(wiki_page)
try:
soup = BeautifulSoup(wiki_page.html()).find_all("table", {"class": "infobox"})[0]
View AgglomerativeClustering_text.py
import numpy as np
import collections, itertools, string
from scipy.cluster import hierarchy
from scipy.spatial import distance
from sklearn.feature_extraction import text
from editdistance import distance as editdistance
def edit_pdist(toks, normalize=False):
"""Return pairwise editdistance matrix"""
n = len(toks)
View icount.py
"""
A python wrapper for the icount.co.il api
https://www.icount.co.il/api-v3/
"""
import json
from urllib import request, parse
def post(url, data):
req = request.Request(url, data=parse.urlencode(data).encode())