Skip to content

Instantly share code, notes, and snippets.

"""
Collection of methods built to assist in data augmentation for extraction datasets
"""
from ast import literal_eval
import json
import random
from collections import defaultdict
from functools import partial
from typing import Iterable, Dict, Callable
@Slater-Victoroff
Slater-Victoroff / needle.py
Created May 17, 2019 01:09
Demo script for needle-in-a-haystack problems
"""
Demo script for Needle-in-a-haystack problems
"""
from functools import partial
import numpy as np
from indicoio.custom import vectorize
from scipy.spatial.distance import cdist
from scipy.stats import gmean
import os
import re
def fix_moodle_output(assn_dir, outdir):
    """Create one directory under ``outdir`` per file-name prefix in ``assn_dir``.

    Every file found anywhere below ``assn_dir`` contributes the part of its
    name before the first underscore; spaces and hyphens are stripped from
    that prefix and a directory with the resulting name is created inside
    ``outdir`` (along with ``outdir`` itself if it is missing).

    Args:
        assn_dir: Directory tree to scan for files.
        outdir: Directory in which the per-prefix directories are created.
    """
    for _, _, filenames in os.walk(assn_dir):
        for filename in filenames:
            # Remove spaces so we can use this as a package name
            name = filename.split("_")[0].replace(" ", "").replace("-", "")
            # exist_ok avoids the check-then-create race and makes repeated
            # prefixes (several files sharing one prefix) a no-op.
            os.makedirs(os.path.join(outdir, name), exist_ok=True)
[4.8502882289326867e-14, 8.820403952763587e-14, 1.0250590153123516e-13, 1.6954166246183968e-13, 1.908753789966549e-13, 2.004836996365545e-13, 3.5909784960471013e-13, 5.166693869401628e-13, 7.550572170701337e-13, 9.448695131203987e-13, 1.0156517689399418e-12, 1.1252632977953388e-12, 1.2755835948385307e-12, 1.718926371953829e-12, 5.561410412067812e-12, 5.612637550150243e-12, 8.453230126630751e-12, 8.725636132121303e-12, 1.613513316811227e-11, 1.9752204589154072e-11, 3.992415416880054e-11, 5.064793173473998e-11, 6.907684718699313e-11, 7.573645788325202e-11, 1.9359271945838294e-10, 2.5869603101103545e-10, 3.2604244542604767e-10, 6.107383625965949e-10, 3.054349088416707e-09, 3.948608349010387e-09, 4.447405216487758e-09, 1.0212339772102215e-06, 1.3008598108779382e-06, 1.3008598108779382e-06, 1.3008598108779382e-06, 1.3008598108779382e-06, 1.3008598108779382e-06, 1.3008598108779382e-06, 1.3008598108779382e-06, 1.3008598108779382e-06, 1.3008598108779382e-06, 1.3008598108779382e-06, 1.3008598108779382e-06, 1.300859810
@Slater-Victoroff
Slater-Victoroff / oneliner.py
Last active November 8, 2017 06:21
Firstletter oneliner
def firstNonRepeated(s):
    """Return the first item of ``s`` that occurs exactly once, or ``""``.

    Args:
        s: A string (or other sequence of hashable items) to scan.

    Returns:
        The earliest item whose total count in ``s`` is one; the empty
        string when every item repeats or ``s`` is empty.
    """
    from collections import Counter

    counts = Counter(s)
    # next() stops at the first match instead of building the full list.
    return next((letter for letter in s if counts[letter] == 1), "")
def first_non_repeated1(s):
    """Return the first item of ``s`` that occurs exactly once, or ``""``.

    Explicit-loop variant: counts every item once up front, then walks
    ``s`` in order and returns on the first item with a count of one.

    Args:
        s: A string (or other sequence of hashable items) to scan.

    Returns:
        The earliest non-repeated item, or the empty string if there is none.
    """
    from collections import Counter

    occurrences = Counter(s)
    for letter in s:
        if occurrences[letter] == 1:
            return letter
    return ""
def first_non_repeated1(s):
@Slater-Victoroff
Slater-Victoroff / twentyqs.py
Last active March 8, 2019 15:25
20 questions with word embeddings
import json
from collections import OrderedDict
import numpy as np
from scipy.spatial.distance import cdist
from indicoio.custom import vectorize
from nouns import NOUNS
# Load the feature table once at import time. OrderedDict keeps the keys in
# the order they appear in features.json; the context manager closes the
# file handle as soon as parsing finishes.
with open("features.json") as features_file:
    FEATURES = json.load(features_file, object_pairs_hook=OrderedDict)
{"http://images.neimanmarcus.com/product_assets/T/8/A/P/X/NMT8APX_mz.jpg": [0.0, 0.0, 0.7503045797348022, 0.0, 4.050353527069092, 0.0, 0.0, 3.970454454421997, 2.56343936920166, 0.0, 0.0, 0.0, 0.0, 0.8763924241065979, 2.9059646129608154, 1.7854936122894287, 0.0, 0.0, 0.0, 3.603835105895996, 0.0, 0.0, 0.0, 0.0, 2.9097509384155273, 0.6788361072540283, 0.0, 0.0, 0.0, 0.0, 0.9168252348899841, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.21072697639465332, 0.0, 2.585055112838745, 0.0, 0.18340826034545898, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.772193431854248, 0.0, 0.0, 0.0, 0.0, 1.1300804615020752, 0.0, 0.0, 0.0, 1.8126261234283447, 0.0, 0.0, 2.4351160526275635, 1.0090645551681519, 6.7620415687561035, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.6218217611312866, 0.0, 3.785642385482788, 0.0, 0.0, 0.0, 0.4094317555427551, 0.0, 1.046784520149231, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5131956338882446, 0.0, 2.341181516647339, 3.7282121181488037, 2.7647757530212402, 0.0, 0.0, 0.2037421613931656, 0.0, 0.0, 2.629900932
@Slater-Victoroff
Slater-Victoroff / datums.txt
Last active September 13, 2015 18:17
Sample data
id title description google_product_category product_type price sale_price sale_price_effective_date link mobile_link image_link additional_image_link brand gtin mpn identifier_exists condition availability availability_date item_group_id color material pattern size size_type size_system gender age_group tax shipping shipping_weight shipping_label multipack is_bundle adult adwords_redirect adwords_grouping adwords_labels custom_label_0 custom_label_1 custom_label_2 custom_label_3 custom_label_4 excluded_destination expiration_date promotion_id display_ads_id display_ads_link display_ads_similar_id display_ads_title display_ads_value shipping_length shipping_width shipping_height
sku152300450 Washable-Crepe Straight-Leg Pants, Petite, Size: PS (6/8), BLACK - Eileen Fisher Eileen Fisher Washable-Crepe Straight-Leg Pants, Petite Details From Eileen Fisher, bi-stretch crepe pants with elegant day-to-night texture and remarkable fit memory, great for travel. 29" approx. inseam. Regular rise; yoked waist contours t
def get_voter_links(outfile="voter_info.txt"):
    """Fetch a fixed range of directory pages and append their links to a file.

    Requests ``complete.php?id=N`` for every N from 128555 up to (but not
    including) 214545, selects the ``tr>td>a`` anchors on each page, drops
    the first 14 matches, and appends every non-empty ``href`` to
    ``outfile``, one per line. Each URL is printed after it is processed.

    Args:
        outfile: Path of the file to append links to.
    """
    # The selector does not depend on the page, so build it once.
    link_selector = CSSSelector('tr>td>a')
    with open(outfile, 'a') as sink:
        for voter_id in range(128555, 214545):
            url = "http://usavoters.directory/complete.php?id=%s" % voter_id
            document = etree.HTML(requests.get(url).content)
            # NOTE(review): the first 14 anchors are skipped -- presumably
            # page navigation rather than person links; confirm.
            person_links = link_selector(document)[14:]
            hrefs = [link.get('href') for link in person_links if link.get('href')]
            if hrefs:
                # Terminate each page's batch with a newline so its last
                # link is not fused with the next page's first link.
                sink.write('\n'.join(hrefs) + '\n')
            print(url)
@Slater-Victoroff
Slater-Victoroff / minify.py
Created September 4, 2014 13:08
JS/CSS minification in python
import requests
import json
import urllib
# Minifier endpoint URLs, keyed by asset type ('css' or 'js').
URLS = {
'css': 'http://cssminifier.com/raw',
'js': 'http://javascript-minifier.com/raw'
}
def new_filepath(filepath):