Skip to content

Instantly share code, notes, and snippets.

This file has been truncated, but you can view the full file.
{
"90001": {
"address_components": [
{
"long_name": "90001",
"short_name": "90001",
"types": [
"postal_code"
]
},
{
"results" : [
{
"address_components" : [
{
"long_name" : "1049",
"short_name" : "1049",
"types" : [ "street_number" ]
},
{
@ranihorev
ranihorev / BPE
Created January 6, 2019 01:07
Byte Pair Encoding example (Source: Sennrich et al. - https://arxiv.org/abs/1508.07909)
import re, collections
def get_stats(vocab):
    """Count adjacent symbol pairs across a vocabulary, weighted by frequency.

    Args:
        vocab: Mapping whose keys are strings of whitespace-separated
            symbols and whose values are the numeric frequency of each key.

    Returns:
        A ``collections.defaultdict(int)`` mapping each
        ``(left_symbol, right_symbol)`` tuple of neighbouring symbols to the
        sum of the frequencies of the entries it occurs in (counted once per
        occurrence). Entries with fewer than two symbols contribute nothing.
    """
    pairs = collections.defaultdict(int)
    for word, freq in vocab.items():
        symbols = word.split()
        # zip against the one-shifted list walks every neighbouring pair and
        # naturally yields nothing for words with zero or one symbol.
        for left, right in zip(symbols, symbols[1:]):
            pairs[left, right] += freq
    return pairs
@ranihorev
ranihorev / BPE
Created January 6, 2019 01:07
Byte Pair Encoding example (Source: Sennrich et al.)
import re, collections
def get_stats(vocab):
    """Tally how often each pair of neighbouring symbols occurs in *vocab*.

    Args:
        vocab: Mapping from a string of whitespace-separated symbols to the
            numeric frequency of that string.

    Returns:
        A ``collections.defaultdict(int)`` keyed by
        ``(symbol, next_symbol)`` tuples; each value is the total frequency
        of the entries containing that adjacent pair, counted once per
        occurrence. Keys with fewer than two symbols add no pairs.
    """
    pairs = collections.defaultdict(int)
    for word, freq in vocab.items():
        symbols = word.split()
        # Pair every symbol with its successor; an empty or single-symbol
        # word produces no pairs.
        for current, following in zip(symbols, symbols[1:]):
            pairs[current, following] += freq
    return pairs
@ranihorev
ranihorev / Structured_with_text.py
Last active June 24, 2020 05:13
PyTorch module for classification or regression of categorical+continuous+text inputs. This module is based on the fast.ai library.
from fastai.text import *
from fastai.structured import proc_df
import pandas as pd
import numpy as np
class MixedInputModelWithText(nn.Module):
def __init__(self, emb_szs, n_cont, emb_drop, out_sz, szs, drops,
y_range=None, use_bn=False, is_reg=True, is_multi=False, n_text=0):
super().__init__()
for i, (c, s) in enumerate(emb_szs): assert c > 1, f"cardinality must be >=2, got emb_szs[{i}]: ({c},{s})"
0xCe5E7214E74b62F2a1398db5CFb86eF68f8c1EF3
0x732d75a4000cB2F38914FC1B6440A9E3753e21f2
0x541ea7e288d7344b28903ca862ff9fe3efc8c6cd
0xE837758E2f4A21dd0abd4eC3CA252B5ad352bcB2
0xe837758e2f4a21dd0abd4ec3ca252b5ad352bcb2