Morris Alper morrisalp

## wiktionary_category.py
import requests

def pages_in_wiktionary_category(category_name, language='en'):
    """Yield the title of every page listed in a Wiktionary category.

    Queries the ``categorymembers`` list of the MediaWiki API at
    ``https://{language}.wiktionary.org`` in batches of up to 500 entries and
    keeps requesting until the response no longer contains a ``continue``
    entry.

    Args:
        category_name: Category name without the ``Category:`` prefix.
        language: Subdomain of wiktionary.org to query (default ``'en'``).

    Yields:
        The ``title`` field of each returned category member.
    """
    endpoint = f'https://{language}.wiktionary.org/w/api.php'
    params = {
        'action': 'query',
        'list': 'categorymembers',
        'cmtitle': f'Category:{category_name}',
        'cmlimit': 500,
        'format': 'json',
    }
    while True:
        # Passing ``params`` lets requests percent-encode every value, so
        # category names containing '&', '#', '+' or spaces reach the API
        # intact instead of corrupting the query string.
        obj = requests.get(endpoint, params=params).json()
        for member in obj['query']['categorymembers']:
            yield member['title']
        if 'continue' not in obj:
            break
        # The continuation token is only sent once the API has issued one.
        params['cmcontinue'] = obj['continue']['cmcontinue']

## .vimrc
" Use UTF-8 as the character encoding inside Vim
set encoding=utf-8
" Copy the indent of the current line when starting a new line
set autoindent
" Insert spaces instead of tab characters
set expandtab
" A tab counts for 4 columns, and each indent step is 4 columns
set tabstop=4
set shiftwidth=4
" Show line numbers
set number
" Highlight all search matches and show matches while the pattern is typed
set hlsearch incsearch
" Enhanced command-line completion
set wildmenu
" Show the partially typed command in the last line of the screen
set showcmd
" Turn on syntax highlighting
syntax on

## load_conll2003.py
import pandas as pd

def read_conll(filename):
    """Load a space-separated, four-column token file into a DataFrame.

    Each non-blank line supplies TOKEN, POS, CHUNK and NE values. Blank lines
    are kept while parsing so they can serve as sentence separators: the
    SENTENCE column holds the number of blank lines seen up to each row.

    Args:
        filename: Path (or anything ``pd.read_csv`` accepts) of the file.

    Returns:
        The parsed rows without the blank separator rows, with the extra
        SENTENCE column.
    """
    column_names = ['TOKEN', 'POS', 'CHUNK', 'NE']
    frame = pd.read_csv(
        filename,
        sep=' ',
        header=None,
        names=column_names,
        keep_default_na=False,  # keep empty fields as '' rather than NaN
        quoting=3,  # 3 == csv.QUOTE_NONE: quote characters are plain text
        skip_blank_lines=False,
    )
    is_separator = frame['TOKEN'] == ''
    frame['SENTENCE'] = is_separator.cumsum()
    return frame[~is_separator]

## transformer.py
from tensorflow.keras.layers import Input, Dense, Lambda, Reshape, Activation, Layer, LayerNormalization, Add
from tensorflow.keras.models import Sequential
from tensorflow.keras import Model
import tensorflow as tf

class SelfAttention(Layer):
    """Keras layer that records how many attention heads it is configured with.

    Only the constructor is visible in this snippet.

    Args:
        heads: Number of attention heads (default 8).
        **kwargs: Standard Keras ``Layer`` arguments such as ``name`` or
            ``dtype``; forwarded unchanged to the base class.
    """

    def __init__(self, heads=8, **kwargs):
        # Forward the base-layer arguments instead of rejecting them, so the
        # layer can be named and configured like built-in Keras layers.
        super().__init__(**kwargs)
        self.heads = heads


## spacy_newline.py
import spacy

# Load the spaCy pipeline registered under the name 'en' once, at import time.
# NOTE(review): shortcut names such as 'en' are no longer accepted by spaCy v3;
# confirm the installed version, or load a full package name instead.
nlp = spacy.load('en')

def set_custom_boundaries(doc):
    """Mark the token after every newline token as a sentence start.

    Every token except the last is inspected; when its text is exactly
    ``"\\n"``, the token at the next index gets ``is_sent_start = True``.

    Args:
        doc: Indexable, sliceable sequence of tokens exposing ``text``, ``i``
            and a writable ``is_sent_start``.

    Returns:
        The same ``doc`` object, modified in place.
    """
    for current in doc[:-1]:
        if current.text != "\n":
            continue
        following = doc[current.i + 1]
        following.is_sent_start = True
    return doc

## grequests_tqdm.py
from tqdm import tqdm
import requests, grequests

class ProgressSession:
    """Progress bar that tracks responses for a collection of URLs.

    The bar is sized to ``len(urls)``. ``update`` takes a response plus
    arbitrary extra arguments — presumably so it can be registered as a
    response hook; confirm at the call site.
    """

    def __init__(self, urls):
        """Create a bar with one step per entry in ``urls`` and keep the URLs."""
        request_count = len(urls)
        self.pbar = tqdm(total=request_count, desc='Making async requests')
        self.urls = urls

    def update(self, r, *args, **kwargs):
        """Advance the bar by one step unless ``r`` is a redirect response."""
        if r.is_redirect:
            return
        self.pbar.update()

## html2text.py
from bs4 import BeautifulSoup as bs

def html2text(html):
    """Convert an HTML string to plain text.

    The markup is parsed with the lxml parser, ``<script>`` and ``<style>``
    elements are removed, every ``<br>`` is replaced by a newline, and the
    remaining text nodes are joined with single spaces.

    Args:
        html: HTML markup to convert.

    Returns:
        The extracted text with leading and trailing whitespace stripped.
    """
    soup = bs(html, features='lxml')
    for hidden in soup.find_all(["script", "style"]):
        hidden.decompose()
    for line_break in soup.find_all("br"):
        line_break.replace_with("\n")
    text = soup.get_text(separator=' ')
    return text.strip()

## simple_bert.py
import tensorflow as tf
import tensorflow_hub as hub
from tokenizers import BertWordPieceTokenizer
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
import numpy as np

class BERTPreprocessor:
    """Preprocessing helper for BERT input; only its SEP_TOKEN constant is visible here."""
    # Literal text of the separator token
    SEP_TOKEN = '[SEP]'


## top_k_categorical_accuracy.py
import numpy as np

def top_k_categorical_accuracy(y_true, y_pred_proba, k=1):
    """Return the fraction of rows whose true label is among the top-k scores.

    Args:
        y_true: 1-D array-like of integer class indices, one per row.
        y_pred_proba: 2-D array-like of per-class scores, one row per sample.
        k: Number of highest-scoring classes that count as a hit (>= 1).

    Returns:
        The mean, over rows, of whether the true class index is among the
        ``k`` columns with the highest scores.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        # ``[:, -0:]`` would select every column and report a perfect score.
        raise ValueError(f"k must be at least 1, got {k}")
    # Accept plain sequences as well as arrays for the labels.
    y_true = np.asarray(y_true)
    # argsort is ascending, so the last k columns are the k best classes.
    top_k = np.argsort(y_pred_proba)[:, -k:]
    return np.equal(top_k, y_true[:, None]).any(axis=1).mean()

## flask_caching_demo.py
from flask import Flask
from flask_caching import Cache

# Create the Flask application and attach a Flask-Caching cache to it.
app = Flask(__name__)
# NOTE(review): "simple" looks like the legacy name of the in-memory backend;
# newer Flask-Caching releases spell it "SimpleCache" — confirm against the
# installed version.
app.config.from_mapping({"CACHE_TYPE": "simple"})
cache = Cache(app)

def approximate_pi(n):
    output = 0
    for i in range(1, n):
	import requests

	def pages_in_wiktionary_category(category_name, language='en'):
	    """Yield the title of every page listed in a Wiktionary category.

	    Queries the ``categorymembers`` list of the MediaWiki API at
	    ``https://{language}.wiktionary.org`` in batches of up to 500 entries
	    and keeps requesting until the response no longer contains a
	    ``continue`` entry.

	    Args:
	        category_name: Category name without the ``Category:`` prefix.
	        language: Subdomain of wiktionary.org to query (default ``'en'``).

	    Yields:
	        The ``title`` field of each returned category member.
	    """
	    endpoint = f'https://{language}.wiktionary.org/w/api.php'
	    params = {
	        'action': 'query',
	        'list': 'categorymembers',
	        'cmtitle': f'Category:{category_name}',
	        'cmlimit': 500,
	        'format': 'json',
	    }
	    while True:
	        # Passing ``params`` lets requests percent-encode every value, so
	        # category names containing '&', '#', '+' or spaces reach the API
	        # intact instead of corrupting the query string.
	        obj = requests.get(endpoint, params=params).json()
	        for member in obj['query']['categorymembers']:
	            yield member['title']
	        if 'continue' not in obj:
	            break
	        # The continuation token is only sent once the API has issued one.
	        params['cmcontinue'] = obj['continue']['cmcontinue']
	" Use UTF-8 as the character encoding inside Vim
	set encoding=utf-8
	" Copy the indent of the current line when starting a new line
	set autoindent
	" Insert spaces instead of tab characters
	set expandtab
	" A tab counts for 4 columns, and each indent step is 4 columns
	set tabstop=4
	set shiftwidth=4
	" Show line numbers
	set number
	" Highlight all search matches and show matches while the pattern is typed
	set hlsearch incsearch
	" Enhanced command-line completion
	set wildmenu
	" Show the partially typed command in the last line of the screen
	set showcmd
	" Turn on syntax highlighting
	syntax on
	import pandas as pd

	def read_conll(filename):
	    """Load a space-separated, four-column token file into a DataFrame.

	    Each non-blank line supplies TOKEN, POS, CHUNK and NE values. Blank
	    lines are kept while parsing so they can serve as sentence separators:
	    the SENTENCE column holds the number of blank lines seen up to each row.

	    Args:
	        filename: Path (or anything ``pd.read_csv`` accepts) of the file.

	    Returns:
	        The parsed rows without the blank separator rows, with the extra
	        SENTENCE column.
	    """
	    df = pd.read_csv(
	        filename,
	        sep=' ',
	        header=None,
	        keep_default_na=False,  # keep empty fields as '' rather than NaN
	        names=['TOKEN', 'POS', 'CHUNK', 'NE'],
	        quoting=3,  # 3 == csv.QUOTE_NONE: quote characters are plain text
	        skip_blank_lines=False,
	    )
	    df['SENTENCE'] = (df.TOKEN == '').cumsum()
	    return df[df.TOKEN != '']
	from tensorflow.keras.layers import Input, Dense, Lambda, Reshape, Activation, Layer, LayerNormalization, Add
	from tensorflow.keras.models import Sequential
	from tensorflow.keras import Model
	import tensorflow as tf

	class SelfAttention(Layer):
	    """Keras layer that records how many attention heads it is configured with.

	    Only the constructor is visible in this snippet.

	    Args:
	        heads: Number of attention heads (default 8).
	        **kwargs: Standard Keras ``Layer`` arguments such as ``name`` or
	            ``dtype``; forwarded unchanged to the base class.
	    """

	    def __init__(self, heads=8, **kwargs):
	        # Forward the base-layer arguments instead of rejecting them, so
	        # the layer can be named and configured like built-in Keras layers.
	        super().__init__(**kwargs)
	        self.heads = heads
	import spacy

	# Load the spaCy pipeline registered under the name 'en' once.
	# NOTE(review): shortcut names such as 'en' are no longer accepted by spaCy v3;
	# confirm the installed version, or load a full package name instead.
	nlp = spacy.load('en')

	def set_custom_boundaries(doc):
	    """Mark the token after every newline token as a sentence start.

	    Every token except the last is inspected; when its text is exactly
	    ``"\\n"``, the token at the next index gets ``is_sent_start = True``.

	    Args:
	        doc: Indexable, sliceable sequence of tokens exposing ``text``,
	            ``i`` and a writable ``is_sent_start``.

	    Returns:
	        The same ``doc`` object, modified in place.
	    """
	    for token in doc[:-1]:
	        if token.text == "\n":
	            doc[token.i + 1].is_sent_start = True
	    return doc
	from tqdm import tqdm
	import requests, grequests

	class ProgressSession:
	    """Progress bar that tracks responses for a collection of URLs.

	    The bar is sized to ``len(urls)``. ``update`` takes a response plus
	    arbitrary extra arguments — presumably so it can be registered as a
	    response hook; confirm at the call site.
	    """

	    def __init__(self, urls):
	        """Create a bar with one step per entry in ``urls`` and keep the URLs."""
	        self.pbar = tqdm(total=len(urls), desc='Making async requests')
	        self.urls = urls

	    def update(self, r, *args, **kwargs):
	        """Advance the bar by one step unless ``r`` is a redirect response."""
	        # Variadic parameters restored: extra positional and keyword
	        # arguments are accepted and ignored.
	        if not r.is_redirect:
	            self.pbar.update()
	from bs4 import BeautifulSoup as bs

	def html2text(html):
	    """Convert an HTML string to plain text.

	    The markup is parsed with the lxml parser, ``<script>`` and ``<style>``
	    elements are removed, every ``<br>`` is replaced by a newline, and the
	    remaining text nodes are joined with single spaces.

	    Args:
	        html: HTML markup to convert.

	    Returns:
	        The extracted text with leading and trailing whitespace stripped.
	    """
	    soup = bs(html, features='lxml')
	    for script in soup(["script", "style"]):
	        script.decompose()
	    for br in soup.find_all("br"):
	        br.replace_with("\n")
	    return soup.get_text(separator=' ').strip()
	import tensorflow as tf
	import tensorflow_hub as hub
	from tokenizers import BertWordPieceTokenizer
	from tensorflow.keras.layers import Input
	from tensorflow.keras.models import Model
	import numpy as np

	class BERTPreprocessor:
	    """Preprocessing helper for BERT input; only its SEP_TOKEN constant is visible here."""

	    # Literal text of the separator token
	    SEP_TOKEN = '[SEP]'
	import numpy as np

	def top_k_categorical_accuracy(y_true, y_pred_proba, k=1):
	    """Return the fraction of rows whose true label is among the top-k scores.

	    Args:
	        y_true: 1-D array-like of integer class indices, one per row.
	        y_pred_proba: 2-D array-like of per-class scores, one row per sample.
	        k: Number of highest-scoring classes that count as a hit (>= 1).

	    Returns:
	        The mean, over rows, of whether the true class index is among the
	        ``k`` columns with the highest scores.

	    Raises:
	        ValueError: If ``k`` is smaller than 1.
	    """
	    if k < 1:
	        # ``[:, -0:]`` would select every column and report a perfect score.
	        raise ValueError(f"k must be at least 1, got {k}")
	    # Accept plain sequences as well as arrays for the labels.
	    y_true = np.asarray(y_true)
	    # argsort is ascending, so the last k columns are the k best classes.
	    top_k = np.argsort(y_pred_proba)[:, -k:]
	    return np.equal(top_k, y_true[:, None]).any(axis=1).mean()
	from flask import Flask
	from flask_caching import Cache

	# Create the Flask application and attach a Flask-Caching cache to it.
	app = Flask(__name__)
	# NOTE(review): "simple" looks like the legacy name of the in-memory backend;
	# newer Flask-Caching releases spell it "SimpleCache" — confirm against the
	# installed version.
	app.config.from_mapping({"CACHE_TYPE": "simple"})
	cache = Cache(app)

	def approximate_pi(n):
	output = 0
	for i in range(1, n):