Skip to content

Instantly share code, notes, and snippets.

View mapmeld's full-sized avatar

Nick Doiron mapmeld

  • Chicago, IL
View GitHub Profile
@mapmeld
mapmeld / 1draft.py
Last active January 5, 2020 21:19
first-draft qa
from allennlp.predictors import Predictor
from transformers.tokenization_gpt2 import GPT2Tokenizer
from transformers import pipeline
class HuggingFacePredictor(Predictor):
def __init__(self) -> None:
self.tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
self.model = pipeline('question-answering')
def predict(self, passage='', question=''):
@mapmeld
mapmeld / qa.py
Created January 5, 2020 16:53
Q&A Testing
from allennlp.predictors.predictor import Predictor
predictor = Predictor.from_path("https://storage.googleapis.com/allennlp-public-models/bidaf-elmo-model-2018.11.30-charpad.tar.gz")
qas = open("simplified-nq-test.jsonl").read().split("\n")
for qa in qas:
rep = json.loads(qa)
best = rep['long_answer_candidates'][0]
print(rep['question_text'])
print('AllenNLP: ')
print(predictor.predict(
@mapmeld
mapmeld / state_specific.py
Created January 2, 2020 15:52
State-specific maps of Native American Communities
from sys import argv
import json
# pip install fiona shapely shapely-geojson
import fiona
from shapely.geometry import shape
from shapely_geojson import dumps
if len(argv) < 2:
print('usage: gen_map.py "New Mexico" > output.geojson')
@mapmeld
mapmeld / 2020_ml.md
Last active December 30, 2019 16:36
2020_ml_problems.md

The number of awesome ML projects is limitless, but:

This is a list of project ideas that I grouped together because they seem both awesome and achievable:

Open-ended Datasets

@mapmeld
mapmeld / mentionsum.py
Last active December 29, 2019 03:39
mentionsum
import pandas as pd
for lang in ['ar', 'en', 'ru', 'ja', 'tr', 'fa']:
mentionsum = {}
for doc in range(1, 10): # ends at 9
print(doc)
df = pd.read_csv("saudi_arabia_112019_tweets_csv_hashed_" + str(doc) + ".csv")
rows = df[df['tweet_language'] == lang][['user_mentions']].values.tolist()
df = None # clear memory
for row in rows:
mentions = row[0].replace('[','').replace(']','').replace('\'','').split(', ')
@mapmeld
mapmeld / langsum.py
Last active December 29, 2019 02:27
LangSum.py
import pandas as pd
# Running per-language count of non-retweet tweets, summed across all CSV parts.
dflangsum = None
for doc in range(1, 10):  # parts 1 through 9
    df = pd.read_csv("saudi_arabia_112019_tweets_csv_hashed_" + str(doc) + ".csv")
    # Number of original (non-retweet) tweets per language in this part.
    langcount = df[df['is_retweet'] == False].groupby(['tweet_language']).count()['tweetid']
    if dflangsum is not None:
        # A plain `+=` aligns the two Series on their index, so any language
        # that is missing from either side becomes NaN and stays NaN for the
        # rest of the run. fill_value=0 treats a missing language as zero.
        # The cast keeps the totals as integers after index alignment.
        dflangsum = dflangsum.add(langcount, fill_value=0).astype('int64')
    else:
        dflangsum = langcount
    df = None  # drop the reference so the part can be freed before the next read
@mapmeld
mapmeld / face_classifier.py
Created December 22, 2019 22:07
face_classifier.py
"""
# BASH dependencies
apt-get install python-opencv ffmpeg
pip install keras numpy shap matplotlib pillow
rm ./drive/My\ Drive/mlin/training/*/*.jpg
rm ./drive/My\ Drive/mlin/validation/*/*.jpg
"""
# native imports
@mapmeld
mapmeld / config.json
Created September 28, 2019 13:52
Config for Mozilla TTS
# set config.json for LJSpeech
%%writefile config.json
{
"run_name": "mozilla-tacotron-tagent-bn",
"run_description": "Xhosa",
"audio":{
// Audio processing parameters
"num_mels": 80, // size of the mel spec frame.
"num_freq": 1201, // number of stft frequency levels. Size of the linear spectrogram frame.
const fs = require('fs');
const en = JSON.parse(fs.readFileSync('en.json'));
const fa = JSON.parse(fs.readFileSync('fa.json'));
let terms = Object.keys(en);
let missing = {};
terms.forEach((term) => {
if (!fa[term]) {
@mapmeld
mapmeld / word_flatten.py
Created September 3, 2019 16:10
word_flatten.py
text_src = item['text']
del item['text']
words = wordpunct_tokenize(text_src)
sentence_vecs = []
for w in range(0, len(words)):
word = words[w]
if word in ar_model:
word_vec = ar_model[word]
else:
word_vec = ar_model['the']