thomashikaru / nyc_libraries.py
Last active April 15, 2020 21:49
Mapping Manhattan by distance to nearest library
import pandas as pd
import json
import math
import plotly.express as px
import numpy as np
from sklearn import neighbors
# load geojson data for manhattan
nycmap = json.load(open("nycpluto_manhattan.geojson"))
thomashikaru / nyc_neighborhood_density.py
Created April 14, 2020 22:10
Visualizing density in New York City's 5 boroughs by neighborhood
import pandas as pd
import json
import math
import plotly.express as px
from area import area
# read the neighborhood population data into a DataFrame and load the GeoJSON data
df = pd.read_csv('New_York_City_Population_By_Neighborhood_Tabulation_Areas.csv')
nycmap = json.load(open("nyc_neighborhoods.geojson"))
thomashikaru / trump_markov.py
Created May 15, 2020 15:49
Generate fake Donald Trump tweets using a Markov Model
import numpy as np
class MarkovModel:
    """Represents a Markov Model for a given text"""

    def __init__(self, n, text):
        """Constructor takes n-gram length and training text
        and builds dictionary mapping n-grams to
        character-probability mappings."""
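The gist preview stops inside the constructor. A minimal sketch of the body the docstring describes, counting which character follows each n-gram and normalizing the counts into probabilities (the variable names are illustrative, not from the gist):

        self.n = n
        self.d = {}
        # count which character follows each n-gram in the text
        for i in range(len(text) - n):
            gram, nxt = text[i:i + n], text[i + n]
            self.d.setdefault(gram, {})
            self.d[gram][nxt] = self.d[gram].get(nxt, 0) + 1
        # normalize follower counts into probabilities per n-gram
        for gram, followers in self.d.items():
            total = sum(followers.values())
            self.d[gram] = {ch: c / total for ch, c in followers.items()}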
import tensorflow as tf
import numpy as np
import os
# load and lightly pre-process data
text = " ".join(open("trump_tweets_all.txt").readlines())  # file -> one long string
text = " ".join(text.split())                              # collapse runs of whitespace
text = text.encode("ascii", errors="ignore").decode()      # drop non-ASCII characters
print(text[:100])
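The imports suggest a character-level TensorFlow model trained on this text. A sketch of the usual next step, vectorizing the characters into integer ids and windowed (input, target) pairs; the sequence length and variable names here are assumptions:

# map each distinct character to an integer id
vocab = sorted(set(text))
char2idx = {c: i for i, c in enumerate(vocab)}
idx2char = np.array(vocab)
text_as_int = np.array([char2idx[c] for c in text])

# slice the id stream into fixed-length training windows
seq_length = 100
dataset = tf.data.Dataset.from_tensor_slices(text_as_int)
dataset = dataset.batch(seq_length + 1, drop_remainder=True)
dataset = dataset.map(lambda chunk: (chunk[:-1], chunk[1:]))  # shift by one for next-char targets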
import torch
import plotly.express as px
import pandas as pd
# Batch Size, Input Neurons, Hidden Neurons, Output Neurons
N, D_in, H, D_out = 128, 2, 1024, 1
# Create random Tensors to hold inputs and outputs
x = torch.rand(N, D_in)
y = torch.randint(0, 2, (N, D_out))
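The shapes above (2 inputs, 1 output, 0/1 targets) suggest a binary classifier. A minimal training-loop sketch consistent with them; the architecture, loss, and learning rate are assumptions, not from the gist:

model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out),
)
loss_fn = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

losses = []
for step in range(500):
    logits = model(x)                   # raw scores, shape (N, D_out)
    loss = loss_fn(logits, y.float())   # BCE expects float targets
    losses.append(loss.item())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# the pandas/plotly imports fit a loss-curve plot like this
fig = px.line(pd.DataFrame({"step": range(len(losses)), "loss": losses}), x="step", y="loss")
fig.show()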
import torch
import plotly.graph_objects as go
import numpy as np
# Batch Size, Input Neurons, Hidden Neurons, Output Neurons
N, D_in, H, D_out = 16, 1, 1024, 1
# Create random Tensors to hold inputs and outputs
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)
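This variant looks like a 1-D regression demo. A sketch of a fit-and-plot continuation using the imports above; the network shape, optimizer, and plotting details are assumptions:

model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),
    torch.nn.Tanh(),
    torch.nn.Linear(H, D_out),
)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
for step in range(1000):
    loss = torch.nn.functional.mse_loss(model(x), y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# scatter the data against the fitted curve with plotly.graph_objects
xs = np.linspace(float(x.min()), float(x.max()), 100).reshape(-1, 1)
with torch.no_grad():
    ys = model(torch.tensor(xs, dtype=torch.float32)).numpy()
fig = go.Figure()
fig.add_trace(go.Scatter(x=x.numpy().ravel(), y=y.numpy().ravel(), mode="markers", name="data"))
fig.add_trace(go.Scatter(x=xs.ravel(), y=ys.ravel(), mode="lines", name="fit"))
fig.show()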
import argparse
import pandas as pd
if __name__ == "__main__":
# create argument parser and define arguments
parser = argparse.ArgumentParser()
parser.add_argument("--inputfile", default="inputfile.csv")
parser.add_argument("--num_rows", type=int, default=10)
parser.add_argument("--print_output", action="store_true")
import itertools

# without formatter
dictionary = {"a":[1,2,3,4,5,6,7,8,9,8,7,6,5,4,3,2,1], "b":[9,8,7,6,5,4,3,2,1,2,3,4,5,6,7,8,9]}
list_of_items = [f"A: {a}, B: {b}, C: {c}" for a, b, c in itertools.product(range(0,100,2), range(0,100,3), range(0,100,4))]
# with formatter
dictionary = {
"a": [1, 2, 3, 4, 5, 6, 7, 8, 9, 8, 7, 6, 5, 4, 3, 2, 1],
"b": [9, 8, 7, 6, 5, 4, 3, 2, 1, 2, 3, 4, 5, 6, 7, 8, 9],
}
list_of_items = [
    f"A: {a}, B: {b}, C: {c}"
    for a, b, c in itertools.product(
        range(0, 100, 2), range(0, 100, 3), range(0, 100, 4)
    )
]
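The gist doesn't name the formatter, but the trailing commas and one-element-per-line layout match black's output; assuming that, the second version is what you'd get from simply running: black snippet.py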
from collections import Counter
import glob
# get list of filenames matching a pattern using glob
filenames = glob.glob("path/to/many/files/*.txt")
# create empty counter object
counts = Counter()
# loop over files, create a counter for each, and merge into counts
for filename in filenames:
    with open(filename) as f:
        counts.update(Counter(f.read().split()))
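Once the loop has run, the merged tallies behave like any Counter, for example:

# the ten most frequent words across all files
print(counts.most_common(10))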
from collections import defaultdict

class BigramModel:
    def train(self, training_set):
        # count adjacent word pairs: d[w1][w2] = times w2 follows w1
        self.d = defaultdict(lambda: defaultdict(int))
        for sent in training_set:
            for w1, w2 in zip(sent[:-1], sent[1:]):
                self.d[w1][w2] += 1
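The preview ends with train(). A minimal sketch of a sampling method these counts support (the method name and the use of random.choices are assumptions, and random must be imported):

    def sample_next(self, w1):
        # draw a next word after w1, weighted by observed bigram counts
        followers = self.d[w1]
        words = list(followers)
        weights = [followers[w] for w in words]
        return random.choices(words, weights=weights, k=1)[0]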