Skip to content

Instantly share code, notes, and snippets.

Avatar

Zach Coble coblezc

View GitHub Profile
@coblezc
coblezc / news-locations.py
Created October 25, 2017 15:01
Extract locations mentioned in articles on front page of nytimes.com
View news-locations.py
from __future__ import unicode_literals
from bs4 import BeautifulSoup
import urllib
from selenium import webdriver
import time
import spacy
from twython import Twython
import json
# "loadin' the pipeline"
@coblezc
coblezc / DNA.js
Created April 4, 2017 23:24
Family Recipes
View DNA.js
// The Nature of Code
// Daniel Shiffman
// http://natureofcode.com
// Genetic Algorithm, Evolving Shakespeare
// A class to describe a pseudo-DNA, i.e. genotype
// Here, a virtual organism's DNA is an array of characters.
// Functionality:
// -- convert DNA into a string
View bs-scraper.py
import bs4
import urllib
# Hard-coded Amazon product page to scrape.
url = "https://www.amazon.com/TopHeadwear-Face-Mask-Colors-Black/dp/B00X82QUHY/ref=sr_1_8?ie=UTF8&qid=1486007211&sr=8-8&keywords=black+ski+mask"
# Download the raw page body in one blocking call.
# NOTE(review): urllib.urlopen is the Python 2 spelling; Python 3 would need
# urllib.request.urlopen. The response object is never explicitly closed.
html = urllib.urlopen(url).read()
# Parse the markup with the standard-library 'html.parser' backend.
soup = bs4.BeautifulSoup(html, 'html.parser')
# Collect every element matching the CSS class selector ".review-data".
# Despite the variable name, these are whatever nodes carry that class.
titles = soup.select('.review-data')
View dictionaries.py
# generate dictionaries to feed into markov.py
import markov
# cb speak
# Each corpus is read inside a `with` block so its file handle is closed as
# soon as the lines are loaded, rather than staying open until garbage
# collection reclaims it.
with open("/Users/user/Documents/itp/rwet/final/categories/cbspeak-terms.txt") as terms_fh:
    cb_terms_file = terms_fh.readlines()
# NOTE(review): the arguments 4 and 100 are passed through unchanged; they are
# presumably the n-gram order and the amount of output -- confirm in markov.py.
cb_terms = markov.char_level_generate(cb_terms_file, 4, 100)
with open("/Users/user/Documents/itp/rwet/final/categories/cbspeak-defs.txt") as defs_fh:
    cb_defs_file = defs_fh.readlines()
cb_defs = markov.char_level_generate(cb_defs_file, 4, 100)
View generate_chorus_functions.py
# rwet hw 3
import random
from string import punctuation
# strip first ~50 lines of metadata
# NOTE(review): the statements visible here only trim whitespace and drop
# blank lines; the metadata stripping described by the comment above is
# presumably done further down the file.
lyrics_file = "/path/to/file"
# Read through a `with` block so the file handle is closed deterministically
# once every line has been consumed.
with open(lyrics_file) as lyrics_fh:
    # Keep each line with surrounding whitespace removed, skipping lines that
    # are empty after stripping.
    lyrics = [line.strip() for line in lyrics_fh if line.strip()]
View worst_sellers.py
# hw3 for rwet
import urllib
import json
import random
# Download the noun list JSON from the dariusk/corpora repository as raw text.
# NOTE(review): urllib.urlopen is the Python 2 spelling; Python 3 would need
# urllib.request.urlopen.
nouns_data = urllib.urlopen("https://raw.githubusercontent.com/dariusk/corpora/master/data/words/nouns.json").read()
# Decode the JSON payload into Python objects.
noun_json = json.loads(nouns_data)
# NYT Books API endpoint for the combined print and e-book fiction list.
# NOTE(review): the api-key value looks like a placeholder -- substitute a real
# key before running. A non-ASCII character in a Python 2 source file also
# requires an encoding declaration at the top of the file.
url = "http://api.nytimes.com/svc/books/v3/lists/combined-print-and-e-book-fiction.json?&api-key=🐈"
View *rwet_midterm.py
# cribbed from Allison Parrish and Ross Goodwin
# https://gist.github.com/aparrish/ea3911c31cec8c858bd0/revisions
# https://gist.github.com/rossgoodwin/d45cba970add12c6190d
import random
from string import punctuation
# strip first ~50 lines of metadata
def not_with_semicolon(line):
if not line.startswith(';;;') and not line[0] in punctuation: