Skip to content

Instantly share code, notes, and snippets.

View thisismattmiller's full-sized avatar
😑
...

Matt Miller thisismattmiller

😑
...
View GitHub Profile
{
"russcarnahan.com": 16,
"secure.actblue.com": 13,
"secure.piryx.com": 6,
"services.myngp.com": 5,
"rickperry.org": 5,
"secure.mydccc.org": 5,
"markleyva.com": 4,
"clyburnforcongress.com": 4,
"johnsprattforcongress.com": 4,
<div><strong>imdb_id1</strong>: <a href="/movie/<%=imdb_id1%>"><%=imdb_id1%></a></div>
<div><strong>color1</strong>: <%=color1%></div>
<div><strong>director_name1</strong>: <%=director_name1%></div>
<div><strong>num_critic_for_reviews1</strong>: <%=num_critic_for_reviews1%></div>
<div><strong>duration1</strong>: <%=duration1%></div>
<div><strong>director_facebook_likes1</strong>: <%=director_facebook_likes1%></div>
<div><strong>actor_3_facebook_likes1</strong>: <%=actor_3_facebook_likes1%></div>
<div><strong>actor_2_name1</strong>: <%=actor_2_name1%></div>
<div><strong>actor_1_facebook_likes1</strong>: <%=actor_1_facebook_likes1%></div>
<div><strong>gross1</strong>: <%=gross1%></div>
@thisismattmiller
thisismattmiller / ch_sparql.py
Last active August 6, 2021 14:34
Example using python to interact with Carnegie Hall's SPARQL endpoint
import requests
import json
url = "http://data.carnegiehall.org/sparql/"
sparql = """
#Find works by string in the title (case-insensitive)
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
@thisismattmiller
thisismattmiller / get_links.py
Created April 26, 2021 14:50
Downloading PBCore records from https://americanarchive.org
@thisismattmiller
thisismattmiller / download_items.py
Created April 26, 2021 14:49
Downloading Harvard ART API records
import requests
import json
# Read the Harvard Art Museums API key from the local JSON credentials file.
with open('key.json') as fh:
    key_data = json.load(fh)

api_key = key_data['key']

# Alternate medium IDs of interest, currently disabled:
# ["2028333","2028206","2028902","2035812","2028183","2035306"]
mediums_i_want = ["2028216"]
import requests
import json
# Load the API credentials from the local key file before any requests are made.
with open("key.json") as handle:
    key = json.load(handle)
def archive_search(keyword,limit=20,offset=0,total_only=0):
url = "https://www.brooklynmuseum.org/api/v2/archive/image/"
headers = {
{
"@context": {
"identifiers": "http://id.loc.gov/vocabulary/identifiers/",
"madsrdf": "http://www.loc.gov/mads/rdf/v1#",
"owl": "http://www.w3.org/2002/07/owl#",
"rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
"rdfs": "http://www.w3.org/2000/01/rdf-schema#",
"ri": "http://id.loc.gov/ontologies/RecordInfo#",
"skos": "http://www.w3.org/2004/02/skos/core#",
"xsd": "http://www.w3.org/2001/XMLSchema#",
@thisismattmiller
thisismattmiller / yahoo_sitemap.py
Created April 11, 2021 14:43
Download yahoo answers sitemap
import requests
import xml.etree.ElementTree as ET
import json
import multiprocessing
answer_urls = []
def do_work(url):
print(url)
urls = []
import json
with open('subjects.json') as infile:
data = json.load(infile)
geo_lookup = {}
for record in data:
@thisismattmiller
thisismattmiller / jsontocsv.py
Created February 26, 2021 03:21
jsontocsv.py
import json
import csv
import glob
# we need to know the headers, not all files may have all headers so make a big list first
headers = []
for filename in glob.glob("exhibitions/*.json"):
with open(filename, "r") as data:
json_file = json.load(data)