Skip to content

Instantly share code, notes, and snippets.

Avatar

Ed Summers edsu

View GitHub Profile
View naeb-page-counts.py
#!/usr/bin/env python3
import internetarchive
ia = internetarchive.get_session()
print("ids,pages")
for result in ia.search_items('collection:mediahistory creator:National Association of Educational Broadcasters'):
ia_id = result['identifier']
View blacklivesmatter-counts.csv
start end hour_count
2021-06-17T23:16:56.000Z 2021-06-18T00:00:00.000Z 232
2021-06-18T00:00:00.000Z 2021-06-18T01:00:00.000Z 267
2021-06-18T01:00:00.000Z 2021-06-18T02:00:00.000Z 353
2021-06-18T02:00:00.000Z 2021-06-18T03:00:00.000Z 260
2021-06-18T03:00:00.000Z 2021-06-18T04:00:00.000Z 286
2021-06-18T04:00:00.000Z 2021-06-18T05:00:00.000Z 278
2021-06-18T05:00:00.000Z 2021-06-18T06:00:00.000Z 185
2021-06-18T06:00:00.000Z 2021-06-18T07:00:00.000Z 225
2021-06-18T07:00:00.000Z 2021-06-18T08:00:00.000Z 241
View episode-docs-shared-subjects.py
import json
docs = json.load(open('Projects/airwaves/static/data/documents.json'))
for e in json.load(open('Projects/airwaves/static/data/episodes.json')):
for s in e['subject']:
for d in docs:
related = None
for s2 in d['subject']:
if s['name'] == s2['name']:
View test_api.py
import requests
token = 'CHANGEME'
url = 'https://api.twitter.com/2/tweets/search/all?expansions=author_id%2Cin_reply_to_user_id%2Creferenced_tweets.id%2Creferenced_tweets.id.author_id%2Centities.mentions.username%2Cattachments.poll_ids%2Cattachments.media_keys%2Cgeo.place_id&user.fields=created_at%2Cdescription%2Centities%2Cid%2Clocation%2Cname%2Cpinned_tweet_id%2Cprofile_image_url%2Cprotected%2Cpublic_metrics%2Curl%2Cusername%2Cverified%2Cwithheld&tweet.fields=attachments%2Cauthor_id%2Ccontext_annotations%2Cconversation_id%2Ccreated_at%2Centities%2Cgeo%2Cid%2Cin_reply_to_user_id%2Clang%2Cpublic_metrics%2Ctext%2Cpossibly_sensitive%2Creferenced_tweets%2Creply_settings%2Csource%2Cwithheld&media.fields=duration_ms%2Cheight%2Cmedia_key%2Cpreview_image_url%2Ctype%2Curl%2Cwidth%2Cpublic_metrics&poll.fields=duration_minutes%2Cend_datetime%2Cid%2Coptions%2Cvoting_status&place.fields=contained_within%2Ccountry%2Ccountry_code%2Cfull_name%2Cgeo%2Cid%2Cname%2Cplace_type&max_results=10&query=%23endsars&start_time=2006-0
View videos-directory.txt
videos
├── archive.txt
├── cbsnews_-embed
│   ├── heT2fS5d_26SdE0llYY3eqtTsruVy7Tu
│   │   ├── New_police_shooting_amid_nights_of_unrest_near_St._Louis.description
│   │   ├── New_police_shooting_amid_nights_of_unrest_near_St._Louis.en.ttml
│   │   ├── New_police_shooting_amid_nights_of_unrest_near_St._Louis.info.json
│   │   └── New_police_shooting_amid_nights_of_unrest_near_St._Louis.mp4
│   └── N9L3R_azcDe2VOImjI6dYCHk1mDnsMtq
│   ├── Missouri_residents_doubt_cops_story_of_fatal_teen_shooting.description
@edsu
edsu / errors.py
Last active Apr 29, 2021
This script surfaces errors from the Twitter API sample stream. These errors come from requesting user and tweet expansions for users and tweets that have been suspended, deleted, or protected.
View errors.py
#!/usr/bin/env python3
# This script surfaces errors from the Twitter API sample stream.
# These errors come from requesting user and tweet expansions for users and
# tweets that have been suspended, deleted, or protected.
import os
import twarc
import dotenv
View results.csv
We can't make this file beautiful and searchable because it's too large.
@edsu
edsu / rickrolls.csv
Last active Apr 16, 2021
twarc2 search 'youtube.com/watch?v=dQw4w9WgXcQ' --flatten --archive --limit 50000 | jq -r '.created_at[0:10]' | uniq -c | awk '{print $2 "," $1}'
View rickrolls.csv
2021-04-16 133
2021-04-15 370
2021-04-14 711
2021-04-13 281
2021-04-12 303
2021-04-11 305
2021-04-10 204
2021-04-09 274
2021-04-08 315
2021-04-07 398
View tweet2.json
{
"data": [
{
"created_at": "2021-04-10T21:13:07.000Z",
"lang": "en",
"conversation_id": "1380992262847483904",
"reply_settings": "everyone",
"text": "What do we want? Geo location!",
"author_id": "708113",
"public_metrics": {
View tweet1.json
{
"created_at": "Sat Apr 10 21:13:07 +0000 2021",
"id": 1380992262847484000,
"id_str": "1380992262847483904",
"full_text": "What do we want? Geo location!",
"truncated": false,
"display_text_range": [
0,
30
],