Skip to content

Instantly share code, notes, and snippets.

Avatar

Ed Summers edsu

View GitHub Profile
View episode-docs-shared-subjects.py
import json
docs = json.load(open('Projects/airwaves/static/data/documents.json'))
for e in json.load(open('Projects/airwaves/static/data/episodes.json')):
for s in e['subject']:
for d in docs:
related = None
for s2 in d['subject']:
if s['name'] == s2['name']:
View test_api.py
import requests
token = 'CHANGEME'
url = 'https://api.twitter.com/2/tweets/search/all?expansions=author_id%2Cin_reply_to_user_id%2Creferenced_tweets.id%2Creferenced_tweets.id.author_id%2Centities.mentions.username%2Cattachments.poll_ids%2Cattachments.media_keys%2Cgeo.place_id&user.fields=created_at%2Cdescription%2Centities%2Cid%2Clocation%2Cname%2Cpinned_tweet_id%2Cprofile_image_url%2Cprotected%2Cpublic_metrics%2Curl%2Cusername%2Cverified%2Cwithheld&tweet.fields=attachments%2Cauthor_id%2Ccontext_annotations%2Cconversation_id%2Ccreated_at%2Centities%2Cgeo%2Cid%2Cin_reply_to_user_id%2Clang%2Cpublic_metrics%2Ctext%2Cpossibly_sensitive%2Creferenced_tweets%2Creply_settings%2Csource%2Cwithheld&media.fields=duration_ms%2Cheight%2Cmedia_key%2Cpreview_image_url%2Ctype%2Curl%2Cwidth%2Cpublic_metrics&poll.fields=duration_minutes%2Cend_datetime%2Cid%2Coptions%2Cvoting_status&place.fields=contained_within%2Ccountry%2Ccountry_code%2Cfull_name%2Cgeo%2Cid%2Cname%2Cplace_type&max_results=10&query=%23endsars&start_time=2006-0
View videos-directory.txt
videos
├── archive.txt
├── cbsnews_-embed
│   ├── heT2fS5d_26SdE0llYY3eqtTsruVy7Tu
│   │   ├── New_police_shooting_amid_nights_of_unrest_near_St._Louis.description
│   │   ├── New_police_shooting_amid_nights_of_unrest_near_St._Louis.en.ttml
│   │   ├── New_police_shooting_amid_nights_of_unrest_near_St._Louis.info.json
│   │   └── New_police_shooting_amid_nights_of_unrest_near_St._Louis.mp4
│   └── N9L3R_azcDe2VOImjI6dYCHk1mDnsMtq
│   ├── Missouri_residents_doubt_cops_story_of_fatal_teen_shooting.description
@edsu
edsu / errors.py
Last active Apr 29, 2021
This script surfaces errors from the Twitter API sample stream. These come from trying to get user and tweet expansions for users and tweets that have been suspended, deleted or protected.
View errors.py
#!/usr/bin/env python3
# This script surfaces errors from the Twitter API sample stream.
# These come from trying to get user and tweet expansions for users and tweets
# that have been suspended, deleted or protected.
import os
import twarc
import dotenv
View results.csv
We can't make this file beautiful and searchable because it's too large.
@edsu
edsu / rickrolls.csv
Last active Apr 16, 2021
twarc2 search 'youtube.com/watch?v=dQw4w9WgXcQ' --flatten --archive --limit 50000 | jq -r '.created_at[0:10]' | uniq -c | awk '{print $2 "," $1}'
View rickrolls.csv
2021-04-16 133
2021-04-15 370
2021-04-14 711
2021-04-13 281
2021-04-12 303
2021-04-11 305
2021-04-10 204
2021-04-09 274
2021-04-08 315
2021-04-07 398
View tweet2.json
{
"data": [
{
"created_at": "2021-04-10T21:13:07.000Z",
"lang": "en",
"conversation_id": "1380992262847483904",
"reply_settings": "everyone",
"text": "What do we want? Geo location!",
"author_id": "708113",
"public_metrics": {
View tweet1.json
{
"created_at": "Sat Apr 10 21:13:07 +0000 2021",
"id": 1380992262847484000,
"id_str": "1380992262847483904",
"full_text": "What do we want? Geo location!",
"truncated": false,
"display_text_range": [
0,
30
],
@edsu
edsu / extract_images.py
Last active Apr 10, 2021
Extract images from a WARC file. usage: extract_images.py <warc_file>
View extract_images.py
#!/usr/bin/env python3
import sys
import pathlib
from urllib.parse import urlparse
from warcio.archiveiterator import ArchiveIterator
def save(url, stream):
uri = urlparse(url)
View journal
#!/bin/zsh
# journal is a little command to edit my markdown journal with vim. By default
# it will open the journal for today. Optionally supply a date (e.g. 2021-01-01)
# to edit an older entry.
journal_dir="/home/ed/Dropbox/Journal"
if [ "$1" ];
then