Skip to content

Instantly share code, notes, and snippets.

View edsu's full-sized avatar

Ed Summers edsu

View GitHub Profile
import json
docs = json.load(open('Projects/airwaves/static/data/documents.json'))
for e in json.load(open('Projects/airwaves/static/data/episodes.json')):
for s in e['subject']:
for d in docs:
related = None
for s2 in d['subject']:
if s['name'] == s2['name']:
import requests
token = 'CHANGEME'
url = 'https://api.twitter.com/2/tweets/search/all?expansions=author_id%2Cin_reply_to_user_id%2Creferenced_tweets.id%2Creferenced_tweets.id.author_id%2Centities.mentions.username%2Cattachments.poll_ids%2Cattachments.media_keys%2Cgeo.place_id&user.fields=created_at%2Cdescription%2Centities%2Cid%2Clocation%2Cname%2Cpinned_tweet_id%2Cprofile_image_url%2Cprotected%2Cpublic_metrics%2Curl%2Cusername%2Cverified%2Cwithheld&tweet.fields=attachments%2Cauthor_id%2Ccontext_annotations%2Cconversation_id%2Ccreated_at%2Centities%2Cgeo%2Cid%2Cin_reply_to_user_id%2Clang%2Cpublic_metrics%2Ctext%2Cpossibly_sensitive%2Creferenced_tweets%2Creply_settings%2Csource%2Cwithheld&media.fields=duration_ms%2Cheight%2Cmedia_key%2Cpreview_image_url%2Ctype%2Curl%2Cwidth%2Cpublic_metrics&poll.fields=duration_minutes%2Cend_datetime%2Cid%2Coptions%2Cvoting_status&place.fields=contained_within%2Ccountry%2Ccountry_code%2Cfull_name%2Cgeo%2Cid%2Cname%2Cplace_type&max_results=10&query=%23endsars&start_time=2006-0
videos
├── archive.txt
├── cbsnews_-embed
│   ├── heT2fS5d_26SdE0llYY3eqtTsruVy7Tu
│   │   ├── New_police_shooting_amid_nights_of_unrest_near_St._Louis.description
│   │   ├── New_police_shooting_amid_nights_of_unrest_near_St._Louis.en.ttml
│   │   ├── New_police_shooting_amid_nights_of_unrest_near_St._Louis.info.json
│   │   └── New_police_shooting_amid_nights_of_unrest_near_St._Louis.mp4
│   └── N9L3R_azcDe2VOImjI6dYCHk1mDnsMtq
│   ├── Missouri_residents_doubt_cops_story_of_fatal_teen_shooting.description
@edsu
edsu / errors.py
Last active April 29, 2021 16:18
This script surfs errors from the sample stream Twitter API. These come from trying to get user and tweet expansions for users and tweets that have been suspended, deleted or protected.
#!/usr/bin/env python3
# This script surfs errors from the sample stream Twitter API
# these come from trying to get user and tweet expansions for users and tweets
# that have been suspended, deleted or protected
import os
import twarc
import dotenv
We can't make this file beautiful and searchable because it's too large.
type,id,conversation_id,possibly_sensitive,source,created_at,text,author_id,reply_settings,context_annotations,lang,public_metrics.retweet_count,public_metrics.reply_count,public_metrics.like_count,public_metrics.quote_count,entities.urls,author.public_metrics.followers_count,author.public_metrics.following_count,author.public_metrics.tweet_count,author.public_metrics.listed_count,author.entities.url.urls,author.username,author.protected,author.name,author.created_at,author.profile_image_url,author.location,author.id,author.verified,author.description,author.url,referenced_tweets,entities.mentions,author.entities.description.mentions,__twarc.url,__twarc.version,__twarc.retrieved_at,entities.annotations,author.entities.description.urls,author.entities.description.hashtags,author.pinned_tweet_id,in_reply_to_user_id,in_reply_to_user.public_metrics.followers_count,in_reply_to_user.public_metrics.following_count,in_reply_to_user.public_metrics.tweet_count,in_reply_to_user.public_metrics.listed_count,in_reply_to_us
@edsu
edsu / rickrolls.csv
Last active April 16, 2021 14:20
twarc2 search 'youtube.com/watch?v=dQw4w9WgXcQ' --flatten --archive --limit 50000 | jq -r '.created_at[0:10]' | uniq -c | awk '{print $2 "," $1}'
2021-04-16 133
2021-04-15 370
2021-04-14 711
2021-04-13 281
2021-04-12 303
2021-04-11 305
2021-04-10 204
2021-04-09 274
2021-04-08 315
2021-04-07 398
{
"data": [
{
"created_at": "2021-04-10T21:13:07.000Z",
"lang": "en",
"conversation_id": "1380992262847483904",
"reply_settings": "everyone",
"text": "What do we want? Geo location!",
"author_id": "708113",
"public_metrics": {
{
"created_at": "Sat Apr 10 21:13:07 +0000 2021",
"id": 1380992262847484000,
"id_str": "1380992262847483904",
"full_text": "What do we want? Geo location!",
"truncated": false,
"display_text_range": [
0,
30
],
@edsu
edsu / extract_images.py
Last active September 2, 2023 18:43
Extract images from a WARC file. usage: extract_images.py <warc_file>
#!/usr/bin/env python3
import sys
import pathlib
from urllib.parse import urlparse
from warcio.archiveiterator import ArchiveIterator
def save(url, stream):
uri = urlparse(url)
#/bin/zsh
# journal is a little command to edit my markdown journal with vim. By default
# it will open the journal for today. Optionally supply a date (e.g. 2021-01-01)
# to edit an older entry.
journal_dir="/home/ed/Dropbox/Journal"
if [ "$1" ];
then