Skip to content

Instantly share code, notes, and snippets.

@Tamschi
Last active November 18, 2022 09:20
Show Gist options
  • Save Tamschi/a88873caeab29c1468d390b43233b75b to your computer and use it in GitHub Desktop.
Save Tamschi/a88873caeab29c1468d390b43233b75b to your computer and use it in GitHub Desktop.
Twitter User Info Downloader for Twitter Data Export Files

Twitter User Info Downloader for Twitter Data Export Files

Respects the rate limit nicely, shows a progress bar, handles some errors (you may need to adjust this if it gets stuck) and skips already downloaded valid profiles when re-run.

How to Use

  • Download this gist as a zip file and extract it into a new folder somewhere.

  • Move main.rs into a src/ subfolder.

  • Create a users/ subfolder.

  • Enter your API login info in keys.toml.

  • Copy your follower.js and following.js files into this folder.

  • Run: cargo run

    (If you don't have Rust installed yet, you can get it from here.)

[package]
name = "social-graph-downloader"
version = "0.0.0"
edition = "2021"
publish = false
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
chrono = "0.4.23"
egg-mode = { version = "0.16.0", default-features = false, features = ["hyper-rustls"] }
egg-mode-extras = "0.3.3"
futures = "0.3.25"
progress_bar = "1.0.3"
serde = { version = "1.0.147", features = ["derive"] }
serde-aux = { version = "4.1.0", default-features = false }
serde_json = "1.0.87"
tap = "1.0.1"
tokio = { version = "1.21.2", features = ["rt-multi-thread", "macros"] }
toml = "0.5.9"
[twitter]
consumerKey = ""
consumerSecret = ""
accessToken = ""
accessTokenSecret = ""
use std::{collections::BTreeSet, fs, future::Future, time::Duration};
use chrono::Utc;
use egg_mode::{
error::{Error, TwitterErrorCode},
user::TwitterUser,
};
use egg_mode_extras::{client::TokenType, Client};
use progress_bar::{
finalize_progress_bar, inc_progress_bar, init_progress_bar, print_progress_bar_info,
set_progress_bar_action, Color, Style,
};
use serde::Deserialize;
use serde_aux::prelude::deserialize_number_from_string;
use tap::{Conv, TryConv};
use tokio::time::sleep;
/// Program entry point, executed on the Tokio runtime set up by `#[tokio::main]`.
///
/// Returns whatever `run` returns, so any error it reports becomes the result of `main`.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
// All of the actual work lives in `run`.
run().await
}
/// Downloads the profile of every account listed in `follower.js` and `following.js`.
///
/// Steps:
/// 1. Logs in with the credentials read from `keys.toml`.
/// 2. Parses both export files after stripping their `window.YTD.… = ` JavaScript prefix.
/// 3. Merges followers and followings into one deduplicated, ordered list of links.
/// 4. For each link, looks the user up and writes the profile to `users/<account_id>.json`,
///    unless a file that already parses as a `TwitterUser` exists at that path.
///
/// Accounts for which `retry` yields `None` are reported as "skipped" and still advance the
/// progress bar, so the bar reaches its total once every link has been processed.
///
/// # Errors
///
/// Returns an error if logging in fails, an export file cannot be read or parsed, the
/// `users/` directory cannot be created, or a profile cannot be serialized or written.
async fn run() -> Result<(), Box<dyn std::error::Error>> {
    init_progress_bar(0);
    set_progress_bar_action("Logging in", Color::Blue, Style::Bold);
    let client = Client::from_config_file("keys.toml").await?;

    set_progress_bar_action("follower.js", Color::Blue, Style::Bold);
    let followers: Vec<Follower> = serde_json::from_str(
        fs::read_to_string("follower.js")?.trim_start_matches("window.YTD.follower.part0 = "),
    )?;

    set_progress_bar_action("following.js", Color::Blue, Style::Bold);
    let following: Vec<Following> = serde_json::from_str(
        fs::read_to_string("following.js")?.trim_start_matches("window.YTD.following.part0 = "),
    )?;

    set_progress_bar_action("combining", Color::Blue, Style::Bold);
    // Passing through a `BTreeSet` removes accounts that appear in both lists and sorts them.
    let links: Vec<Link> = followers
        .into_iter()
        .map(|f| f.follower)
        .chain(following.into_iter().map(|f| f.following))
        .collect::<BTreeSet<_>>()
        .into_iter()
        .collect();

    // Make sure the output directory exists before any API call is spent; otherwise the
    // first `fs::write` below would fail only after a lookup had already been performed.
    fs::create_dir_all("users")?;

    init_progress_bar(links.len());
    set_progress_bar_action("saving", Color::Blue, Style::Bold);
    for link in links {
        let path = format!("users/{}.json", link.account_id);

        // Skip accounts whose profile was already saved by a previous run and still parses.
        if let Ok(data) = fs::read(&path) {
            if serde_json::from_slice::<TwitterUser>(&data).is_ok() {
                inc_progress_bar();
                continue;
            }
        }

        let user = retry("user", || {
            client.lookup_user(link.account_id, TokenType::User)
        })
        .await;
        let Some(user) = user else {
            print_progress_bar_info(
                "skipped",
                &link.account_id.to_string(),
                Color::White,
                Style::Normal,
            );
            // A skipped account is still a processed one; without this the bar would
            // finish below its total whenever any account is skipped.
            inc_progress_bar();
            continue;
        };

        print_progress_bar_info(
            "user",
            &format!("{} (@{})", user.name, user.screen_name),
            Color::White,
            Style::Normal,
        );
        fs::write(&path, serde_json::to_string_pretty(&user)?)?;
        inc_progress_bar();
    }
    finalize_progress_bar();
    Ok(())
}
/// One element of the array that `run` deserializes from `follower.js`.
///
/// Keys are matched in camelCase and any key not declared here is rejected
/// (`deny_unknown_fields`).
#[derive(Debug, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
struct Follower {
/// The account reference stored under the `follower` key.
follower: Link,
}
/// One element of the array that `run` deserializes from `following.js`.
///
/// Keys are matched in camelCase and any key not declared here is rejected
/// (`deny_unknown_fields`).
#[derive(Debug, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
struct Following {
/// The account reference stored under the `following` key.
following: Link,
}
/// Account reference shared by `Follower` and `Following` entries.
///
/// The derived `Ord`/`Eq` implementations are what allow `run` to deduplicate links by
/// collecting them into a `BTreeSet`.
#[derive(Debug, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
struct Link {
/// Numeric account ID (`accountId` in the input), parsed with serde-aux's
/// `deserialize_number_from_string` so that a string-encoded number is accepted.
#[serde(deserialize_with = "deserialize_number_from_string")]
account_id: u64,
// Never read by this program. NOTE(review): presumably declared only because
// `deny_unknown_fields` would otherwise reject input containing `userLink` — confirm.
#[allow(dead_code)]
#[serde(skip_serializing)]
user_link: String,
}
/// Calls `f` and awaits its future over and over until `handle_error` reports a final outcome.
///
/// `occasion` is passed through to `handle_error` unchanged. Each attempt's result is handed
/// to `handle_error`: when it yields `Some(outcome)`, that `outcome` (an `Option<T>`) is
/// returned; when it yields `None`, another attempt is made.
async fn retry<T, F: Future<Output = Result<T, Error>>>(
    occasion: &str,
    mut f: impl FnMut() -> F,
) -> Option<T> {
    loop {
        let attempt = f().await;
        match handle_error(occasion, attempt).await {
            Some(outcome) => return outcome,
            None => continue,
        }
    }
}
async fn handle_error<T>(occasion: &str, result: Result<T, Error>) -> Option<Option<T>> {
match result {
Ok(t) => Some(Some(t)),
Err(error) => {
print_progress_bar_info(occasion, &error.to_string(), Color::Red, Style::Bold);
match error {
Error::TwitterError(_, errors) => {
for error in errors.errors {
match error {
TwitterErrorCode {
code: 50// User not found.
| 63 // User has been suspended.
,
..
} => return Some(None),
_ => todo!("{}", error),
}
}
}
Error::NetError(_) => (),
Error::RateLimit(until) => {
let duration = Duration::from_secs(
(until.conv::<i64>() - Utc::now().timestamp())
.try_conv::<u64>()
.unwrap_or_default(),
);
print_progress_bar_info(
"Sleeping...",
&format!("{:?}", duration),
Color::DarkGray,
Style::Normal,
);
sleep(duration).await
}
Error::BadUrl => todo!(),
Error::InvalidResponse(_, _) => todo!(),
Error::MissingValue(_) => todo!(),
Error::FutureAlreadyCompleted => todo!(),
Error::MediaError(_) => todo!(),
Error::BadStatus(_) => (),
Error::TlsError(_) => todo!(),
Error::IOError(_) => todo!(),
Error::DeserializeError(_) => todo!(),
Error::TimestampParseError(_) => todo!(),
Error::TimerShutdownError(_) => todo!(),
Error::HeaderParseError(_) => todo!(),
Error::HeaderConvertError(_) => todo!(),
}
None
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment