Last active
November 18, 2022 03:57
-
-
Save CharlesAverill/8f544cac4108aa7e1841c4345049a9c1 to your computer and use it in GitHub Desktop.
Twitter image downloader - using https://www.vicinitas.io/free-tools/download-user-tweets
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import requests | |
import os | |
import progressbar | |
def download_image(tweet_id, url):
    """Download the media at *url* to ``tweet_<tweet_id><ext>`` in the working directory.

    The extension is taken from the URL's path; any query string or fragment
    is ignored. Nothing is fetched when the target file already exists, so an
    interrupted run can be restarted without re-downloading.

    Args:
        tweet_id: Identifier embedded in the output file name.
        url: Address of the media file to fetch.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        OSError: If the output file cannot be written.
    """
    # Strip the query string / fragment *before* looking for the extension.
    # The previous code located "?" relative to a slice but used the offset as
    # an absolute index, which produced an empty extension for URLs such as
    # ".../abc.jpg?name=large".
    path = url.split("?", 1)[0].split("#", 1)[0]
    extension = os.path.splitext(path)[1]
    fn = f"tweet_{tweet_id}{extension}"
    if os.path.exists(fn):
        return
    # A timeout keeps one stalled connection from hanging the whole run.
    response = requests.get(url, timeout=30)
    # Do not save an HTML error page under an image file name.
    response.raise_for_status()
    with open(fn, "wb") as handler:
        handler.write(response.content)
# Load the spreadsheet of exported tweets.
df = pd.read_excel("caverill__user_tweets.xlsx")

# A tweet's first media URL sits in the "Media URLs" column; additional URLs
# overflow into the header-less columns right after it, which pandas labels
# "Unnamed: <position>". Collect that run of columns instead of hard-coding
# their names (the previous list read "Unnamed: 16" twice).
media_urls_index = df.columns.get_loc("Media URLs")
all_columns = list(df.columns)
media_columns = [all_columns[media_urls_index]]
for column_name in all_columns[media_urls_index + 1:]:
    if not str(column_name).startswith("Unnamed:"):
        break
    media_columns.append(column_name)

# iterrows() is a generator without a length, so tell the bar how many rows
# to expect.
for index, row in progressbar.progressbar(df.iterrows(), max_value=len(df)):
    for i, url in enumerate(row[column_name] for column_name in media_columns):
        # Empty cells are NaN (not strings) and m3u8 entries are video
        # playlists rather than image files; neither can be downloaded.
        if not isinstance(url, str) or "m3u8" in url:
            continue
        print(url)
        try:
            download_image(f"{row['Tweet Id']}_{i}", url)
        except Exception as e:
            # Best effort: report the failure and carry on with the rest.
            print(f"Issue downloading row {index} with urls {url}")
            print(e)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment