Skip to content

Instantly share code, notes, and snippets.

@CharlesAverill
Last active November 18, 2022 03:57
Show Gist options
  • Save CharlesAverill/8f544cac4108aa7e1841c4345049a9c1 to your computer and use it in GitHub Desktop.
Save CharlesAverill/8f544cac4108aa7e1841c4345049a9c1 to your computer and use it in GitHub Desktop.
import pandas as pd
import requests
import os
import progressbar
def download_image(tweet_id, url):
    """Download the media at *url* to ``tweet_<tweet_id><ext>`` in the CWD.

    The file extension is taken from the path component of the URL, so a
    query string (``.../abc.jpg?name=large``) no longer leaks into, or wipes
    out, the extension. When the path has no extension, a ``format=<ext>``
    query parameter is used if one is present. If neither is available the
    file is saved without an extension.

    The download is skipped when the target file already exists, which makes
    re-running the script cheap.

    Args:
        tweet_id: Identifier embedded in the output file name.
        url: Address of the media file to fetch.

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            non-2xx HTTP status.
        OSError: If the output file cannot be written.
    """
    # Imported locally so this function does not depend on edits to the
    # file-level import block.
    from urllib.parse import parse_qs, urlsplit

    parts = urlsplit(url)

    # Only look at the path: searching the whole URL for "." and "?" mixes up
    # dots in the host name / query string with the real extension.
    ext = os.path.splitext(parts.path)[1]
    if not ext:
        fmt = parse_qs(parts.query).get("format")
        # isalnum() keeps path separators and other odd characters out of the
        # file name.
        if fmt and fmt[0].isalnum():
            ext = f".{fmt[0]}"

    fn = f"tweet_{tweet_id}{ext}"
    if os.path.exists(fn):
        return

    # A timeout prevents one stalled connection from hanging the whole run.
    response = requests.get(url, timeout=30)
    # Fail loudly instead of saving an HTML error page as an "image".
    response.raise_for_status()

    with open(fn, "wb") as handler:
        handler.write(response.content)
# Load the exported tweet spreadsheet and download every media file it lists.
df = pd.read_excel("caverill__user_tweets.xlsx")
df["Media URLs"] = df["Media URLs"].fillna(0)

# Additional media URLs of a tweet sit in the headerless columns to the right
# of "Media URLs" (pandas labels those "Unnamed: <position>").
# NOTE(review): the original listed "Unnamed: 16" twice; the fourth slot is
# presumably "Unnamed: 18" - confirm against the spreadsheet.
media_columns = ("Media URLs", "Unnamed: 16", "Unnamed: 17", "Unnamed: 18")

# iterrows() is a generator with no length, so the row count is passed
# explicitly to let the progress bar show real progress.
for index, row in progressbar.progressbar(df.iterrows(), max_value=len(df)):
    for i, column in enumerate(media_columns):
        # .get() yields None for a column missing from the sheet, keeping the
        # per-tweet media index `i` stable instead of raising KeyError.
        url = row.get(column)

        # Empty cells arrive as 0 (filled above) or NaN; neither is a URL.
        # "m3u8" entries are playlist manifests, not media files, so skip them.
        if not isinstance(url, str) or "m3u8" in url:
            continue

        print(url)
        try:
            download_image(f"{row['Tweet Id']}_{i}", url)
        except Exception as e:
            # Best effort: report the failure and carry on with the rest.
            print(f"Issue downloading row {index} with urls {url}")
            print(e)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment