@dmurawsky
Created June 12, 2024 17:59
Download all InfoWars videos from tv.infowars.com

Instructions

  1. Save the script below (infowars.py) in the folder where you want the videos downloaded.

  2. Open a terminal and run the command below to install the dependencies:

pip install cloudscraper requests beautifulsoup4
  3. Run the script to download all videos:
python infowars.py
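
Before kicking off the full run, you can check that all three dependencies import cleanly with a one-liner (an optional sanity check, not part of the original instructions):

python -c "import cloudscraper, requests, bs4; print('dependencies OK')"

infowars.py: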
import os

import cloudscraper
import requests
from bs4 import BeautifulSoup

# Directory to save videos (the directory the script is run from)
SAVE_DIR = os.getcwd()

# Initialize cloudscraper (a requests-compatible session that solves
# Cloudflare's anti-bot challenge)
scraper = cloudscraper.create_scraper()

# Function to download a file
def download_file(url, save_path):
    try:
        response = scraper.get(url, stream=True)
        response.raise_for_status()
        chunk_count = 0
        with open(save_path, 'wb') as file:
            # 1 MiB chunks, so the chunk count roughly tracks MBs downloaded
            for chunk in response.iter_content(chunk_size=1048576):
                chunk_count += 1
                print(f"Downloaded {chunk_count} MBs to {save_path}")
                file.write(chunk)
        print(f"Download completed: {save_path}")
    except requests.RequestException as e:
        print(f"Error downloading {url}: {e}")

# Function to get the video URL from a page
def get_video_url(page_id):
    try:
        url = f"https://tv.infowars.com/index/display/id/{page_id}"
        response = scraper.get(url)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        # Each video page links to its formats by name; grab the anchor labeled "mp4"
        a_tags = soup.find_all('a', string='mp4')
        for a in a_tags:
            video_url = a.get('href')
            if video_url and video_url.endswith('.mp4'):
                return video_url
    except requests.RequestException as e:
        print(f"Error fetching page {page_id}: {e}")
    return None

# Main function to iterate through pages and download videos
def download_videos(start_id, end_id):
    for page_id in range(start_id, end_id + 1):
        video_url = get_video_url(page_id)
        if video_url:
            file_name = video_url.split('/')[-1]
            save_path = os.path.join(SAVE_DIR, file_name)
            download_file(video_url, save_path)
        else:
            print(f"No MP4 link found on page {page_id}")

# Start and end page IDs
START_ID = 1
END_ID = 13762

# Start downloading videos
download_videos(START_ID, END_ID)
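
One caveat: with roughly 13,762 pages to walk, an interrupted run restarts from START_ID and re-downloads everything. A minimal sketch of a resumable variant, not part of the original gist, is to skip files that already exist on disk before fetching them:

# Sketch: a resumable variant of download_videos. Files already present
# in SAVE_DIR are skipped; note that a partially written file from a
# crashed run is also skipped, so delete incomplete files before resuming.
def download_videos(start_id, end_id):
    for page_id in range(start_id, end_id + 1):
        video_url = get_video_url(page_id)
        if not video_url:
            print(f"No MP4 link found on page {page_id}")
            continue
        file_name = video_url.split('/')[-1]
        save_path = os.path.join(SAVE_DIR, file_name)
        if os.path.exists(save_path):
            print(f"Skipping {file_name} (already downloaded)")
            continue
        download_file(video_url, save_path)

You can also narrow START_ID and END_ID to a small range first to confirm everything works before committing to the full crawl.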