Skip to content

Instantly share code, notes, and snippets.

@umstek
Created April 15, 2024 10:01
Show Gist options
  • Save umstek/89b822fd864b330c32cea2f8e16214d5 to your computer and use it in GitHub Desktop.
Save umstek/89b822fd864b330c32cea2f8e16214d5 to your computer and use it in GitHub Desktop.
downloader
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"authorship_tag": "ABX9TyM+Nh5i8UEq+9W/lesUsuFv",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/umstek/89b822fd864b330c32cea2f8e16214d5/downloader.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Mu2BEttX2kbk",
"outputId": "e6dd7ceb-9526-4d38-c816-708f9d9224e3"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount(\"/content/gdrive\", force_remount=True).\n"
]
}
],
"source": [
"from google.colab import drive\n",
"drive.mount('/content/gdrive')"
]
},
{
"cell_type": "code",
"source": [
"# prompt: Scrape this page https://archive.org/download/x-men-the-animated-series-1080p-ai-upscale_202204 and extract all links to an mp4 file\n",
"\n",
"import requests\n",
"from bs4 import BeautifulSoup\n",
"\n",
"url = 'https://archive.org/download/x-men-the-animated-series-1080p-ai-upscale_202204'\n",
"\n",
"response = requests.get(url)\n",
"soup = BeautifulSoup(response.text, 'html.parser')\n",
"\n",
"mp4_links = []\n",
"\n",
"for link in soup.find_all('a'):\n",
" if 'href' in link.attrs and link['href'].endswith('.mp4'):\n",
" mp4_links.append(url + '/' + link['href'])\n",
"\n",
"print(len(mp4_links))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "ZUfdoJph3dpJ",
"outputId": "aaa754c9-76cb-4e6c-b623-e15b37f60172"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"76\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"import urllib.parse\n",
"\n",
"def clean_file_name(fn):\n",
" return urllib.parse.unquote(fn)\n"
],
"metadata": {
"id": "QXPMp8Nd89PJ"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# prompt: Download each link in the above mp4_links array into /content/gdrive/MyDrive/downloads\n",
"\n",
"import os\n",
"import requests\n",
"\n",
"download_dir = '/content/gdrive/MyDrive/downloads'\n",
"\n",
"# Download each MP4 file\n",
"for link in mp4_links:\n",
" filename = clean_file_name(os.path.basename(link))\n",
" response = requests.get(link)\n",
" file_path = os.path.join(download_dir, filename)\n",
" print(file_path)\n",
" with open(file_path, 'wb') as f:\n",
" f.write(response.content)\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "NZIIxjOl7jbC",
"outputId": "917f1321-9a7a-447a-b8a1-a2e5a9446514"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"/content/gdrive/MyDrive/downloads/EP01 - Night of the Sentinels.mp4\n",
"/content/gdrive/MyDrive/downloads/EP02 - Night of the Sentinels Pt. 2.mp4\n",
"/content/gdrive/MyDrive/downloads/EP03 - Enter Magneto.mp4\n",
"/content/gdrive/MyDrive/downloads/EP04 - Deadly Reunions.mp4\n",
"/content/gdrive/MyDrive/downloads/EP05 - Captive Hearts.mp4\n",
"/content/gdrive/MyDrive/downloads/EP06 - Cold Vengeance.mp4\n",
"/content/gdrive/MyDrive/downloads/EP07 - Slave Island.mp4\n",
"/content/gdrive/MyDrive/downloads/EP08 - The Unstoppable Juggernaut.mp4\n",
"/content/gdrive/MyDrive/downloads/EP09 - The Cure.mp4\n",
"/content/gdrive/MyDrive/downloads/EP10 - Come the Apocalypse.mp4\n",
"/content/gdrive/MyDrive/downloads/EP11 - Days of Future Past.mp4\n",
"/content/gdrive/MyDrive/downloads/EP12 - Days of Future Past Pt. 2.mp4\n",
"/content/gdrive/MyDrive/downloads/EP13 - The Final Decision.mp4\n",
"/content/gdrive/MyDrive/downloads/EP14 - Till Death Do Us Part.mp4\n",
"/content/gdrive/MyDrive/downloads/EP15 - Till Death Do Us Part Pt. 2.mp4\n",
"/content/gdrive/MyDrive/downloads/EP16 - Whatever It Takes.mp4\n",
"/content/gdrive/MyDrive/downloads/EP17 - Red Dawn.mp4\n",
"/content/gdrive/MyDrive/downloads/EP18 - Repo Man.mp4\n",
"/content/gdrive/MyDrive/downloads/EP19 - X-Ternally Yours.mp4\n",
"/content/gdrive/MyDrive/downloads/EP20 - Time Fugitives.mp4\n",
"/content/gdrive/MyDrive/downloads/EP21 - Time Fugitives Pt. 2.mp4\n",
"/content/gdrive/MyDrive/downloads/EP22 - A Rogue's Tale.mp4\n",
"/content/gdrive/MyDrive/downloads/EP23 - Beauty & the Beast.mp4\n",
"/content/gdrive/MyDrive/downloads/EP24 - Mojovision.mp4\n",
"/content/gdrive/MyDrive/downloads/EP25 - Reunion.mp4\n",
"/content/gdrive/MyDrive/downloads/EP26 - Reunion Pt. 2.mp4\n",
"/content/gdrive/MyDrive/downloads/EP27 - Out of the Past.mp4\n",
"/content/gdrive/MyDrive/downloads/EP28 - Out of the Past Pt. 2.mp4\n",
"/content/gdrive/MyDrive/downloads/EP29 - The Phoenix Saga, Part I Sacrifice.mp4\n",
"/content/gdrive/MyDrive/downloads/EP30 - The Phoenix Saga, Part II The Dark Shroud.mp4\n",
"/content/gdrive/MyDrive/downloads/EP31 - The Phoenix Saga, Part III The Cry of the Banshee.mp4\n",
"/content/gdrive/MyDrive/downloads/EP32 - The Phoenix Saga, Part IV The Starjammers.mp4\n",
"/content/gdrive/MyDrive/downloads/EP33 - The Phoenix Saga, Part V Child of Light.mp4\n",
"/content/gdrive/MyDrive/downloads/EP34 - No Mutant Is an Island.mp4\n",
"/content/gdrive/MyDrive/downloads/EP35 - Obsession.mp4\n",
"/content/gdrive/MyDrive/downloads/EP36 - Longshot.mp4\n",
"/content/gdrive/MyDrive/downloads/EP37 - Cold Comfort.mp4\n",
"/content/gdrive/MyDrive/downloads/EP38 - Savage Land, Strange Heart - Part One.mp4\n",
"/content/gdrive/MyDrive/downloads/EP39 - Savage Land, Strange Heart - Part Two.mp4\n",
"/content/gdrive/MyDrive/downloads/EP40 - The Dark Phoenix, Part I Dazzled.mp4\n",
"/content/gdrive/MyDrive/downloads/EP41 - The Dark Phoenix, Part II The Inner Circle.mp4\n",
"/content/gdrive/MyDrive/downloads/EP42 - The Dark Phoenix, Part III The Dark Phoenix.mp4\n",
"/content/gdrive/MyDrive/downloads/EP43 - The Dark Phoenix, Part IV The Fate of the Phoenix.mp4\n",
"/content/gdrive/MyDrive/downloads/EP44 - Orphan's End.mp4\n",
"/content/gdrive/MyDrive/downloads/EP45 - Love in Vain.mp4\n",
"/content/gdrive/MyDrive/downloads/EP46 - The Juggernaut Returns.mp4\n",
"/content/gdrive/MyDrive/downloads/EP47 - A Deal with the Devil.mp4\n",
"/content/gdrive/MyDrive/downloads/EP48 - Sanctuary (Part 1).mp4\n",
"/content/gdrive/MyDrive/downloads/EP49 - Sanctuary (Part 2).mp4\n",
"/content/gdrive/MyDrive/downloads/EP50 - Xavier Remembers.mp4\n",
"/content/gdrive/MyDrive/downloads/EP51 - Courage.mp4\n",
"/content/gdrive/MyDrive/downloads/EP52 - Secrets, Not Long Buried.mp4\n",
"/content/gdrive/MyDrive/downloads/EP53 - Nightcrawler.mp4\n",
"/content/gdrive/MyDrive/downloads/EP54 - One Man's Worth (Part 1).mp4\n",
"/content/gdrive/MyDrive/downloads/EP55 - One Man's Worth (Part 2).mp4\n",
"/content/gdrive/MyDrive/downloads/EP56 - Proteus (Part 1).mp4\n",
"/content/gdrive/MyDrive/downloads/EP57 - Proteus (Part 2).mp4\n",
"/content/gdrive/MyDrive/downloads/EP58 - Family Ties.mp4\n",
"/content/gdrive/MyDrive/downloads/EP59 - Bloodlines.mp4\n",
"/content/gdrive/MyDrive/downloads/EP60 - Lotus and the Steel.mp4\n",
"/content/gdrive/MyDrive/downloads/EP61 - Weapon X, Lies, and Video Tape.mp4\n",
"/content/gdrive/MyDrive/downloads/EP62 - Have Yourself a Morlock Little X-Mas.mp4\n",
"/content/gdrive/MyDrive/downloads/EP63 - Beyond Good and Evil (Part 1) The End of Time.mp4\n",
"/content/gdrive/MyDrive/downloads/EP64 - Beyond Good and Evil (Part 2) Promise of Apocalypse.mp4\n",
"/content/gdrive/MyDrive/downloads/EP65 - Beyond Good and Evil (Part 3) The Lazarus Chamber.mp4\n",
"/content/gdrive/MyDrive/downloads/EP66 - Beyond Good and Evil (Part 4) End and Beginning.mp4\n",
"/content/gdrive/MyDrive/downloads/EP67 - The Phalanx Covenant (Part 1).mp4\n",
"/content/gdrive/MyDrive/downloads/EP68 - The Phalanx Covenant (Part 2).mp4\n",
"/content/gdrive/MyDrive/downloads/EP69 - Storm Front (Part 1).mp4\n",
"/content/gdrive/MyDrive/downloads/EP70 - Storm Front (Part 2).mp4\n",
"/content/gdrive/MyDrive/downloads/EP71 - The Fifth Horseman.mp4\n",
"/content/gdrive/MyDrive/downloads/EP72 - Jubilees Fairytale Theatre.mp4\n",
"/content/gdrive/MyDrive/downloads/EP73 - Old Soldiers.mp4\n",
"/content/gdrive/MyDrive/downloads/EP74 - Hidden Agendas.mp4\n",
"/content/gdrive/MyDrive/downloads/EP75 - Descent.mp4\n",
"/content/gdrive/MyDrive/downloads/EP76 - Graduation Day.mp4\n"
]
}
]
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment