Skip to content

Instantly share code, notes, and snippets.

@SteampunkEngi
Last active February 24, 2023 09:05
Show Gist options
  • Save SteampunkEngi/a61c73545c27e9afc4b73316af7274dc to your computer and use it in GitHub Desktop.
Save SteampunkEngi/a61c73545c27e9afc4b73316af7274dc to your computer and use it in GitHub Desktop.
SteampunkEngi's Shitty Scraper Script for CivitAI
import requests
import os.path
import re
import time
# SteampunkEngi's Shitty Scraper Script for CivitAI. Source: https://gist.github.com/SteampunkEngi/a61c73545c27e9afc4b73316af7274dc
# This script automatically scrapes all models of one type. For each model it downloads the model file, all of its metadata, and a thumbnail picture.
# Made for python 3.10, other versions not tested
# Looking for manual downloads? see https://github.com/axsddlr/civitai_downloader
# To scrape a different model type, replace "Checkpoint" below with one of: 'Checkpoint', 'TextualInversion', 'Hypernetwork', 'AestheticGradient' or 'LORA'. Case sensitive.
# If the script crashes, check the console for the last page that was completely downloaded and put that number into the page= parameter of the URL below.
# First listing page to scrape; the "types", "sort" and "page" query
# parameters select what is downloaded and where the run starts.
api_model_mostdownloaded_url = "https://civitai.com/api/v1/models?types=Checkpoint&sort=Most%20Downloaded&page=1"
# Change this to where you want your stuff to be saved.
# Backslashes are escaped explicitly: "\C" and "\m" are not valid escape
# sequences and newer Python versions warn about them. The value is unchanged.
model_save_path = "C:\\CIVITAI-Local\\media\\Checkpoint\\"
####
#### That's it, you're done. Nothing to edit. Run this file now.
####
####
# Don't rename this:
api_url = "https://civitai.com/api/v1/models/"
# True: main() asks for model links on the console instead of scraping pages.
manualDownload = False
# Request another link when the previous one has finished downloading?
repeatManualDownloads = True
# True: downloadModels() keeps following the next listing page after the first.
automaticModelDownload = True
def getrequestURL(modelID):
    """Build the API address for one model.

    Args:
        modelID: Identifier of the model; converted with ``str()``.

    Returns:
        The module-level ``api_url`` with the model ID appended.
    """
    return f"{api_url}{modelID!s}"
def getID():
    """Ask for a model link on the console and return the model ID in it.

    The ID is the run of digits following ``/models/`` in the link, so
    links with or without a scheme, API links, and links carrying a query
    string are all understood. Links that do not match fall back to the
    fifth "/"-separated segment.

    Returns:
        The model ID as a string.

    Raises:
        ValueError: If no model ID can be found in the entered text.
    """
    modelLink = input("gib link: ").strip()

    # Preferred: locate the numeric ID right after the "/models/" segment.
    found = re.search(r"/models/(\d+)", modelLink)
    if found:
        return found.group(1)

    # Fallback: positional parsing for link shapes the pattern does not
    # cover.
    segments = modelLink.split("/")
    if len(segments) > 4 and segments[4]:
        return segments[4]

    raise ValueError("Could not find a model ID in link: " + repr(modelLink))
def _requestWithRetry(url, retryDelaySeconds=5, **requestArgs):
    """Send a GET request, retrying while the server answers 500 or 525.

    Args:
        url: Address to request.
        retryDelaySeconds: Pause between attempts, in seconds.
        **requestArgs: Extra keyword arguments forwarded to ``requests.get``.

    Returns:
        The first response whose status code is neither 500 nor 525.
    """
    response = requests.get(url, **requestArgs)
    # NOTE: there is no retry cap, so a server that keeps failing keeps this
    # loop running until the script is interrupted.
    while response.status_code in (500, 525):
        print("Request returned " + str(response.status_code) +
              ". Trying again in " + str(retryDelaySeconds) + " seconds.")
        time.sleep(retryDelaySeconds)
        response = requests.get(url, **requestArgs)
    return response


def _stripModelExtension(fileName):
    """Return *fileName* without a trailing ".safetensors" and/or ".pt"."""
    for extension in (".safetensors", ".pt"):
        # Only cut a real suffix; never truncate at a match inside the name.
        if fileName.endswith(extension):
            fileName = fileName[:-len(extension)]
    return fileName


def downloadModelWithMetadata(requestURL, downloadDirectory):
    """Download one model, its metadata and its preview picture.

    Only the first file of the first listed model version is fetched. Each
    of the three output files is skipped when it already exists in
    *downloadDirectory*, and nothing is requested for a skipped file.

    Args:
        requestURL: API address that returns the model's metadata as JSON.
        downloadDirectory: Existing directory the files are written into.
    """
    metadatajson = _requestWithRetry(requestURL).json()

    # Models without any version or file cannot be downloaded; skip them
    # instead of crashing the whole run.
    modelVersions = metadatajson.get("modelVersions") or []
    firstVersion = modelVersions[0] if modelVersions else {}
    files = firstVersion.get("files") or []
    if not files:
        print("! Skipping model at: " + str(requestURL) +
              ". Reason: No downloadable file listed.")
        return
    modelFile = files[0]

    # "<" and ">" are removed from the file name before it is used on disk.
    name_of_model_file = modelFile["name"].replace("<", "").replace(">", "")
    name_of_model_file_without_extention = _stripModelExtension(
        name_of_model_file)

    # Save metadata as .metainfo (you can open this with notepad).
    combined_metadata_path = os.path.join(
        downloadDirectory, name_of_model_file_without_extention + ".metainfo")
    if not os.path.exists(combined_metadata_path):
        with open(combined_metadata_path, "w", encoding="utf-8") as metainfo_file:
            # str() writes the Python representation of the dict, which is
            # readable in a text editor but is not strict JSON.
            metainfo_file.write(str(metadatajson))
    else:
        print("! Skipping metadata download for: " +
              name_of_model_file + ". Reason: File already exists.")

    # Download the model file, but only when it is not on disk yet.
    combined_model_path = os.path.join(downloadDirectory, name_of_model_file)
    if not os.path.exists(combined_model_path):
        downloadRequest = _requestWithRetry(
            modelFile["downloadUrl"], allow_redirects=True)
        if downloadRequest.ok:
            with open(combined_model_path, 'wb') as content_file:
                content_file.write(downloadRequest.content)
        else:
            # Never store an error page under the model's file name: the
            # "already exists" check would then skip it on every later run.
            print("! Skipping model download for: " + name_of_model_file +
                  ". Reason: Server returned " +
                  str(downloadRequest.status_code) + ".")
    else:
        print("! Skipping model download for: " +
              name_of_model_file + ". Reason: File already exists.")

    # Download the preview picture (first image of the first version).
    combined_image_path = os.path.join(
        downloadDirectory, name_of_model_file_without_extention + ".preview.png")
    images = firstVersion.get("images") or []
    if os.path.exists(combined_image_path):
        print("! Skipping image download for: " +
              name_of_model_file + ". Reason: File already exists.")
    elif not images:
        print("! Skipping image download for: " +
              name_of_model_file + ". Reason: No image listed.")
    else:
        pictureRequest = _requestWithRetry(
            images[0]["url"], allow_redirects=True)
        if pictureRequest.ok:
            with open(combined_image_path, 'wb') as content_file:
                content_file.write(pictureRequest.content)
        else:
            print("! Skipping image download for: " + name_of_model_file +
                  ". Reason: Server returned " +
                  str(pictureRequest.status_code) + ".")

    print("Downloading done.")
def manualModelDownloading():
    """Ask for one model link and download that model into model_save_path.

    The link is read with getID(), turned into an API address with
    getrequestURL(), and passed to downloadModelWithMetadata() together
    with the configured save directory.
    """
    # getrequestURL() needs the model ID, so ask the user for the link first.
    requestURL = getrequestURL(getID())
    # The save directory may not exist yet on a first run.
    os.makedirs(model_save_path, exist_ok=True)
    downloadModelWithMetadata(requestURL, model_save_path)
def downloadModelPage(metadatajson):
    """Download every model listed on one page of API results.

    Each model is stored in its own folder under ``model_save_path``. The
    folder name is the model name with every character other than word
    characters, digits and "-" replaced by "_".

    Args:
        metadatajson: Parsed page response; its "items" list and
            "metadata"/"currentPage" entry are read.
    """
    for entry in metadatajson["items"]:
        displayName = entry["name"]
        folderName = re.sub(r'[^\w\d-]', '_', displayName)
        targetFolder = model_save_path + folderName

        folderMissing = not os.path.exists(targetFolder)
        if folderMissing:
            os.makedirs(targetFolder)

        modelApiUrl = getrequestURL(entry["id"])
        downloadModelWithMetadata(modelApiUrl, targetFolder)
        print(f"Done downloading: {displayName}")

    pageNumber = str(metadatajson["metadata"]["currentPage"])
    print(f"--- Done downloading page: {pageNumber} ---")
def downloadModels(api_Model_url):
    """Download the models of a listing page and, optionally, all later pages.

    The first page is always downloaded. While the module-level
    ``automaticModelDownload`` flag is true, the "nextPage" address from
    each page's metadata is followed until a page no longer provides one.

    Args:
        api_Model_url: API address of the first listing page to download.
    """
    nextPageUrl = api_Model_url
    while nextPageUrl:
        response = requests.get(nextPageUrl)
        # Every page gets the same retry on 500/525, not only the first one.
        while response.status_code in (500, 525):
            print("Request returned " + str(response.status_code) +
                  ". Trying again in 5 seconds.")
            time.sleep(5)
            response = requests.get(nextPageUrl)
        metadatajson = response.json()
        downloadModelPage(metadatajson)

        if not automaticModelDownload:
            break
        # A missing or empty "nextPage" means there is nothing left to
        # fetch; stop here instead of failing on the lookup.
        nextPageUrl = (metadatajson.get("metadata") or {}).get("nextPage")
def main():
    """Run the scraper in the mode selected by the module-level flags.

    With ``manualDownload`` set, one model link is requested and downloaded,
    and further links are requested for as long as ``repeatManualDownloads``
    is true. Otherwise the listing at ``api_model_mostdownloaded_url`` is
    downloaded.
    """
    if manualDownload:
        # run at least once
        manualModelDownloading()
        while repeatManualDownloads:
            manualModelDownloading()
    else:
        downloadModels(api_model_mostdownloaded_url)


# Only start downloading when executed as a script, so that importing this
# file does not trigger network and disk activity.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment