SteampunkEngi/CivitAIScraper.py

## CivitAIScraper.py
import requests
import os.path
import re
import time

# SteampunkEngi's Shitty Scraper Script for CivitAI. Source: https://gist.github.com/SteapunkEngi/a61c73545c27e9afc4b73316af7274dc
# This script automatically scrapes all models of a type. downloads the model/Model/whatever + all metadata + thumbnail picture
# Made for python 3.10, other versions not tested
# Looking for manual downloads? see https://github.com/axsddlr/civitai_downloader

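# To scrape a different model type, replace "Checkpoint" in the URL below with one of:
# 'Checkpoint', 'TextualInversion', 'Hypernetwork', 'AestheticGradient' or 'LORA'. Case sensitive.
# If the script crashes, check the console for the last page that was completely downloaded and
# use that number as the page= value in the URL below to resume from there (files that already exist are skipped).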
api_model_mostdownloaded_url = "https://civitai.com/api/v1/models?types=Checkpoint&sort=Most%20Downloaded&page=1"

# Change this to where you want your stuff to be saved.
# Backslashes are escaped explicitly: sequences such as "\C" and "\m" are invalid
# escapes and trigger a warning on recent Python versions. The trailing separator
# matters because folder names are appended to this string directly.
model_save_path = "C:\\CIVITAI-Local\\media\\Checkpoint\\"

####
#### That's it, you're done. Nothing to edit. Run this file now.
####
####

# Don't rename this:
api_url = "https://civitai.com/api/v1/models/"

# manualDownload: ask for a model link on the console instead of scraping the listing.
# repeatManualDownloads: request another link when the previous one has finished downloading?
# automaticModelDownload: keep following the listing's next page after the first one.
manualDownload = False
repeatManualDownloads = True
automaticModelDownload = True


def getrequestURL(modelID):
    """Return the API URL for one model: ``api_url`` followed by the model ID."""
    return f"{api_url}{modelID!s}"


def getID():
    """Prompt for a CivitAI model link and return the model ID it contains.

    The ID is taken from the ``/models/<digits>`` segment of the link, so
    links with a trailing name, a query string (``?modelVersionId=...``) or
    no ``https://`` prefix are all accepted.

    Returns:
        The model ID as a string of digits.

    Raises:
        ValueError: if the entered text has no ``/models/<digits>`` segment.
    """
    modelLink = input("gib link: ").strip()

    # Anchor on the path segment rather than a fixed split index, which
    # breaks as soon as the link shape varies.
    match = re.search(r"/models/(\d+)", modelLink)
    if match is None:
        raise ValueError("Could not find a model ID in link: " + modelLink)

    return match.group(1)


def _get_with_retry(url, **kwargs):
    """GET *url*, retrying every 5 seconds while the server answers 500 or 525."""
    response = requests.get(url, **kwargs)
    while response.status_code in (500, 525):
        print("Request returned " + str(response.status_code) +
              ". Trying again in 5 seconds.")
        # Release the failed connection before retrying (matters for streamed requests).
        response.close()
        time.sleep(5)
        response = requests.get(url, **kwargs)
    return response


def _stream_to_file(url, destination):
    """Stream *url* into *destination* chunk by chunk via a temporary ``.part`` file."""
    partial_path = destination + ".part"
    with _get_with_retry(url, allow_redirects=True, stream=True) as download:
        if not download.ok:
            # Do not save an error page under the model/image file name.
            print("! Skipping download of: " + destination +
                  ". Reason: HTTP " + str(download.status_code) + ".")
            return
        with open(partial_path, 'wb') as content_file:
            for chunk in download.iter_content(chunk_size=1024 * 1024):
                content_file.write(chunk)
    # Rename only after the download completed, so an interrupted run never
    # leaves a truncated file that a later run would skip as "already exists".
    os.replace(partial_path, destination)


def downloadModelWithMetadata(requestURL, downloadDirectory):
    """Download one model's metadata, model file and preview image.

    Fetches the model description from *requestURL* and, for the first file
    of the first model version, writes into *downloadDirectory*:

    * ``<name>.metainfo``    - the metadata (Python ``str()`` of the parsed JSON),
    * ``<file name>``        - the model file itself,
    * ``<name>.preview.png`` - the first preview image of that version.

    Each of the three is skipped when the target file already exists; the
    existence check happens before any download is started.

    Args:
        requestURL: API URL describing a single model.
        downloadDirectory: existing directory to write the files into.
    """
    # insert model ID into api URL for get request
    metadatajson = _get_with_retry(requestURL).json()

    versions = metadatajson.get("modelVersions") or []
    files = (versions[0].get("files") or []) if versions else []
    if not files:
        print("! Skipping model: " + str(metadatajson.get("name")) +
              ". Reason: No downloadable files listed.")
        return
    first_version = versions[0]
    first_file = files[0]

    # Set directory to save to by combining save path with file name
    name_of_model_file = first_file["name"].replace("<", "").replace(">", "")
    name_of_model_file_without_extention = name_of_model_file.split(
        ".safetensors")[0].split(".pt")[0]

    # download metadata as .metainfo (you can open this with notepad)
    combined_metadata_path = os.path.join(
        downloadDirectory, name_of_model_file_without_extention + ".metainfo")
    if not os.path.exists(combined_metadata_path):
        with open(combined_metadata_path, "w", encoding="utf-8") as metainfo_file:
            # NOTE: this is the Python repr of the dict, not strict JSON.
            metainfo_file.write(str(metadatajson))
    else:
        print("! Skipping metadata download for: " +
              name_of_model_file + ". Reason: File already exists.")

    # download model
    combined_model_path = os.path.join(downloadDirectory, name_of_model_file)
    if not os.path.exists(combined_model_path):
        _stream_to_file(first_file["downloadUrl"], combined_model_path)
    else:
        print("! Skipping model download for: " +
              name_of_model_file + ". Reason: File already exists.")

    # download picture
    combined_image_path = os.path.join(
        downloadDirectory, name_of_model_file_without_extention + ".preview.png")
    images = first_version.get("images") or []
    if os.path.exists(combined_image_path):
        print("! Skipping image download for: " +
              name_of_model_file + ". Reason: File already exists.")
    elif not images:
        print("! Skipping image download for: " +
              name_of_model_file + ". Reason: No preview image listed.")
    else:
        _stream_to_file(images[0]["url"], combined_image_path)

    print("Downloading done.")


def manualModelDownloading():
    """Ask for a model link on the console and download that model.

    The files are written directly into ``model_save_path``.
    """
    # The original call passed no model ID and no target directory, which
    # raised TypeError before anything was downloaded.
    requestURL = getrequestURL(getID())
    # The download routine writes into the directory as-is, so create it first.
    os.makedirs(model_save_path, exist_ok=True)
    downloadModelWithMetadata(requestURL, model_save_path)


def downloadModelPage(metadatajson):
    """Download every model listed on one page of the model listing.

    For each entry in ``metadatajson["items"]`` a folder is created under
    ``model_save_path``, named after the model with every character that is
    not a word character or ``-`` replaced by ``_``, and the model is
    downloaded into it. Afterwards the page number from
    ``metadatajson["metadata"]["currentPage"]`` is printed.
    """
    for entry in metadatajson["items"]:
        display_name = entry["name"]
        folder_name = re.sub(r'[^\w\d-]', '_', display_name)
        target_dir = model_save_path + folder_name
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
        request_url = getrequestURL(entry["id"])
        downloadModelWithMetadata(request_url, target_dir)
        print("Done downloading: " + display_name)

    current_page = metadatajson["metadata"]["currentPage"]
    print("--- Done downloading page: " + str(current_page) + " ---")


def downloadModels(api_Model_url):
    """Download all models of the listing that starts at *api_Model_url*.

    The first page is always downloaded. While ``automaticModelDownload`` is
    true, the ``nextPage`` URL from each page's ``metadata`` is followed; the
    function returns once a page provides no ``nextPage``.

    Every page request is retried every 5 seconds while the server answers
    500 or 525.
    """
    next_page_url = api_Model_url
    while next_page_url:
        response = requests.get(next_page_url)
        while response.status_code in (500, 525):
            print("Request returned " + str(response.status_code) +
                  ". Trying again in 5 seconds.")
            time.sleep(5)
            response = requests.get(next_page_url)
        metadatajson = response.json()
        downloadModelPage(metadatajson)

        if not automaticModelDownload:
            break
        # A missing nextPage marks the last page; stop instead of raising KeyError.
        next_page_url = (metadatajson.get("metadata") or {}).get("nextPage")


def main():
    """Run the scraper in the mode selected by the module-level flags.

    With ``manualDownload`` set, one model is requested on the console and
    further ones are requested for as long as ``repeatManualDownloads`` is
    true. Otherwise the listing at ``api_model_mostdownloaded_url`` is scraped.
    """
    if not manualDownload:
        downloadModels(api_model_mostdownloaded_url)
        return

    # run at least once
    manualModelDownloading()
    while repeatManualDownloads:
        manualModelDownloading()


# Only start scraping when executed as a script, not when the module is imported.
if __name__ == "__main__":
    main()
	import requests
	import os.path
	import re
	import time

	# SteampunkEngi's Shitty Scraper Script for CivitAI. Source: https://gist.github.com/SteapunkEngi/a61c73545c27e9afc4b73316af7274dc
	# This script automatically scrapes all models of a type. downloads the model/Model/whatever + all metadata + thumbnail picture
	# Made for python 3.10, other versions not tested
	# Looking for manual downloads? see https://github.com/axsddlr/civitai_downloader

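	# To scrape a different model type, replace "Checkpoint" in the URL below with one of:
	# 'Checkpoint', 'TextualInversion', 'Hypernetwork', 'AestheticGradient' or 'LORA'. Case sensitive.
	# If the script crashes, check the console for the last page that was completely downloaded and
	# use that number as the page= value in the URL below to resume from there (files that already exist are skipped).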
	api_model_mostdownloaded_url = "https://civitai.com/api/v1/models?types=Checkpoint&sort=Most%20Downloaded&page=1"

	# Change this to where you want your stuff to be saved.
	# Backslashes are escaped explicitly: sequences such as "\C" and "\m" are invalid
	# escapes and trigger a warning on recent Python versions. The trailing separator
	# matters because folder names are appended to this string directly.
	model_save_path = "C:\\CIVITAI-Local\\media\\Checkpoint\\"

	####
	#### That's it, you're done. Nothing to edit. Run this file now.
	####
	####

	# Don't rename this:
	api_url = "https://civitai.com/api/v1/models/"

	# manualDownload: ask for a model link on the console instead of scraping the listing.
	# repeatManualDownloads: request another link when the previous one has finished downloading?
	# automaticModelDownload: keep following the listing's next page after the first one.
	manualDownload = False
	repeatManualDownloads = True
	automaticModelDownload = True


	def getrequestURL(modelID):
	    """Return the API URL for one model: ``api_url`` followed by the model ID."""
	    return f"{api_url}{modelID!s}"


	def getID():
	    """Prompt for a CivitAI model link and return the model ID it contains.

	    The ID is taken from the ``/models/<digits>`` segment of the link, so
	    links with a trailing name, a query string (``?modelVersionId=...``) or
	    no ``https://`` prefix are all accepted.

	    Returns:
	        The model ID as a string of digits.

	    Raises:
	        ValueError: if the entered text has no ``/models/<digits>`` segment.
	    """
	    modelLink = input("gib link: ").strip()

	    # Anchor on the path segment rather than a fixed split index, which
	    # breaks as soon as the link shape varies.
	    match = re.search(r"/models/(\d+)", modelLink)
	    if match is None:
	        raise ValueError("Could not find a model ID in link: " + modelLink)

	    return match.group(1)


	def _get_with_retry(url, **kwargs):
	    """GET *url*, retrying every 5 seconds while the server answers 500 or 525."""
	    response = requests.get(url, **kwargs)
	    while response.status_code in (500, 525):
	        print("Request returned " + str(response.status_code) +
	              ". Trying again in 5 seconds.")
	        # Release the failed connection before retrying (matters for streamed requests).
	        response.close()
	        time.sleep(5)
	        response = requests.get(url, **kwargs)
	    return response


	def _stream_to_file(url, destination):
	    """Stream *url* into *destination* chunk by chunk via a temporary ``.part`` file."""
	    partial_path = destination + ".part"
	    with _get_with_retry(url, allow_redirects=True, stream=True) as download:
	        if not download.ok:
	            # Do not save an error page under the model/image file name.
	            print("! Skipping download of: " + destination +
	                  ". Reason: HTTP " + str(download.status_code) + ".")
	            return
	        with open(partial_path, 'wb') as content_file:
	            for chunk in download.iter_content(chunk_size=1024 * 1024):
	                content_file.write(chunk)
	    # Rename only after the download completed, so an interrupted run never
	    # leaves a truncated file that a later run would skip as "already exists".
	    os.replace(partial_path, destination)


	def downloadModelWithMetadata(requestURL, downloadDirectory):
	    """Download one model's metadata, model file and preview image.

	    Fetches the model description from *requestURL* and, for the first file
	    of the first model version, writes into *downloadDirectory*:

	    * ``<name>.metainfo``    - the metadata (Python ``str()`` of the parsed JSON),
	    * ``<file name>``        - the model file itself,
	    * ``<name>.preview.png`` - the first preview image of that version.

	    Each of the three is skipped when the target file already exists; the
	    existence check happens before any download is started.

	    Args:
	        requestURL: API URL describing a single model.
	        downloadDirectory: existing directory to write the files into.
	    """
	    # insert model ID into api URL for get request
	    metadatajson = _get_with_retry(requestURL).json()

	    versions = metadatajson.get("modelVersions") or []
	    files = (versions[0].get("files") or []) if versions else []
	    if not files:
	        print("! Skipping model: " + str(metadatajson.get("name")) +
	              ". Reason: No downloadable files listed.")
	        return
	    first_version = versions[0]
	    first_file = files[0]

	    # Set directory to save to by combining save path with file name
	    name_of_model_file = first_file["name"].replace("<", "").replace(">", "")
	    name_of_model_file_without_extention = name_of_model_file.split(
	        ".safetensors")[0].split(".pt")[0]

	    # download metadata as .metainfo (you can open this with notepad)
	    combined_metadata_path = os.path.join(
	        downloadDirectory, name_of_model_file_without_extention + ".metainfo")
	    if not os.path.exists(combined_metadata_path):
	        with open(combined_metadata_path, "w", encoding="utf-8") as metainfo_file:
	            # NOTE: this is the Python repr of the dict, not strict JSON.
	            metainfo_file.write(str(metadatajson))
	    else:
	        print("! Skipping metadata download for: " +
	              name_of_model_file + ". Reason: File already exists.")

	    # download model
	    combined_model_path = os.path.join(downloadDirectory, name_of_model_file)
	    if not os.path.exists(combined_model_path):
	        _stream_to_file(first_file["downloadUrl"], combined_model_path)
	    else:
	        print("! Skipping model download for: " +
	              name_of_model_file + ". Reason: File already exists.")

	    # download picture
	    combined_image_path = os.path.join(
	        downloadDirectory, name_of_model_file_without_extention + ".preview.png")
	    images = first_version.get("images") or []
	    if os.path.exists(combined_image_path):
	        print("! Skipping image download for: " +
	              name_of_model_file + ". Reason: File already exists.")
	    elif not images:
	        print("! Skipping image download for: " +
	              name_of_model_file + ". Reason: No preview image listed.")
	    else:
	        _stream_to_file(images[0]["url"], combined_image_path)

	    print("Downloading done.")


	def manualModelDownloading():
	    """Ask for a model link on the console and download that model.

	    The files are written directly into ``model_save_path``.
	    """
	    # The original call passed no model ID and no target directory, which
	    # raised TypeError before anything was downloaded.
	    requestURL = getrequestURL(getID())
	    # The download routine writes into the directory as-is, so create it first.
	    os.makedirs(model_save_path, exist_ok=True)
	    downloadModelWithMetadata(requestURL, model_save_path)


	def downloadModelPage(metadatajson):
	    """Download every model listed on one page of the model listing.

	    For each entry in ``metadatajson["items"]`` a folder is created under
	    ``model_save_path``, named after the model with every character that is
	    not a word character or ``-`` replaced by ``_``, and the model is
	    downloaded into it. Afterwards the page number from
	    ``metadatajson["metadata"]["currentPage"]`` is printed.
	    """
	    for entry in metadatajson["items"]:
	        display_name = entry["name"]
	        folder_name = re.sub(r'[^\w\d-]', '_', display_name)
	        target_dir = model_save_path + folder_name
	        if not os.path.exists(target_dir):
	            os.makedirs(target_dir)
	        request_url = getrequestURL(entry["id"])
	        downloadModelWithMetadata(request_url, target_dir)
	        print("Done downloading: " + display_name)

	    current_page = metadatajson["metadata"]["currentPage"]
	    print("--- Done downloading page: " + str(current_page) + " ---")


	def downloadModels(api_Model_url):
	    """Download all models of the listing that starts at *api_Model_url*.

	    The first page is always downloaded. While ``automaticModelDownload`` is
	    true, the ``nextPage`` URL from each page's ``metadata`` is followed; the
	    function returns once a page provides no ``nextPage``.

	    Every page request is retried every 5 seconds while the server answers
	    500 or 525.
	    """
	    next_page_url = api_Model_url
	    while next_page_url:
	        response = requests.get(next_page_url)
	        while response.status_code in (500, 525):
	            print("Request returned " + str(response.status_code) +
	                  ". Trying again in 5 seconds.")
	            time.sleep(5)
	            response = requests.get(next_page_url)
	        metadatajson = response.json()
	        downloadModelPage(metadatajson)

	        if not automaticModelDownload:
	            break
	        # A missing nextPage marks the last page; stop instead of raising KeyError.
	        next_page_url = (metadatajson.get("metadata") or {}).get("nextPage")


	def main():
	    """Run the scraper in the mode selected by the module-level flags.

	    With ``manualDownload`` set, one model is requested on the console and
	    further ones are requested for as long as ``repeatManualDownloads`` is
	    true. Otherwise the listing at ``api_model_mostdownloaded_url`` is scraped.
	    """
	    if not manualDownload:
	        downloadModels(api_model_mostdownloaded_url)
	        return

	    # run at least once
	    manualModelDownloading()
	    while repeatManualDownloads:
	        manualModelDownloading()


	# Only start scraping when executed as a script, not when the module is imported.
	if __name__ == "__main__":
	    main()