Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save schwittlick/a6a839b211060dcbf766d24b99e0ad1a to your computer and use it in GitHub Desktop.
Save schwittlick/a6a839b211060dcbf766d24b99e0ad1a to your computer and use it in GitHub Desktop.
This is a very basic, no-frills scraper that retrieves the metadata and digital assets of all tokens minted on hicetnunc.xyz. I am sharing it as a starting point for people who want to experiment with building alternative views of the works created on the platform, or who want to preserve the data. Feel free to improve upon it or add additional features.
import requests
import os
import ipfsapi
import concurrent.futures
# IPFS HTTP API client used by download() to fetch the token assets.
api = ipfsapi.Client(host='https://ipfs.infura.io', port=5001)

# Token listing of the contract whose assets are scraped.
url = "https://better-call.dev/v1/contract/mainnet/KT1RJ6PbjHpwc3M5rw5s2Nbmefwbuwbdxton/tokens"
# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() turns an HTTP error page into an immediate, clear
# failure instead of a confusing JSON/KeyError further down.
r = requests.get(url, timeout=60)
r.raise_for_status()
data = r.json()

# MIME type of an asset -> file suffix used when saving it.
format2suffix = {
    'image/png': "png",
    'image/jpeg': "jpg",
    'video/mp4': "mp4",
    'image/gif': "gif",
    'video/quicktime': "mov",
    'image/svg+xml': "svg",
    # NOTE(review): audio/mpeg is usually MP3 audio; "mpg" is kept so files
    # saved by earlier runs are still recognised as already downloaded.
    'audio/mpeg': "mpg",
    'application/pdf': "pdf",
    'image/tiff': "tif",
    'video/avi': "avi",
    'image/webp': "webp",
    'image/bmp': "bmp",
    'video/x-matroska': "mkv",
    'video/webm': "webm",
}

# All assets are written relative to this folder: the script changes its
# working directory into it before any download starts.
assetFolder = "assets/"
os.makedirs(assetFolder, exist_ok=True)
os.chdir(assetFolder)

print(len(data), "tokens")
def download(i):
    """Fetch the first listed asset of token ``data[i]`` from IPFS.

    The asset is saved in the current working directory as
    ``<token_id>.<suffix>``, where the suffix is looked up in
    ``format2suffix`` by the MIME type of the token's first format entry.
    Nothing is downloaded (and ``None`` is returned as always) when the
    index is past the end of ``data``, the token has no format
    information, its MIME type is unknown, its URI is not an ``ipfs://``
    URI, or the target file already exists.

    Args:
        i: Index of the token in the module-level ``data`` list.
    """
    # Callers hand out fixed-size batches of indices, so the last batch can
    # run past the end of the token list.
    if i >= len(data):
        return

    token = data[i]
    formats = (token.get("token_info") or {}).get("formats")
    if not formats:
        print("incomplete token data:", token)
        return

    mimeType = formats[0].get("mimeType")
    if mimeType not in format2suffix:
        print("unknown mime type:", mimeType)
        return

    rawUri = formats[0].get("uri", "")
    if not rawUri.startswith("ipfs://"):
        # Only IPFS-hosted assets can be fetched through the IPFS client.
        print("unsupported uri:", i, rawUri)
        return
    uri = rawUri[len("ipfs://"):]

    saveName = str(token["token_id"]) + "." + format2suffix[mimeType]
    if os.path.exists(saveName):
        # Already downloaded by an earlier run.
        return

    print("downloading", i, uri)
    # The client is expected to store the object in the working directory
    # under its IPFS path; it is then renamed to the token-based file name.
    api.get(uri)
    if os.path.exists(uri):
        os.rename(uri, saveName)
# Maximum number of downloads running at the same time.
parallel_dls = 6

# Validate every token on its own.  Checking only the first token of each
# batch would skip or mis-handle the remaining tokens of that batch.
pending = []
for index, token in enumerate(data):
    formats = (token.get("token_info") or {}).get("formats")
    if not formats:
        print ("incomplete token data:", token)
        continue
    mime = formats[0].get("mimeType")
    if mime not in format2suffix:
        print("unknown mime type:", mime)
        continue
    pending.append(index)

# A single pool bounds the parallelism for the whole run; only indices that
# actually exist in data are submitted, so nothing runs past the end.
with concurrent.futures.ThreadPoolExecutor(max_workers=parallel_dls) as executor:
    futures = {executor.submit(download, index): index for index in pending}
    for future in concurrent.futures.as_completed(futures):
        # Report a failed download instead of silently dropping it, while
        # still letting the remaining downloads continue.
        error = future.exception()
        if error is not None:
            print("download failed:", futures[future], repr(error))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment