Skip to content

Instantly share code, notes, and snippets.

@un1tz3r0
Created June 29, 2024 02:29
Show Gist options
  • Save un1tz3r0/9b95330b3f4cd50b2c3d5df556d2720f to your computer and use it in GitHub Desktop.
Save un1tz3r0/9b95330b3f4cd50b2c3d5df556d2720f to your computer and use it in GitHub Desktop.
Python: get list of ollama models from ollama.com/library

Scrape model listings from ollama.com/library

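This is some helper code I wrote to list the models available on ollama.com. `get_models()` returns a `(models, doc)` tuple: `models` is a list of dicts, one per model, holding its name, description, and the little metadata that is visible on the site, and `doc` is the parsed library page.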

Useful mostly for command-line junkies like myself: it shows which names you can pass to `ollama pull <modelname>` when downloading new models to try out.

import aiohttp, asyncio, yarl, bs4, re, json
async def get_ollama_library(**query):
    """Scrape the model listing shown at https://ollama.com/library.

    Args:
        **query: Optional URL query parameters appended to the library page
            request. Which parameters the site honours is not checked here.

    Returns:
        A tuple ``(modelinfos, doc)``. ``modelinfos`` is a list with one dict
        per ``<h2>`` card found on the page; each dict has ``name`` and
        ``desc`` keys plus any label/value metadata parsed from the card
        (numeric values as ``float``, "... ago" values as seconds when they
        can be parsed, everything else as lower-cased text). ``doc`` is the
        parsed ``bs4.BeautifulSoup`` document.
    """
    # The original pattern still required a trailing " ago" after that suffix
    # had already been stripped, so it could never match. It also accepted an
    # empty number, which float() rejects.
    age_re = re.compile(
        r"^(?P<num>\d+(?:\.\d*)?|\.\d+) *"
        r"(?P<unit>d(?:ay)?|h(?:ou)?r?|mo(?:nth)?|m(?:in(?:ute)?)?"
        r"|s(?:ec(?:ond)?)?|w(?:(?:ee)?k)?|y(?:(?:ea)?r)?)s?$"
    )
    # Seconds per unit, looked up by prefix; "mo" must be tested before "m"
    # so that months are not mistaken for minutes.
    age_units = (
        ("s", 1.0),
        ("mo", 60.0 * 60.0 * 24.0 * 30.0),
        ("m", 60.0),
        ("h", 60.0 * 60.0),
        ("d", 60.0 * 60.0 * 24.0),
        ("w", 60 * 60 * 24 * 7),
        ("y", 60 * 60 * 24 * 365),
    )

    def parseage(s):
        """Convert relative-age text such as "2 days ago" into seconds.

        Returns the age as a float number of seconds, or the normalised
        (lower-cased, stripped) text when it cannot be parsed.
        """
        s = str(s).lower().strip()
        if not s.endswith(" ago"):
            return s
        s = s[: -len(" ago")].strip()
        m = age_re.match(s)
        if m is None:
            return s
        num = float(m.group("num"))
        unit = m.group("unit")
        for prefix, factor in age_units:
            if unit.startswith(prefix):
                return num * factor
        return s

    def parsevalue(v):
        """Turn one metadata value into a float, an age in seconds, or text."""
        if v and all(ch.isnumeric() or ch in "-." for ch in v):
            try:
                return float(v)
            except ValueError:
                # e.g. "-" or "1.2.3": looks numeric but is not a number.
                pass
        if v.endswith(" ago"):
            return parseage(v)
        return v

    def getmodelinfo(elem):
        """Build the info dict for one model card element."""
        paragraphs = elem.find_all("p")
        heading = elem.find("h2")
        title_lines = heading.text.strip().splitlines() if heading is not None else []
        name = title_lines[0] if title_lines else ""
        # The first paragraph is the description; the rest carry metadata.
        desc = paragraphs[0].text.strip() if paragraphs else ""
        info = {}
        for p in paragraphs[1:]:
            for chunk in p.text.strip().split("\n\n\n"):
                if not chunk.strip():
                    continue
                words = [w.strip().lower().replace(",", "") for w in chunk.split("\xa0")]
                # Stable sort: the part without digits (the label) goes first,
                # the part containing digits (the value) second.
                words.sort(key=lambda w: any(ch.isnumeric() for ch in w))
                if len(words) != 2:
                    # Not a label/value pair; skip instead of failing to unpack.
                    continue
                k, v = words
                info[k] = parsevalue(v)
        # name/desc come last so they win over any same-named metadata key.
        return {**info, "name": name, "desc": desc}

    url = yarl.URL("https://ollama.com/library")
    if query:
        # yarl URLs are immutable: update_query returns a new URL.
        url = url.update_query(query)

    async with aiohttp.ClientSession() as sess:
        # The context manager releases the connection once the body is read.
        async with sess.get(str(url)) as res:
            html = await res.text()

    # Name the parser explicitly so results do not depend on which optional
    # parser packages happen to be installed.
    doc = bs4.BeautifulSoup(html, "html.parser")

    modelinfos = []
    for h2elem in doc.find_all("h2"):
        card = h2elem.parent.parent if h2elem.parent is not None else None
        if card is None:
            continue
        minfo = getmodelinfo(card)
        # Echo each entry as it is parsed (kept from the original behaviour).
        print(repr(minfo))
        modelinfos.append(minfo)
    return modelinfos, doc
def get_models():
    """Synchronous wrapper around ``get_ollama_library``.

    Runs the coroutine with no query arguments via ``asyncio.run`` and
    returns its result unchanged, i.e. the ``(modelinfos, doc)`` tuple.
    """
    import asyncio

    library_coro = get_ollama_library()
    return asyncio.run(library_coro)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment