Created
May 3, 2022 08:26
-
-
Save greyblue9/38e2f171c0247eff5a60a6480be932a1 to your computer and use it in GitHub Desktop.
pypi Search
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
from datetime import datetime | |
from sys import argv | |
from bs4 import BeautifulSoup, GuessedAtParserWarning, Tag | |
from warnings import simplefilter | |
from urllib.request import Request, urlopen | |
from urllib.parse import quote_plus, urljoin | |
# Keep the filter for environments where bs4 still guesses (harmless now
# that a parser is passed explicitly below).
simplefilter(category=GuessedAtParserWarning, action="ignore")

# Search term from the command line; fall back to "gif" for a demo run.
query = " ".join(argv[1:]) if argv[1:] else "gif"
url = "https://pypi.org/search/?q={query}".format(query=quote_plus(query))

# Fetch the PyPI search-results page and parse it.  Naming the parser
# ("html.parser", stdlib) makes parsing deterministic across machines —
# otherwise bs4 picks whichever parser is installed (e.g. lxml), which can
# build a different tree and break the sibling/child walking done below.
with urlopen(Request(method="GET", url=url)) as f:
    doc = BeautifulSoup(f.read(), "html.parser")
def find_name(cur: "Tag") -> str:
    """Derive a field name from the nearest ancestor with a ``package-*`` class.

    Walks from *cur* up the parent chain until an element is found whose
    ``class`` list has an entry starting with ``"package-"``.  That entry,
    stripped of the ``"package-"`` prefix (and an optional ``"snippet__"``
    prefix after it), is the field name, e.g.
    ``package-snippet__name`` -> ``"name"``.  Returns ``""`` when no such
    ancestor exists.
    """
    # Compare against None explicitly: a bs4 Tag with no children is falsy
    # (truthiness delegates to __len__), so `while cur and ...` would
    # wrongly bail out on an empty element that *does* carry the class.
    while cur is not None and not (
        cur.attrs.get("class")
        and any(c.startswith("package-") for c in cur["class"])
    ):
        cur = cur.parent
    if cur is None:  # walked past the document root: no package-* ancestor
        return ""
    # The while-condition guarantees at least one matching class entry, so
    # next() cannot raise here (the original's trailing fallback return
    # was unreachable and has been dropped).
    cls = next(c for c in cur["class"] if c.startswith("package-"))
    return cls.removeprefix("package-").removeprefix("snippet__")
# Locate the first element whose class starts with "package-" (one field of
# a search result), then climb while the current element is an only child,
# so that `el.parent` ends up being the container holding one node per
# search result.
el = doc.select('[class^="package-"]')[0]
while el in el.parent.select(":first-child:last-child"):
    el = el.parent
# One Tag per search result; skips NavigableString whitespace nodes.
elems = [c for c in el.parent.children if isinstance(c, Tag)]
# For each result, build {field name -> first non-empty text} from every
# descendant whose class starts with "package-".
# NOTE(review): the outer loop variable `elems` deliberately shadows the
# list above — inside the dict comprehension `elems` is the per-result Tag
# (comprehensions get their own scope; only `enumerate(elems)` is evaluated
# against the outer list).  `idx` is unused.
# NOTE(review): the inner generator's filter iterates `c in e` but the
# value expression reads `e`/`x`, not `c` — so the outer next() only checks
# that *some* child of `e` has non-empty text, then extracts the first
# non-empty `.string`/`.text` among e's children.  Looks intentional but
# fragile — confirm before restructuring.
infos = [
    {
        find_name(e)
        :
        next(
            next(
                (x.string or x.text).strip()
                for x in e
                if (x.string or x.text).strip()
            )
            if isinstance(e, Tag)
            else str(e).strip()
            for c in e
            if str(c).strip()
        )
        for e in elems.select('[class^="package-"]')
    }
    for idx, elems in enumerate(elems)
]
# Enrich each result dict with absolute link URLs and parsed timestamps.
# `elems` and `infos` are index-aligned by construction, so walk them in
# lockstep.
for elem, data in zip(elems, infos):
    # The result element itself counts as a link when it is an anchor,
    # followed by every nested anchor that carries an href.
    anchors = ([elem] if elem.name.lower() == "a" else []) + list(
        elem.select("a[href]")
    )
    for anchor in anchors:
        absolute = urljoin(url, anchor.attrs.get("href"))
        # First link found becomes the canonical "url"; any further links
        # are keyed by their own package-* class name.
        if "url" not in data:
            data["url"] = absolute
        else:
            data[find_name(anchor)] = absolute
    # Convert <time datetime="..."> stamps to aware datetime objects.
    # fromisoformat (pre-3.11) rejects "+0000" and "Z", hence the rewrites.
    for stamp in elem.select("time[datetime]"):
        iso = stamp["datetime"].replace("+0000", "+00:00").replace("Z", "+00:00")
        data[find_name(stamp)] = datetime.fromisoformat(iso)
    # Drop the generic "snippet" key produced for the result wrapper itself.
    data.pop("snippet", None)
# Index the per-result dicts by package name and pretty-print the lot.
from pprint import pp

datas = {}
for info in infos:
    datas[info["name"]] = info
pp(datas)
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment.