Skip to content

Instantly share code, notes, and snippets.

@enriched
Created January 19, 2024 00:39
Show Gist options
  • Save enriched/8d76abd68f3dd3d8875591b73152f01b to your computer and use it in GitHub Desktop.
Save enriched/8d76abd68f3dd3d8875591b73152f01b to your computer and use it in GitHub Desktop.
Get RPM metadata files for a repository
import os
from urllib.parse import urljoin
import urllib
import gzip
from pathlib import Path, PurePath
import requests
from bs4 import BeautifulSoup
# Working directories: downloaded repository metadata is written under
# ./repodata (relative to the current working directory), with a cache/
# subdirectory beneath it.
NOTEBOOK_DIR = os.getcwd()
REPODATA_DIR = Path(NOTEBOOK_DIR) / "repodata"
REPOCACHE_DIR = REPODATA_DIR / "cache"

# mkdir(exist_ok=True) replaces the check-then-create pair, which could fail
# if the directory appeared between the two calls; parents=True also creates
# any missing intermediate directories.
REPODATA_DIR.mkdir(parents=True, exist_ok=True)
REPOCACHE_DIR.mkdir(parents=True, exist_ok=True)
BASEARCH = "x86_64"
CENTOS_MIRROR_URL = "http://mirror.centos.org/centos/"
# NOTE(review): CentOS 7 is past end of life; confirm this mirror still
# serves the 7/os tree before relying on it.
CENTOS7_BASEOS_URL = urljoin(CENTOS_MIRROR_URL, f"7/os/{BASEARCH}/")

# Get repomd.xml
# CENTOS7_BASEOS_URL already ends with "/", so join the relative path instead
# of interpolating it; the f-string form produced ".../x86_64//repodata/...".
repomd_url = urljoin(CENTOS7_BASEOS_URL, "repodata/repomd.xml")
# A timeout keeps an unresponsive mirror from hanging the script forever.
repomd_request = requests.get(repomd_url, timeout=30)
# Fail here on an HTTP error rather than saving and parsing an error page.
repomd_request.raise_for_status()
repomd_content = repomd_request.content
repomd_path = REPODATA_DIR / "repomd.xml"
repomd_path.write_bytes(repomd_content)
# Parsed repomd.xml; get_metadata_file() looks up <data> entries in it.
repomd = BeautifulSoup(repomd_content, "xml")
def get_metadata_file(type: str, timeout: float = 30.0) -> Path:
    """Download one metadata file referenced by repomd.xml into REPODATA_DIR.

    The file is located through the ``<data type=...>`` entry of the parsed
    module-level ``repomd`` document and fetched relative to
    ``CENTOS7_BASEOS_URL``. If the downloaded file name ends in ``.gz``, a
    decompressed copy is written next to it with the ``.gz`` suffix removed.

    Args:
        type: Value of the ``type`` attribute of the ``<data>`` element to
            fetch (e.g. ``"primary"`` or ``"filelists"``). The name shadows
            the builtin but is kept so existing keyword callers still work.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        Path of the file as downloaded (the compressed file when the
        metadata is gzip-compressed, not the decompressed copy).

    Raises:
        IndexError: If repomd.xml has no ``<data>`` entry of that type.
        requests.HTTPError: If the server answers with an error status.
    """
    data_entry = repomd.find("data", type=type)
    if data_entry is None:
        # Same exception type the original `find_all(...)[0]` raised, but
        # with a message that says what was missing.
        raise IndexError(f"repomd.xml has no <data type={type!r}> entry")
    metadata_file_href = data_entry.location["href"]

    # basename() keeps the write inside REPODATA_DIR regardless of the
    # directory components in the href.
    metadata_filename: str = os.path.basename(metadata_file_href)
    metadata_file_url = urljoin(CENTOS7_BASEOS_URL, metadata_file_href)

    response = requests.get(metadata_file_url, timeout=timeout)
    # Do not store an HTTP error page as if it were metadata.
    response.raise_for_status()
    payload = response.content

    metadata_file_path = REPODATA_DIR / metadata_filename
    metadata_file_path.write_bytes(payload)
    if metadata_file_path.suffix == ".gz":
        # with_suffix("") drops only the trailing ".gz" (foo.xml.gz -> foo.xml).
        metadata_file_decomp_path = metadata_file_path.with_suffix("")
        metadata_file_decomp_path.write_bytes(gzip.decompress(payload))
    return metadata_file_path
# Download the "primary" metadata file listed in repomd.xml.
primary_file_path = get_metadata_file(type="primary")
# Download the "filelists" metadata file listed in repomd.xml.
filelists_file_path = get_metadata_file(type="filelists")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment