Skip to content

Instantly share code, notes, and snippets.

@epc
Last active November 1, 2022 04:48
Show Gist options
  • Star 11 You must be signed in to star a gist
  • Fork 4 You must be signed in to fork a gist
  • Save epc/4118456 to your computer and use it in GitHub Desktop.
Save epc/4118456 to your computer and use it in GitHub Desktop.
python script to parse sphinx objects.inv file
#!/usr/bin/env python
""" Process URL for intersphinx targets and emit html or text """
def validuri(string):
    """Hand *string* back unchanged.

    Used as the ``type=`` converter for the ``--url`` option; no validation
    is performed on the value.
    """
    uri = string
    return uri
from sphinx.ext.intersphinx import read_inventory_v2
from posixpath import join
import pprint
import argparse
import locale
import os,sys,tempfile
import urllib2
# Command-line interface: where the inventory comes from (--url / --file)
# and which one of the mutually exclusive output formats to emit.
parser = argparse.ArgumentParser(
    description='Process intersphinx link library')
parser.add_argument('--url', type=validuri,
                    help="URL to retrieve objects.inv from")
parser.add_argument('--file', help="objects.inv format file")

# At most one output format may be selected; none of them is required.
group = parser.add_mutually_exclusive_group(required=False)
for _flag, _help in (
        ('--html', "Output HTML"),
        ('--terse', "Output terse text list"),
        ('--rst', "Output ReStructuredText"),
        ('--rewrite', "Output short form and correct form of each link."),
):
    group.add_argument(_flag, action='store_true', help=_help)
del _flag, _help

# Parsed at module level; the output functions in this file read `args`.
args = parser.parse_args()
def start_role(role):
    """Print the opening output for one inventory role.

    Terse and rewrite modes print nothing. RST mode prints the bare role
    name. Every other case -- including when no format flag was given --
    prints the HTML ``<dt>`` heading and opens a ``<dd>``/``<dl>`` pair.
    """
    if args.terse or args.rewrite:
        return
    if args.rst:
        print(role)
        return
    print("<dt>Role: {}</dt>\n<dd>\n<dl>\n".format(role))
def start_item(role, item):
    """Print the opening output for one item within a role.

    Terse and rewrite modes print nothing. RST mode prints an indented
    ``:role:item:`` line. HTML mode prints a ``<dt>`` heading and opens the
    ``<dd>`` and ``<table>`` that hold the item's rows.

    Args:
        role: Inventory role name the item belongs to.
        item: Name of the item.
    """
    from xml.sax.saxutils import escape  # stdlib on both Python 2 and 3

    if args.terse or args.rewrite:
        return
    if args.rst:
        print("\t:{}:{}:".format(role, item))
    elif args.html:
        # Names come from the inventory file and may contain '<', '>' or
        # '&'; escape them so they show up as text instead of being parsed
        # as markup.
        heading = escape("{}:{}".format(role, item))
        print("<dt>{}</dt>\n".format(heading))
        print("<dd>")
        print("<table>\n<tbody>")
def end_item(role, item):
    """Print the closing HTML for one item; does nothing in other modes.

    Both parameters are accepted for symmetry with ``start_item`` but are
    not used.
    """
    if not args.html:
        return
    for closing in ("</tbody></table>", "</dd>\n"):
        print(closing)
def print_link(role, item, domain, title):
    """Print the intersphinx link for one inventory entry.

    The short form is ``:role:`domain:item```. When *title* is empty or
    ``'-'`` the extended form ``:role:`item <domain:item>``` is used
    instead. The function prints; it does not return the link.

    Args:
        role: Inventory role name.
        item: Name of the item being linked.
        domain: Domain value from the inventory entry; lower-cased before
            use.
        title: Title value from the inventory entry.
    """
    from xml.sax.saxutils import escape  # stdlib on both Python 2 and 3

    domain = domain.lower()
    short_form = ":{}:`{}:{}`".format(role, domain, item)
    if title in ('', '-'):
        link = ":{}:`{} <{}:{}>`".format(role, item, domain, item)
    else:
        link = short_form

    if args.terse:
        print(link)
    elif args.rewrite:
        # Short form, a space, then a tab and the form that should be used.
        print("{} \t{}".format(short_form, link))
    elif args.rst:
        print("\t\t:Link:\t{}".format(link))
    elif args.html:
        # The extended form contains "<domain:item>", which a browser would
        # otherwise treat as a tag and hide; escape it so it is displayed.
        print("<tr><th>Link:</th><td>{}</td></tr>".format(escape(link)))
def end_role():
    """Print the closing ``</dl>`` and ``</dd>`` for a role in HTML mode.

    Prints nothing in any other mode.
    """
    if not args.html:
        return
    for closing in ("</dl>\n", "</dd>\n"):
        print(closing)
def print_meta(role, item, domain, version, url, title):
    """Print the Domain, Version, URL and Title fields of an inventory entry.

    Terse and rewrite modes print nothing. RST mode prints one indented
    field-list line per value. HTML mode prints one table row per value.

    Args:
        role: Inventory role name (not used in the output).
        item: Item name (not used in the output).
        domain: Domain value of the entry.
        version: Version value of the entry.
        url: URL value of the entry.
        title: Title value of the entry.
    """
    from xml.sax.saxutils import escape  # stdlib on both Python 2 and 3

    if args.terse or args.rewrite:
        return

    fields = (
        ("Domain", domain),
        ("Version", version),
        ("URL", url),
        ("Title", title),
    )
    if args.rst:
        for label, value in fields:
            print("\t\t:{}:\t{}".format(label, value))
    elif args.html:
        for label, value in fields:
            # Format first so non-string values behave as before, then
            # escape '&', '<' and '>' so the value cannot break the markup.
            cell = escape("{}".format(value))
            print("<tr><th>{}:</th><td>{}</td></tr>".format(label, cell))
def fetch_data(url, inv):
    """Read an objects.inv file and print every entry in the chosen format.

    Args:
        url: Base URL handed to ``read_inventory_v2`` together with
            ``join``; a falsy value is replaced by the empty string.
        inv: Path of the local inventory file to read.
    """
    # The context manager closes the file even if parsing raises.
    with open(inv, 'rb') as stream:
        stream.readline()  # burn the first line before parsing the rest
        invdata = read_inventory_v2(stream, url or '', join)

    if args.html:
        print("<dl>")
    for role, items in invdata.items():
        start_role(role)
        # `location` is the per-entry URL; a distinct name avoids
        # clobbering the `url` parameter.
        for item, (domain, version, location, title) in items.items():
            start_item(role, item)
            print_link(role, item, domain, title)
            print_meta(role, item, domain, version, location, title)
            end_item(role, item)
        # Close what start_role opened so the HTML stays balanced.
        end_role()
    if args.html:
        print("</dl>\n")
if __name__ == "__main__":
    # Script entry point: read the inventory from --file when given,
    # otherwise download it from --url into a temporary file first.
    inv = args.file or None
    url = args.url or None

    if inv is None and url is None:
        raise Exception("need to specify a file or URL")

    if inv is not None:
        # Local file; --url (if any) only serves as the base HREF.
        fetch_data(url, inv)
    else:
        # Accept either a documentation root or a direct objects.inv URL.
        if url.rfind('objects.inv') > 5:
            inv_url = url
        else:
            inv_url = url + '/objects.inv'

        # Links are resolved against the documentation root, so drop a
        # trailing "objects.inv" from the base instead of joining onto it.
        if url.endswith('objects.inv'):
            base = url[:-len('objects.inv')]
        else:
            base = url

        response = urllib2.urlopen(inv_url)
        try:
            resolved = response.geturl()
            payload = response.read()
        finally:
            response.close()
        sys.stderr.write('URL resolved to: {}\n '.format(resolved))

        # NOTE: re-opening a NamedTemporaryFile by name while it is still
        # open is not supported on Windows.
        with tempfile.NamedTemporaryFile() as tmp:
            tmp.write(payload)
            # Push buffered bytes to disk before the file is re-opened by
            # name; otherwise the reader can see a truncated inventory.
            tmp.flush()
            sys.stderr.write("objects.inv written to: {}\n".format(tmp.name))
            sys.stderr.write("Using: {} as base HREF\n".format(base))
            fetch_data(base, tmp.name)
@yoavram
Copy link

yoavram commented Oct 7, 2015

It also fails on Windows; you should change `from posixpath import join` to `from os.path import join`.

@acsr
Copy link

acsr commented Jan 27, 2019

You can now call sphinx.ext.intersphinx directly to output cleartext from objects.inv

[Your_Sphinx_venv]/bin/python -m sphinx.ext.intersphinx [path or URL] > output.txt

The output uses justified text instead of tabs for better display in a terminal. To output fully tabbed csv you can tweak the print output of the inventory_main function at the end.

@acsr
Copy link

acsr commented Jan 28, 2019

Here is the link to my gist with the pure tab-delimited output: https://gist.github.com/acsr/fb7b5cf97627f70876ceeae031cd37f1#file-sphinx-ext-intersphinx-py

@epc
Copy link
Author

epc commented Jul 20, 2021

In case anyone is 1) still using this and 2) has comments, please send them to me or call attention to me, for whatever reason I never get notifications on this gist. I'm @epc on Twitter and epc at epcostello dot com via email.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment