Skip to content

Instantly share code, notes, and snippets.

@epc
Last active November 1, 2022 04:48
Show Gist options
  • Star 11 You must be signed in to star a gist
  • Fork 4 You must be signed in to fork a gist
  • Save epc/4118456 to your computer and use it in GitHub Desktop.
Save epc/4118456 to your computer and use it in GitHub Desktop.
python script to parse sphinx objects.inv file
#!/usr/bin/env python
""" Process URL for intersphinx targets and emit html or text """
def validuri(string):
    """Pass a ``--url`` value through unchanged.

    Serves as the argparse ``type=`` callable for ``--url``. No validation
    is performed; the input is handed back exactly as received.
    """
    uri = string
    return uri
from sphinx.ext.intersphinx import read_inventory_v2
from posixpath import join
import pprint
import argparse
import locale
import os,sys,tempfile
import urllib2
# Command-line interface. Parsing happens at module level because the output
# helpers below read the module-level ``args`` namespace directly.
parser = argparse.ArgumentParser(
    description='Process intersphinx link library',
)
parser.add_argument(
    '--url',
    type=validuri,
    help="URL to retrieve objects.inv from",
)
parser.add_argument(
    '--file',
    help="objects.inv format file",
)

# Output formats: at most one may be selected, none is required.
group = parser.add_mutually_exclusive_group(required=False)
group.add_argument(
    '--html',
    action='store_true',
    help="Output HTML",
)
group.add_argument(
    '--terse',
    action='store_true',
    help="Output terse text list",
)
group.add_argument(
    '--rst',
    action='store_true',
    help="Output ReStructuredText",
)
group.add_argument(
    '--rewrite',
    action='store_true',
    help="Output short form and correct form of each link.",
)

args = parser.parse_args()
def start_role(role):
    """Print the opening markup for one inventory role.

    Output depends on the selected format in the module-level ``args``:

    * ``--terse`` / ``--rewrite``: nothing is printed.
    * ``--rst``: the role name on its own line.
    * ``--html``: a ``<dt>`` heading followed by an opening ``<dd><dl>``
      that ``end_role`` is responsible for closing.

    When no format flag is given nothing is printed. The HTML opener is
    emitted only under ``--html`` so that it always pairs with the closing
    tags, which are likewise gated on ``--html``.
    """
    if args.terse or args.rewrite:
        return
    if args.rst:
        print(role)
    elif args.html:
        print("<dt>Role: {}</dt>\n<dd>\n<dl>\n".format(role))
def start_item(role,item):
    """Print the opening markup for a single ``role``/``item`` entry.

    Nothing is printed for ``--terse`` or ``--rewrite``. ``--rst`` prints a
    tab-indented ``:role:item:`` line. ``--html`` prints a ``<dt>`` heading
    and opens the ``<dd>`` and ``<table><tbody>`` that hold the entry's rows.
    """
    if args.terse or args.rewrite:
        return
    if args.rst:
        print("\t:{}:{}:".format(role, item))
        return
    if args.html:
        heading = "<dt>{}:{}</dt>\n".format(role, item)
        print(heading)
        print("<dd>")
        print("<table>\n<tbody>")
def end_item(role,item):
    """Print the closing markup for a single entry.

    Only ``--html`` produces output: the table and ``<dd>`` opened by
    ``start_item`` are closed. ``role`` and ``item`` are accepted for
    symmetry with ``start_item`` but are not used.
    """
    if not args.html:
        return
    print("</tbody></table>")
    print("</dd>\n")
def print_link(role,item,domain,title):
    """Print the intersphinx link for an entry in the selected format.

    The link text is built from ``role``, ``item`` and the lower-cased
    ``domain``. When ``title`` is empty or ``'-'`` the extended form
    ``:role:`item <domain:item>``` is used; otherwise the short form
    ``:role:`domain:item``` is used.

    * ``--terse``: the link alone.
    * ``--rewrite``: the short form, then a space and a tab, then the link.
    * ``--rst``: a tab-indented ``:Link:`` field.
    * ``--html``: a table row, with the link HTML-escaped.

    Nothing is printed when no format flag is set. The function prints and
    returns ``None``.
    """
    domain = domain.lower()
    if title in ('', '-'):
        link = ":{}:`{} <{}:{}>`".format(role, item, domain, item)
    else:
        link = ":{}:`{}:{}`".format(role, domain, item)

    if args.terse:
        print(link)
    elif args.rewrite:
        short_form = ":{}:`{}:{}`".format(role, domain, item)
        # Single space before the tab matches the separator the print
        # statement inserts between two comma-separated values.
        print("{} \t{}".format(short_form, link))
    elif args.rst:
        print("\t\t:Link:\t{}".format(link))
    elif args.html:
        # The extended form contains a literal "<domain:item>", which a
        # browser would otherwise parse as a tag and hide.
        escaped = (
            link.replace("&", "&amp;")
            .replace("<", "&lt;")
            .replace(">", "&gt;")
        )
        print("<tr><th>Link:</th><td>{}</td></tr>".format(escaped))
def end_role():
    """Print the closing markup for a role.

    Only ``--html`` produces output: the ``<dl>`` and ``<dd>`` opened by
    ``start_role`` are closed, in that order.
    """
    if not args.html:
        return
    for closing_tag in ("</dl>\n", "</dd>\n"):
        print(closing_tag)
def print_meta(role,item,domain,version,url,title):
    """Print the Domain, Version, URL and Title of an entry.

    ``--rst`` prints one tab-indented field per value; ``--html`` prints one
    table row per value. ``--terse``, ``--rewrite`` and the no-flag case print
    nothing. ``role`` and ``item`` are accepted but not used. Returns ``None``.
    """
    if args.terse or args.rewrite:
        return

    if args.rst:
        row_template = "\t\t:{}:\t{}"
    elif args.html:
        row_template = "<tr><th>{}:</th><td>{}</td></tr>"
    else:
        return

    fields = (
        ("Domain", domain),
        ("Version", version),
        ("URL", url),
        ("Title", title),
    )
    for label, value in fields:
        print(row_template.format(label, value))
    return
def fetch_data(url,inv):
    """Read an ``objects.inv`` file and print every entry it contains.

    Parameters:
        url: base location passed to ``read_inventory_v2`` for building each
            entry's URL. A falsy value is replaced by ``''`` so the path join
            always receives a string.
        inv: path of the local inventory file to read.

    For each role in the parsed inventory the role is opened, every item is
    printed (link and metadata), and the role is closed again. Under
    ``--html`` the whole listing is wrapped in an outer ``<dl>``.
    """
    # The context manager makes sure the handle is released even if parsing
    # fails part-way through.
    with open(inv, 'rb') as stream:
        # Discard the first line before handing the stream to the parser.
        # NOTE(review): presumably the "# Sphinx inventory version 2" header,
        # which read_inventory_v2 does not consume itself - confirm against
        # the installed Sphinx version.
        stream.readline()
        invdata = read_inventory_v2(stream, url or '', join)

    if args.html:
        print("<dl>")
    for role in invdata:
        start_role(role)
        for item in invdata[role]:
            # ``location`` is kept distinct from the ``url`` parameter so the
            # base URL is not clobbered while iterating.
            domain, version, location, title = invdata[role][item]
            start_item(role, item)
            print_link(role, item, domain, title)
            print_meta(role, item, domain, version, location, title)
            end_item(role, item)
        # Close what start_role opened; without this the HTML output leaves
        # one unclosed <dd><dl> per role.
        end_role()
    if args.html:
        print("</dl>\n")
if __name__ == "__main__":
    # Script entry point. Either --file or --url (or both) must be given:
    #   * --file [--url]: read the local inventory; --url, if present, is
    #     only used as the base HREF for the generated entry URLs.
    #   * --url alone: download objects.inv to a temporary file and read it.
    inv = args.file or None
    url = args.url or None

    if not inv and not url:
        raise Exception("need to specify a file or URL")

    if inv:
        fetch_data(url, inv)
    else:
        marker = url.rfind('objects.inv')
        if marker > 5:
            # The URL already names the inventory file; the base HREF is the
            # part before it, otherwise every generated link would contain
            # ".../objects.inv/<location>".
            inventory_url = url
            base = url[:marker]
        else:
            # Avoid a double slash when the URL already ends with "/".
            inventory_url = url.rstrip('/') + '/objects.inv'
            base = url

        response = urllib2.urlopen(inventory_url)
        try:
            sys.stderr.write('URL resolved to: {}\n'.format(response.geturl()))
            payload = response.read()
        finally:
            response.close()

        # delete=False plus an explicit close lets fetch_data reopen the file
        # by name (not possible on Windows while it is still open) and
        # guarantees the data has been flushed to disk before it is read.
        tmp = tempfile.NamedTemporaryFile(delete=False)
        try:
            tmp.write(payload)
            tmp.close()
            sys.stderr.write("objects.inv written to: {}\n".format(tmp.name))
            sys.stderr.write("Using: {} as base HREF\n".format(base))
            fetch_data(base, tmp.name)
        finally:
            tmp.close()
            os.remove(tmp.name)
@lebigot
Copy link

lebigot commented Apr 8, 2014

This looks useful, but for me python inventory.py --file objects.inv fails with:

Traceback (most recent call last):
  File "inventory.py", line 139, in <module>
    fetch_data(url,inv)
  File "inventory.py", line 102, in fetch_data
    invdata = read_inventory_v2(f, url, join)
  File "/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/sphinx/ext/intersphinx.py", line 118, in read_inventory_v2
    location = join(uri, location)
  File "/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/posixpath.py", line 77, in join
    elif path == '' or path.endswith('/'):
AttributeError: 'bool' object has no attribute 'endswith'

@gmr
Copy link

gmr commented Apr 28, 2014

There's a small fix in my fork of this, change line 102 from:

invdata = read_inventory_v2(f, url, join)

to

invdata = read_inventory_v2(f, url or '', join)

@mearns
Copy link

mearns commented May 19, 2014

I was getting weird permission errors trying to read the tempfile. Instead of debugging it, I just forked it and replaced the tempfile with a StringIO buffer. Also added a --output file, because in some cases I get a weird error trying to redirect the output.

Actually, my fork is of "gmr"'s fork, in case you're looking for it.

@epc
Copy link
Author

epc commented Mar 26, 2015

Slowest delayed reply ever: apologies for not seeing these comments last year, I didn't receive any notifications.

@yoavram
Copy link

yoavram commented Oct 7, 2015

Also on Windows it fails, you should change from posixpath import join to from os.path import join

@acsr
Copy link

acsr commented Jan 27, 2019

You can now call sphinx.ext.intersphinx directly to output cleartext from objects.inv

[Your_Sphinx_venv]/bin/python -m sphinx.ext.intersphinx [path or URL] > output.txt

The output uses justified text instead of tabs for better display in a terminal. To output fully tabbed csv you can tweak the print output of the inventory_main function at the end.

@acsr
Copy link

acsr commented Jan 28, 2019

here is the link to my gist with the pure tab-delimited output https://gist.github.com/acsr/fb7b5cf97627f70876ceeae031cd37f1#file-sphinx-ext-intersphinx-py

@epc
Copy link
Author

epc commented Jul 20, 2021

In case anyone is 1) still using this and 2) has comments, please send them to me or call attention to me, for whatever reason I never get notifications on this gist. I'm @epc on Twitter and epc at epcostello dot com via email.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment