Skip to content

Instantly share code, notes, and snippets.

@clarete
Created October 29, 2017 17:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save clarete/2f7ad566588c056d44044a76b8bc1200 to your computer and use it in GitHub Desktop.
Save clarete/2f7ad566588c056d44044a76b8bc1200 to your computer and use it in GitHub Desktop.
# Parse online cURL doc to generate binding definitions file
import os
import io
import urlparse
import re
import lxml
import lxml.html
import requests
# Index page that is scraped for links to the per-function pages; the links
# found there are resolved relative to this URL.
DOCUMENTATION_URL = 'https://curl.haxx.se/libcurl/c/allfuncs.html'
def savefile(name, url):
    """Download ``url``, store the payload in the file ``name``, return its text.

    The response body is decoded as UTF-8 and that text is returned.  The
    bytes written to ``name`` are the response body exactly as received.

    Raises ``Exception`` when the HTTP response is not OK, and whatever
    ``requests.get`` or the decode/write steps raise on failure.
    """
    response = requests.get(url)
    if not response.ok:
        raise Exception('Response Not OK for ' + url)
    raw = response.content
    # Decode before touching the disk so an undecodable payload is never cached.
    content = raw.decode('UTF-8')
    # The handle is binary, so write the raw bytes: handing it the decoded
    # text raises TypeError on Python 3.  ``with`` closes (and flushes) the
    # file even if the write fails.
    with io.open(name, 'wb') as cache:
        cache.write(raw)
    return content
def cachedurl(url):
    """Return the text at ``url``, reading it from a local cache file if possible.

    The cache file lives in the current working directory and is named
    ``curldocscraper-`` followed by ``url`` with every ``/`` and ``:``
    replaced by ``-``.  On a cache miss the page is fetched and stored by
    ``savefile``; any exception it raises propagates to the caller.
    """
    name = 'curldocscraper-' + url.replace('/', '-').replace(':', '-')
    if os.path.exists(name):
        # savefile stores the UTF-8 bytes of the response, so decode them
        # explicitly rather than with the locale's default encoding.
        with io.open(name, encoding='UTF-8') as cache:
            cached = cache.read()
        # An empty cache file is treated as a miss and fetched again.
        if cached:
            return cached
    return savefile(name, url)
def getfun():
    """Return the text of the page at ``DOCUMENTATION_URL`` via ``cachedurl``."""
    index_html = cachedurl(DOCUMENTATION_URL)
    return index_html
def findallfun(funtext):
    """Return the ``href`` of every link in the contents section of ``funtext``.

    ``funtext`` is the HTML of the index page.  Only ``<a>`` elements that
    are direct children of a ``<p>`` directly inside
    ``<div class="contents">`` are considered, in document order.
    """
    tree = lxml.html.fromstring(funtext)
    # Select only anchors that actually carry an href: a bare named anchor
    # (<a name="...">) would otherwise raise KeyError on the lookup.
    anchors = tree.xpath('//div[@class="contents"]/p/a[@href]')
    return [anchor.get('href') for anchor in anchors]
def readonefun(onefun):
    """Return the ``//sys`` signature lines scraped from one function's page.

    ``onefun`` is an href that is resolved against ``DOCUMENTATION_URL`` and
    fetched through ``cachedurl``.  The text between the SYNOPSIS heading
    and the DESCRIPTION anchor is stripped of tags, its first line (the
    ``#include``) is dropped, and the remainder is split on ``;`` so that
    each declaration becomes one ``//sys``-prefixed line.

    A page that cannot be retrieved, or that lacks either marker, yields a
    single ``//`` comment line naming ``onefun`` instead of signatures.
    """
    try:
        soup = cachedurl(urlparse.urljoin(DOCUMENTATION_URL, onefun))
    except Exception:
        # Deliberately broad: one unreachable page must not abort the whole
        # run, so the failure is recorded in the output instead.
        return "// Couldn't retrieve " + onefun

    # Here we start the nasty scraping
    start = soup.find('SYNOPSIS</h2>')
    end = soup.find('<a name="DESCRIPTION">')
    if start == -1 or end == -1:
        # str.find signals "not found" with -1; slicing with it would emit
        # unrelated page text as if it were signatures.
        return "// Couldn't parse " + onefun

    # Offset 14 skips the 13-character heading marker plus the character
    # that follows it.
    blob = soup[start + 14:end]
    lines = (re.sub("<[^>]*>", "", blob)
             .replace('&lt;', '<')
             .replace('&gt;', '>')
             .replace('&nbsp;', '')
             .strip()
             .split('\n')[1:])  # 1: Takes the include off
    # Declarations may span several lines: glue everything together and let
    # the terminating ';' decide where each one ends.
    nospaces = "".join(line.strip() for line in lines)
    newlines = nospaces.split(';')
    signatures = "\n".join(
        "//sys " + line.strip() for line in newlines if line.strip())
    return signatures
def geteachfun(allfun):
    """Run ``readonefun`` on each entry of ``allfun`` and join the results with newlines."""
    chunks = [readonefun(href) for href in allfun]
    return "\n".join(chunks)
def main():
    """Print the include line followed by the signatures scraped from every linked page."""
    print('//sys #include <curl/curl.h>')
    index_html = getfun()
    hrefs = findallfun(index_html)
    print(geteachfun(hrefs))


# Run the scraper only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment