Created
February 24, 2015 02:08
-
-
Save iamaziz/fbcdb3827ee519617676 to your computer and use it in GitHub Desktop.
Generate Dash docset for Kivy
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#----------------------------------
# standard-library packages
import sqlite3 | |
import os | |
import urllib | |
import plistlib | |
#----------------------------------
# third-party packages (the external `httrack` command-line tool is also required)
import requests | |
from bs4 import BeautifulSoup as bs | |
# download the HTML documentation
def get_html(docname, url, download_html=False):
    """Mirror the HTML documentation at *url* into a docset folder.

    The work is delegated to a shell pipeline that recreates
    ``~/Desktop/<docname>``, runs ``httrack`` against *url*, removes the
    ``hts-*`` files and moves everything matching ``*.*`` into
    ``Contents/Resources/Documents``.

    Nothing is executed unless *download_html* is true.
    """
    # Guard clause: the mirror is only produced on explicit request.
    if not download_html:
        return

    shell_script = """
    cd ~/Desktop &&
    rm -rf {0} &&
    mkdir -p {0}/Contents/Resources/Documents &&
    cd {0} &&
    httrack -%v2 -T60 -R99 --sockets=7 -%c1000 -c10 -A999999999 -%N0 --disable-security-limits -F 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.19 (KHTML, like Gecko) Ubuntu/11.10 Chromium/18.0.1025.168' --mirror --keep-alive --robots=0 "{1}" -n -* +*.css +*css.php +*.ico +*/fonts/* +*.svg +*.ttf +fonts.googleapis.com* +*.woff +*.eot +*.png +*.jpg +*.gif +*.jpeg +*.js +{1}* -github.com* +raw.github.com* &&
    rm -rf hts-* &&
    mkdir -p Contents/Resources/Documents &&
    mv -f *.* Contents/Resources/Documents/
    """.format(docname, url)
    os.system(shell_script)
def update_db(name, typ, path):
    """Insert one entry into ``searchIndex`` unless it is already indexed.

    An entry counts as already indexed when an existing row has the same
    *path* or the same *name*.  The module-level cursor ``cur`` is used for
    all statements.

    Args:
        name: display name of the entry.
        typ: entry type stored in the ``type`` column.
        path: documentation path stored in the ``path`` column.

    Database errors are reported and then ignored so that one bad entry does
    not abort the whole indexing run; any other exception propagates.
    """
    try:
        # One lookup covers both duplicate conditions (same path OR same name).
        cur.execute(
            "SELECT 1 FROM searchIndex WHERE path = ? OR name = ? LIMIT 1",
            (path, name),
        )
        if cur.fetchone() is not None:
            print("record exists")
            return
        cur.execute(
            'INSERT OR IGNORE INTO searchIndex(name, type, path) VALUES (?,?,?)',
            (name, typ, path),
        )
        print('DB add >> name: {0} | type: {1} | path: {2}'.format(name, typ, path))
    except sqlite3.Error as err:
        # Stay best-effort, but make the failure visible instead of hiding it.
        print('DB error >> name: {0} | path: {1} | {2}'.format(name, path, err))
def add_infoplist(base_page):
    """Write ``<docset>/Contents/Info.plist`` for the docset.

    The docset folder is taken from the module-level ``docset_name``; the
    bundle name is the part of ``docset_name`` before its first dot.

    Args:
        base_page: URL of the documentation start page.  Everything after
            the ``scheme://`` prefix is stored as ``dashIndexFilePath``; a
            URL without a scheme is stored unchanged.
    """
    # Drop the "scheme://" prefix; tolerate URLs that have none.
    index_file = base_page.split("//", 1)[-1]
    name = docset_name.split('.')[0]
    contents_dir = os.path.join(docset_name, "Contents")
    plist_path = os.path.join(contents_dir, "Info.plist")
    plist_cfg = {
        'CFBundleIdentifier': name,
        'CFBundleName': name,
        'DocSetPlatformFamily': name.lower(),
        'DashDocSetFamily': 'python',
        'isDashDocset': True,
        'dashIndexFilePath': index_file,
    }
    if not os.path.isdir(contents_dir):
        os.makedirs(contents_dir)
    # plistlib.writePlist was removed in Python 3.9; prefer dump() when it
    # exists and keep writePlist only as the Python 2 fallback.
    if hasattr(plistlib, 'dump'):
        with open(plist_path, 'wb') as plist_file:
            plistlib.dump(plist_cfg, plist_file)
    else:
        plistlib.writePlist(plist_cfg, plist_path)
def add_urls(pages):
    """Index every internal reference link found on the given index pages.

    Args:
        pages: mapping of entry type (e.g. ``'Guide'``) to the URL of an
            index page.  Each page is fetched over HTTP and every
            ``<a class="reference internal">`` that has an ``href`` is passed
            to ``update_db`` with the mapping key as its type.
    """
    for entry_type, page_url in pages.items():
        # Stored paths are the page URL without its "scheme://" prefix plus
        # the link's href (presumably matching the layout of the downloaded
        # mirror -- confirm against the httrack output).
        base_path = page_url.split("//", 1)[-1]

        html = requests.get(page_url).text
        # Name the parser explicitly so results do not depend on which
        # optional parsers happen to be installed.
        soup = bs(html, 'html.parser')

        for link in soup.find_all('a', class_='reference internal'):
            href = link.get('href')
            if href is None:
                continue
            # Collapse runs of whitespace (including newlines) in link text.
            name = " ".join(link.text.split())
            update_db(name, entry_type, base_path + href)
def main():
    """Build the Kivy docset: folders, icon, search index and Info.plist.

    Sets the module-level ``docset_name`` and ``cur`` that the other
    functions in this file read.
    """
    # urlretrieve lives in different modules on Python 2 and Python 3.
    try:
        from urllib.request import urlretrieve  # Python 3
    except ImportError:
        from urllib import urlretrieve  # Python 2

    global docset_name, cur

    # docset settings
    docset_name = 'Kivy.docset'
    resources = os.path.join(docset_name, 'Contents', 'Resources')
    output = os.path.join(resources, 'Documents')

    # docset directory
    if not os.path.exists(output):
        os.makedirs(output)

    # docset icon
    icon = 'http://kivy.org/logos/kivy-logo-black-256.png'
    urlretrieve(icon, os.path.join(docset_name, 'icon.png'))

    # index pages
    base_page = 'http://kivy.org/docs/'
    pages = {'Guide': base_page}

    # download html (disabled here; pass True to mirror the site)
    get_html(docset_name, base_page, False)

    # create and connect to SQLite
    db = sqlite3.connect(os.path.join(resources, 'docSet.dsidx'))
    try:
        cur = db.cursor()

        # Always start from an empty table.  The table must be recreated
        # whether or not it existed before, otherwise a second run would
        # drop it and leave nothing to insert into.
        cur.execute('DROP TABLE IF EXISTS searchIndex;')
        cur.execute('CREATE TABLE searchIndex(id INTEGER PRIMARY KEY, name TEXT, type TEXT, path TEXT);')
        cur.execute('CREATE UNIQUE INDEX anchor ON searchIndex (name, type, path);')

        # docset entries
        add_urls(pages)
        add_infoplist(base_page)

        # report num of entries (fetchone() returns a 1-tuple)
        cur.execute('Select count(*) from searchIndex;')
        entry = cur.fetchone()[0]
        print("{} entry.".format(entry))

        db.commit()
    finally:
        # release the database file even if indexing failed
        db.close()
# Build the docset only when this file is run as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment