Skip to content

Instantly share code, notes, and snippets.

@druwynings
Created July 27, 2015 20:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save druwynings/0ebe00df4255641d0f76 to your computer and use it in GitHub Desktop.
Save druwynings/0ebe00df4255641d0f76 to your computer and use it in GitHub Desktop.
docs generators
product api-product_v3.html
fields_default: type,pageUrl,resolvedPageUrl,title,text,brand,offerPrice,regularPrice,shippingAmount,saveAmount,priceRange(minPrice,maxPrice),quantityPrices(minQuantity,price),offerPriceDetails,regularPriceDetails,saveAmountDetails,productId,upc,sku,mpn,isbn,specs,images(url,title,naturalHeight,naturalWidth,primary,xpath,diffbotUri),discussion,prefixCode,productOrigin,humanLanguage,diffbotUri
fields_optional: links,meta,querystring,breadcrumb
fields_experimental: availability,colors,size
arguments_default: token,url
arguments_optional: fields,discussion,timeout,callback
article api-article_v3.html
fields_default: type,title,text,html,date,estimatedDate,author,authorUrl,discussion,humanLanguage,numPages,nextPages,siteName,publisherRegion,publisherCountry,pageUrl,resolvedPageUrl,tags(label,count,prevalence,type,uri),images(url,title,height,width,naturalHeight,naturalWidth,primary,diffbotUri),videos(url,naturalHeight,naturalWidth,primary,diffbotUri),diffbotUri
fields_optional:sentiment,links,meta,querystring,breadcrumb
arguments_default: token,url
arguments_optional: fields,paging,maxTags,discussion,timeout,callback
codesample_1: http://api.diffbot.com/v3/article?token=john&url=http%3A%2F%2Fblog.diffbot.com%2Fdiffbots-new-product-api-teaches-robots-to-shop-online
image api-image_v3.html
fields_default: type,url,title,naturalHeight,naturalWidth,humanLanguage,anchorUrl,pageUrl,resolvedPageUrl,xpath,diffbotUri
fields_optional: displayHeight,displayWidth,links,meta,querystring,breadcrumb
fields_experimental: mentions,ocr,faces
arguments_default: token,url
arguments_optional: fields,timeout,callback
analyze api-analyze_v3.html
arguments_default: token,url
arguments_optional: fields,mode,discussion,timeout,callback
fields_default: title,type,humanLanguage
fields_optional: links,meta,querystring,breadcrumb
semantria api-semantria.html
arguments_default: textAnalysis,semantriaKey,semantriaSecret
discussion api-discussion_v3.html
arguments_default: token,url
arguments_optional: fields,timeout,callback,maxPages
fields_default: type,pageUrl,resolvedPageUrl,title,numPosts,posts(type,id,parentId,text,html,tags,humanLanguage,images,date,author,authorUrl,pageUrl,diffbotUri),tags,participants,numPages,nextPage,nextPages,provider,humanLanguage,rssUrl,diffbotUri
fields_optional: sentiment,links,meta,querystring,breadcrumb
codesample_1: http://api.diffbot.com/v3/discussion?version=3&token=john&url=https%3A%2F%2Fnews.ycombinator.com%2Fitem%3Fid%3D5608988
video api-video_v3.html
arguments_default: token,url
arguments_optional: fields,timeout,callback
fields_default: type,pageUrl,resolvedPageUrl,title,text,url,html,embedUrl,author,date,duration,viewCount,naturalHeight,naturalWidth,images(url,title),mime,humanLanguage,diffbotUri
fields_optional: links,meta,querystring,breadcrumb
bulk api-bulk.html
codesample_1: https://api.diffbot.com/v3/bulk?token=doctoken&name=bulkJob
crawl api-crawlbot_v2.html
codesample_1: https://api.diffbot.com/v3/crawl?token=doctoken&name=crawlJob
{
"arguments": {
"breadcrumb": {
"default": "Returns a top-level array (<code>breadcrumb</code>) of URLs and link text from page breadcrumbs."
},
"callback": {
"default": "Use for jsonp requests. Needed for cross-domain ajax."
},
"discussion": {
"analyze": "Pass <code>discussion=false</code> to disable automatic extraction of comments or reviews from pages identified as articles or products. This will not affect pages identified as discussions.",
"default": "Pass <code>discussion=false</code> to disable automatic extraction of article comments. See <a href='#discussion'>below</a>.",
"product": "Pass <code>discussion=false</code> to disable automatic extraction of product reviews. See <a href='#discussion'>below</a>."
},
"fields": {
"analyze": "Specify optional fields to be returned from any fully-extracted pages, e.g.: <code>&fields=querystring,links</code>.<br><br>See available fields within each API's individual documentation pages.",
"default": "Used to specify optional fields to be returned by the {API} API. See the <a href=\"#fields\">Fields</a> section below."
},
"links": {
"default": "Returns a top-level object (<code>links</code>) containing all hyperlinks found on the page."
},
"maxPages": {
"default": "Set the maximum number of pages in a thread to automatically concatenate in a single response. Default = 1 (no concatenation). Set <code>maxPages=all</code> to retrieve all pages of a thread regardless of length. Each individual page will count as a separate API call."
},
"maxTags": {
"default": "Set the maximum number of automatically-generated tags to return. By default a maximum of five tags will be returned."
},
"meta": {
"default": "Returns a top-level object (<code>meta</code>) containing the full contents of page <code>meta</code> tags, including sub-arrays for <a href=\"http://ogp.me/\" target=\"_new\">OpenGraph</a> tags, <a href=\"https://dev.twitter.com/docs/cards/markup-reference\" target=\"_new\">Twitter Card</a> metadata, <a href=\"http://www.schema.org\" target=\"_new\">schema.org</a> microdata, and -- if available -- <a href=\"http://www.oembed.com\" target=\"_new\">oEmbed</a> metadata."
},
"mode": {
"default": "By default the Analyze API will fully extract all pages that match an existing Automatic API -- articles, products or image pages. Set <code>mode</code> to a specific page-type (e.g., <code>mode=article</code>) to extract content only from that specific page-type. All other pages will simply return the default Analyze fields."
},
"optional": "Optional arguments",
"paging": {
"default": "Pass <code>paging=false</code> to disable automatic concatenation of multiple-page articles. (By default, Diffbot will concatenate up to 20 pages of a single article.) <a href=\"http://support.diffbot.com/automatic-apis/handling-multiple-page-articles\">More on automatic concatenation</a>."
},
"querystring": {
"default": "Returns any key/value pairs present in the URL querystring. Items without a discrete value will be returned as <code>true</code>."
},
"semantriaKey": {
"default": "Include your Semantria API key (available from your Semantria dashboard)."
},
"semantriaSecret": {
"default": "Include your Semantria API secret (available from your Semantria dashboard)."
},
"stats": {
"default": "Returns statistics on page classification, including the array of individual page-types that comprise the \"other\" category, and Diffbot confidence score for each type. <a href=\"#pagetypes\">See all page-types</a>."
},
"textAnalysis": {
"default": "Pass either <code>&textAnalysis</code> or request the field <code>textAnalysis</code> to return the Semantria-powered object in your response."
},
"timeout": {
"default": "Set a value in milliseconds to terminate the response. By default the {API} API has a 30-second (30000) timeout."
},
"token": {
"default": "Developer token"
},
"url": {
"default": "Web page URL of the {api} to process (URL encoded)"
}
},
"fields": {
"anchorUrl": {
"default": "If the image is hyperlinked, returns the destination URL."
},
"attrAlt": {
"default": "Contents of the image's <code>alt</code> attribute."
},
"attrTitle": {
"default": "Contents of the image's <code>title</code> attribute."
},
"author": {
"default": "{API} author.",
"video": "{API} uploader or creator, if available."
},
"authorUrl": {
"default": "URL of the author profile page, if available."
},
"availability": {
"default": "Item's availability, either <code>true</code> or <code>false</code>."
},
"brand": {
"default": "Item's brand name."
},
"breadcrumb": {
"default": "Returns a top-level array (<code>breadcrumb</code>) of URLs and link text from page breadcrumbs."
},
"colors": {
"default": "Returns an array of hex values of the dominant colors within the image.",
"product": "Returns array of product color options."
},
"date": {
"default": "Date of extracted {api}, normalized in most cases to <a href=\"http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.3\">RFC 1123 (HTTP/1.1)</a>.",
"image": "Date associated with the image, if available in page content."
},
"diffbotUri": {
"default": "Unique object ID. The <code>diffbotUri</code> is generated from the values of various {API} fields and uniquely identifies the object. This can be used for deduplication."
},
"discussion": {
"default": "Article comments, as extracted by the Diffbot Discussion API. See <a href='#discussion'>below</a>.",
"product": "Product reviews, as extracted by the Diffbot Discussion API. See <a href='#discussion'>below</a>."
},
"duration": {
"default": "Duration in seconds of the {API}."
},
"embedUrl": {
"default": "Embeddable URL, if available."
},
"estimatedDate": {
"default": "If an article's date is ambiguous, Diffbot will attempt to estimate a more specific timestamp using various factors. This will not be generated for articles older than two days, or articles without an identified <code>date</code>."
},
"experimental": "The following fields are in an early beta stage:",
"faces": {
"default": "The x, y, height and width of coordinates of human faces. Returns null if no faces are found."
},
"height": {
"default": "Height of image as (re-)sized via browser/CSS."
},
"html": {
"default": "Diffbot-normalized HTML of the extracted article. Please see the <a href=\"/dev/docs/article/html\">HTML Specification</a> for a breakdown of elements and attributes returned.",
"video": "Embeddable HTML of the video (if available), typically an <code>IFRAME</code> or <code>VIDEO</code> object."
},
"humanLanguage": {
"default": "Returns the (spoken/human) language of the submitted page, using two-letter <a href=\"http://en.wikipedia.org/wiki/List_of_ISO_639-1_codes\" target=\"_blank\">ISO 639-1 nomenclature</a>.",
"discussion": "Spoken/human language of the discussion / comment thread, using two-letter <a href=\"http://en.wikipedia.org/wiki/List_of_ISO_639-1_codes\" target=\"_blank\">ISO 639-1 nomenclature</a>."
},
"images": {
"article": "Array of images, if present within the article body.",
"default": "Array of images, if present within the {api}.",
"subfields": {
"diffbotUri": {
"default": "Internal ID used for indexing."
},
"height": {
"default": "Height of image as (re-)sized via browser/CSS."
},
"naturalHeight": {
"default": "Raw image height, in pixels."
},
"naturalWidth": {
"default": "Raw image width, in pixels."
},
"primary": {
"default": "Returns <code>true</code> if image is identified as primary based on visual analysis."
},
"text": {
"default": "Diffbot-determined best caption for or description of the image."
},
"title": {
"default": "Description or caption of the image."
},
"url": {
"default": "Fully resolved link to image. If the image <code>SRC</code> is encoded as base64 data, the complete data URI will be returned."
},
"width": {
"default": "Width of image as (re-)sized via browser/CSS."
},
"xpath": {
"default": "XPath expression identifying the image node."
}
}
},
"isbn": {
"default": "International Standard Book Number (ISBN), if available."
},
"links": {
"default": "Returns a top-level object (<code>links</code>) containing all hyperlinks found on the page."
},
"mentions": {
"default": "Array of articles upon which the same or similar image may be found."
},
"meta": {
"default": "Returns a top-level object (<code>meta</code>) containing the full contents of page <code>meta</code> tags, including sub-arrays for <a href=\"http://ogp.me/\" target=\"_new\">OpenGraph</a> tags, <a href=\"https://dev.twitter.com/docs/cards/markup-reference\" target=\"_new\">Twitter Card</a> metadata, <a href=\"http://www.schema.org\" target=\"_new\">schema.org</a> microdata, and -- if available -- <a href=\"http://www.oembed.com\" target=\"_new\">oEmbed</a> metadata.",
"image": "Comma-separated list of image-embedded metadata (e.g., EXIF, XMP, ICC Profile), if available within the image file."
},
"mime": {
"default": "MIME type, if available, as specified by the {API}'s \"Content-Type.\""
},
"mpn": {
"default": "Manufacturer's Product Number."
},
"naturalHeight": {
"default": "Raw image height, in pixels.",
"video": "Raw video height, if available, in pixels."
},
"naturalWidth": {
"default": "Raw image width, in pixels.",
"video": "Raw video width, if available, in pixels."
},
"displayHeight": {
"default": "Height of image as presented in the browser (and as sized via browser/CSS, if resized)."
},
"displayWidth": {
"default": "Width of image as presented in the browser (and as sized via browser/CSS, if resized)."
},
"nextPage": {
"default": "If {api} spans multiple pages, <code>nextPage</code> will return the subsequent page URL."
},
"nextPages": {
"default": "Array of all page URLs concatenated in a multipage {api}. <a href=\"http://support.diffbot.com/automatic-apis/handling-multiple-page-articles/\">More on automatic concatenation</a>."
},
"numPages": {
"default": "Number of pages automatically concatenated to form the <code>text</code> or <code>html</code> response. By default, Diffbot will automatically concatenate up to 20 pages of an {api}. <a href=\"http://support.diffbot.com/automatic-apis/handling-multiple-page-articles/\">More on automatic concatenation</a>.",
"discussion": "Number of pages in the thread concatenated to form the <code>posts</code> response. Use <code>maxPages</code> to define how many pages to concatenate. <a href=\"http://support.diffbot.com/automatic-apis/handling-multiple-page-articles/\">More on automatic concatenation</a>."
},
"numPosts": {
"default": "Number of individual posts in the thread."
},
"ocr": {
"default": "If text is identified within the image, we will attempt to recognize the text string."
},
"offerPrice": {
"default": "Offer or actual/final price of the product."
},
"offerPriceDetails": {
"default": "<code>offerPrice</code> separated into its constituent parts: <code>amount</code>, <code>symbol</code>, and full <code>text</code>."
},
"optional": "Optional fields, available using <code>fields=</code> argument",
"pageUrl": {
"default": "URL of submitted page / page from which the {api} is extracted."
},
"participants": {
"default": "Number of unique participants in the discussion thread or comments."
},
"posts": {
"default": "Array of individual posts.",
"subfields": {
"author": {
"default": "Name/username of the post author."
},
"authorUrl": {
"default": "URL of the author profile page, if available."
},
"date": {
"default": "Date of post, normalized in most cases to <a href=\"http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.3\">RFC 1123 (HTTP/1.1)</a>."
},
"diffbotUri": {
"default": "Internal ID used for indexing."
},
"html": {
"default": "Diffbot-normalized HTML of the extracted post. Please see the <a href=\"/dev/docs/article/html\">HTML Specification</a> for a breakdown of elements and attributes returned."
},
"humanLanguage": {
"default": "Spoken/human language of the post, using two-letter <a href=\"http://en.wikipedia.org/wiki/List_of_ISO_639-1_codes\" target=\"_blank\">ISO 639-1 nomenclature</a>."
},
"id": {
"default": "ID of the individual post. The first post of a thread will have an ID of 0."
},
"images": {
"default": "If any images are detected within post content, they will be returned in a separate array. Individual array fields are the same as the <a href='/dev/docs/article'>Article API's</a> <code>images</code> array."
},
"pageUrl": {
"default": "URL of the page on which the post was found."
},
"parentId": {
"default": "ID of the parent, if the post is a reply or response."
},
"sentiment": {
"default": "Returns a sentiment score from -1.0 (very negative) to 1.0 (very positive) for the individual post."
},
"tags": {
"default": "If the post is long enough, an array of tags generated from its specific content."
},
"text": {
"default": "Full text of the extracted post."
},
"type": {
"default": "Type of element (always <code>post</code>)."
}
}
},
"prefixCode": {
"default": "Country of origin as identified by UPC/ISBN."
},
"priceRange": {
"default": "If the product is available in a range of prices, the minimum and maximum values will be returned. The lowest price will also be returned as the <code>offerPrice</code>.",
"subfields": {
"maxPrice": {
"default": "The maximum price for the offered item."
},
"minPrice": {
"default": "The minimum price for the offered item."
}
}
},
"productId": {
"default": "Diffbot-determined unique product ID. If <code>upc</code>, <code>isbn</code>, <code>mpn</code> or <code>sku</code> are identified on the page, <code>productId</code> will select from these values in the above order."
},
"productOrigin": {
"default": "If available, two-character ISO country code where the product was produced."
},
"provider": {
"default": "Discussion service provider (e.g., Disqus, Facebook), if known."
},
"publisherCountry": {
"default": "If known, the country of the article publication."
},
"publisherRegion": {
"default": "If known, the region of the article publication."
},
"quantityPrices": {
"default": "If the product is available with quantity-based discounts, all identifiable price points will be returned. The lowest price will also be returned as the <code>offerPrice</code>.",
"subfields": {
"minQuantity": {
"default": "The minimum quantity required to purchase for the associated price."
},
"price": {
"default": "Price of the specific quantity level."
}
}
},
"querystring": {
"default": "Returns any key/value pairs present in the URL querystring. Items without a discrete value will be returned as <code>true</code>."
},
"regularPrice": {
"default": "Regular or original price of the product, if available."
},
"regularPriceDetails": {
"default": "<code>regularPrice</code> separated into its constituent parts: <code>amount</code>, <code>symbol</code>, and full <code>text</code>."
},
"resolvedPageUrl": {
"default": "Returned if the <code>pageUrl</code> redirects to another URL."
},
"rssUrl": {
"default": "URL of the {api}'s RSS feed, if available."
},
"saveAmount": {
"default": "Discount or amount saved off the regular price."
},
"saveAmountDetails": {
"default": "<code>saveAmount</code> separated into its constituent parts: <code>amount</code>, <code>symbol</code>, full <code>text</code>, and whether or not it is a <code>percentage</code> value."
},
"sentiment": {
"default": "Returns the sentiment score of the analyzed article text, a value ranging from -1.0 (very negative) to 1.0 (very positive).",
"discussion": "Returns a sentiment score of each individual post, a value ranging from -1.0 (very negative) to 1.0 (very positive)."
},
"shippingAmount": {
"default": "Shipping price."
},
"siteName": {
"default": "The plain-text name of the site (e.g. <code>The New York Times</code> or <code>Diffbot</code>). If no site name is automatically determined, the root domain (<code>diffbot.com</code>) will be returned."
},
"size": {
"default": "Size in bytes of the image file.",
"product": "Size(s) available, if identified on the page."
},
"sku": {
"default": "Stock Keeping Unit -- store/vendor inventory number or identifier."
},
"specs": {
"default": "If a specifications table or similar data is available on the product page, individual specifications will be returned in the <code>specs</code> object as name/value pairs. Names will be normalized to lowercase with spaces replaced by underscores, e.g. <code>display_resolution</code>."
},
"stats": {
"analyze": "If the <code>stats</code> argument is passed in the request, the <code>stats</code> object will be returned, including an array of page-types and associated confidence scores.",
"default": "Returns statistics on the page evaluation and extraction."
},
"tags": {
"article": "Array of tags/entities, generated from analysis of the extracted <code>text</code> and cross-referenced with <a href='http://wiki.dbpedia.org/About' target='_new'>DBpedia</a> and other data sources.",
"default": "Array of tags/entities, generated from analysis of the extracted <code>text</code> and cross-referenced with <a href='http://wiki.dbpedia.org/About' target='_new'>DBpedia</a> and other data sources.",
"discussion": "Array of tags/entities as generated from analysis of all extracted <code>posts</code> and cross-referenced with <a href='http://wiki.dbpedia.org/About' target='_new'>DBpedia</a> and other data sources.",
"subfields": {
"count": {
"default": "Number of appearances the entity makes within the text content."
},
"label": {
"default": "Name of the entity or tag."
},
"prevalence": {
"default": "Ratio of the number of appearances of this entity compared to those of all entities within the text content."
},
"type": {
"default": "Link to the entity type, if identified, most commonly at DBpedia."
},
"uri": {
"default": "Link to the entity at DBpedia or other data source."
}
}
},
"text": {
"default": "Full text of the {api}.",
"product": "Text description, if available, of the {api}.",
"video": "Text description, if available, of the {api}."
},
"textNode": {
"default": "MIME type, if available, as specified by the image's \"Content-Type.\""
},
"title": {
"analyze": "Title of the page.",
"default": "Title of the {api}.",
"discussion": "Title of the discussion thread or parent post.",
"image": "Title or caption of the image, if available."
},
"type": {
"analyze": "Page-type of the submitted URL, either <code>article</code>, <code>image</code>, <code>product</code> or <code>other</code>.",
"default": "Type of object (always <code>{api}</code>)."
},
"upc": {
"default": "Universal Product Code (UPC/EAN), if available."
},
"url": {
"default": "Direct link to image file.",
"video": "Direct link to source video file, if available."
},
"videos": {
"article": "Array of videos, if present within the article body.",
"default": "Array of videos, if present within the {api}.",
"subfields": {
"diffbotUri": {
"default": "Internal ID used for indexing."
},
"naturalHeight": {
"default": "Source video height, in pixels, if available."
},
"naturalWidth": {
"default": "Source video width, in pixels, if available."
},
"primary": {
"default": "Returns <code>true</code> if video is identified as primary based on visual analysis."
},
"url": {
"default": "Fully resolved link to source video content."
}
}
},
"viewCount": {
"default": "Number of {API} views, if available on the page."
},
"width": {
"default": "Width of image as (re-)sized via browser/CSS."
},
"xpath": {
"default": "XPath expression identifying the {api} node."
}
}
}
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Write out HTML based on replacement code
import sys
sys.path.append('/Users/john/svn/Miscellaneous/john')
sys.path.append('/home/jdavi/svn/john')
from jdhelpers import *
import cgi
import collections
import codecs
# Load the field/argument descriptions, then write them straight back in a
# normalized layout (4-space indent, sorted keys).
with open('docFields.txt','r+') as fields_file:
    table_types = json.load(fields_file)
    fields_file.seek(0)
    fields_file.write(json.dumps(table_types,indent=4,sort_keys=True))
    # The rewrite happens in place: without truncating, a normalized text that
    # is shorter than the original would leave stale bytes at the end of the
    # file and make it unparseable on the next run.
    fields_file.truncate()
# Maps API name -> name of the HTML file to update; filled in by parseLines().
api_files = {}
def parseFields(text):
    '''Split a comma-separated field list, keeping subfield groups intact.

    Commas inside a parenthesized group do not separate fields, so
    "type,images(url,title)" yields ["type", "images(url,title)"].

    A group that opens and closes within one item (a single subfield such
    as "images(url)") is a complete field on its own. A group that is never
    closed is dropped from the result.

    text -- the comma-separated field specification
    Returns the list of field specifications, in their original order.
    '''
    fields = []
    pending = None  # text of the group being assembled, None when outside one
    for item in text.split(','):
        if pending is None:
            if "(" in item and ")" not in item:
                # The group continues in the following items.
                pending = item
            else:
                fields.append(item)
        else:
            pending = ",".join([pending, item])
            if ")" in item:
                fields.append(pending)
                pending = None
    return fields
def parseLines(text):
    '''Parse the lines of the API doc-configurator into a nested mapping.

    Three kinds of line are recognized:

    * no colon      -- "<api> <filename>": starts a new API and records its
                       HTML filename in the module-level api_files
    * one colon     -- "<table>_<slot>: a,b(c,d)": a field list, parsed with
                       parseFields()
    * more colons   -- "<table>_<slot>: <url>": a setting whose value is a
                       URL, stored as a single string

    Spaces are removed from setting lines before they are split. Blank lines
    are skipped.

    text -- an iterable of lines, e.g. an open file
    Returns an ordered mapping {api: {table: {slot: value}}} that follows the
    order of the input.
    Raises ValueError for a header without a filename, a setting that comes
    before any header, or a setting name without an underscore.
    '''
    # Ordered mappings keep the generated sections in configuration order
    # regardless of the interpreter's dict ordering.
    apis = collections.OrderedDict()
    api = None
    for raw_line in text:
        line = raw_line.rstrip('\r\n')
        if not line.strip():
            continue
        if ":" not in line:
            header = line.split()
            if len(header) < 2:
                raise ValueError("expected '<api> <filename>', got %r" % line)
            api = header[0]
            apis[api] = collections.OrderedDict()
            api_files[api] = header[1]
            continue
        if api is None:
            raise ValueError("setting found before any API header: %r" % line)
        section, _, value = line.replace(' ','').partition(':')
        names = section.split('_')
        if len(names) < 2:
            raise ValueError("expected '<table>_<slot>: ...', got %r" % line)
        table, slot = names[0], names[1]
        group = apis[api].setdefault(table, collections.OrderedDict())
        if line.count(":") == 1:
            group[slot] = parseFields(value)
        else:
            # A setting with a URL: everything after the first colon is kept
            # as-is, because the URL contains colons of its own.
            print(value)
            group[slot] = value
    return apis
def parseSubfields(text):
    '''Return the subfield names of a field specification.

    "images(url,title)" yields ["url", "title"]. A specification without a
    parenthesized group, or with an empty one, yields an empty list.

    text -- a single field specification as produced by parseFields()
    '''
    pieces = text.rstrip(')').split('(')
    if len(pieces) < 2:
        return []
    return [name for name in pieces[1].split(',') if name]
class writeHtml(object):
    '''Collects the generated markup for one section of one API.

    Every appended fragment goes to two places: a standalone file named
    "<api>_<table_type>.html" and an in-memory copy (html_holder) that
    file_insert() later splices into the API's own HTML file.
    '''
    def __init__(self,api,table_type):
        self.filename = "%s_%s.html" % (api,table_type)
        # Create the standalone file, or empty it if it already exists.
        with codecs.open(self.filename,'w',encoding='utf8'):
            pass
        self.html_holder = ""
        self.api = api
        self.table_type = table_type

    def append(self,markup):
        '''Add a fragment to the standalone file and to html_holder.

        The file gets a newline after each fragment; html_holder does not.
        '''
        with codecs.open(self.filename,'a','utf-8') as fo:
            fo.write("%s\n" % markup)
        self.html_holder += markup

    def open(self,type):
        '''Emit the opening markup: <pre> for code samples, otherwise a table
        plus its header row.'''
        if "codesample" in type:
            self.append('<pre>')
        else:
            # Column heading is the singular form, e.g. "fields" -> "Field".
            table_header = type.rstrip('s').capitalize()
            self.append('<table class="controls table table-bordered" id="%s" border="0" cellpadding="5">\n' % type)
            self.append('\t<thead><tr><th>%s</th><th>Description</th></tr></thead>\n' % table_header)

    def close(self,type):
        '''Emit the closing markup that matches open().'''
        if "codesample" in type:
            self.append('</pre>')
        else:
            self.append("</table>")

    def file_insert(self):
        '''Replace the marked region of the API's HTML file with html_holder.

        The region is delimited by "<!--{<table_type>}-->" and
        "<!--{end<table_type>}-->". The file is left untouched unless both
        markers are present, the end marker after the start marker.
        '''
        filename = api_files[self.api]
        with codecs.open(filename,'r',encoding='utf8') as fo:
            text = fo.read()
        key_start = "<!--{%s}-->" % self.table_type
        key_end = "<!--{end%s}-->" % self.table_type
        before_text, start_found, remainder = text.partition(key_start)
        _, end_found, after_text = remainder.partition(key_end)
        if not (start_found and end_found):
            return
        # Rewrite the file from scratch: writing over the old content in place
        # would leave its tail behind whenever the new text is shorter.
        with codecs.open(filename,'w',encoding='utf8') as fo:
            fo.write(before_text + key_start + self.html_holder + key_end + after_text)
def getRow(title,api,table_type,section_type,parent="",parentRow=False):
    '''Return one table row of HTML for a field or argument.

    The row is either a regular row, a title row ("images"), or an indented
    row ("images > pixelHeight").

    The description comes from table_types: the API-specific text if there is
    one, otherwise the "default" text. "{api}" and "{API}" in the text are
    replaced by the API name and its capitalized form.

    title        -- name of the field or argument
    api          -- API name, e.g. "article"
    table_type   -- top-level key of table_types, e.g. "fields"
    section_type -- section name, added to the description cell's class
    parent       -- for a subfield, the name of the field it belongs to
    parentRow    -- True for a top-level row that is followed by subfield rows

    Raises KeyError if table_types has no matching entry or the entry has
    neither an API-specific nor a "default" description.
    '''
    if parent != "":
        entry = table_types[table_type][parent]['subfields'][title]
        class_name = "%s indent" % parent
    else:
        entry = table_types[table_type][title]
        class_name = "parent" if parentRow else ""
    template = entry[api] if api in entry else entry['default']
    description = template.format(api=api,API=api.capitalize())
    html = '''
<tr>
<td class="{className}"><code>{title}</code></td>
<td class="{className} {sectionType}"><div>{description}</div></td>
</tr>'''.format(className=class_name,sectionType=section_type,title=title,description=description)
    return html
def headerRow(section_type,table_type):
    '''Return the heading row that introduces a section of a table.

    The heading text is table_types[table_type][section_type]. Returns None
    when no such entry exists, which means the section gets no heading.
    '''
    try:
        header_text = table_types[table_type][section_type]
    except KeyError:
        return None
    html = '''
<tr>
<td colspan="2" class="header">{header_text}</td>
</tr>'''.format(header_text=header_text)
    return html
def cleanResponse(text):
    '''Prepare a sample API response for display inside the documentation.

    Swaps the internal token and e-mail address for public placeholders,
    drops the "?mode=high-precision" suffix, turns the JSON escape sequence
    for a right single quotation mark into a plain apostrophe, and escapes
    "&", "<" and ">" for HTML. Quote characters are left as they are.

    text -- the response text
    Returns the cleaned text; it is also printed.
    '''
    # cgi.escape no longer exists on current Python 3 releases.
    try:
        from html import escape
    except ImportError:
        from cgi import escape
    text = text.replace('doctoken','sampletoken')
    text = text.replace('john@diffbot.com','support@diffbot.com')
    text = text.replace('?mode=high-precision','')
    # Six literal characters (backslash, u, 2, 0, 1, 9), which is how the
    # character is written in ASCII-escaped JSON text.
    text = text.replace('\\u2019','\'')
    text = escape(text, quote=False)
    print(text)
    return text
def getSample(url):
    '''Fetch a sample API response and return it ready for the docs.

    The response body is parsed as JSON with the key order preserved,
    re-serialized with a 2-space indent and passed through cleanResponse().
    When the URL contains "article", the indented JSON is also written to
    text.html.

    url -- the sample request URL
    '''
    r = requests.get(url)
    j = json.loads(r.text, object_pairs_hook=collections.OrderedDict)
    text = json.dumps(j,indent=2)
    if "article" in url:
        # The context manager closes the file even if the write fails.
        with codecs.open('text.html','w','utf-8') as fo:
            fo.write(text)
    return cleanResponse(text)
def main():
    '''Generate the documentation markup for every configured API.

    Reads docApis.txt and, for each API (or only for single_api when that is
    set), builds one table per documentation section and one <pre> block per
    code sample. Each result is written to its own file and spliced into the
    API's HTML file via writeHtml.file_insert().
    '''
    with open('docApis.txt','r') as fo:
        apis = parseLines(fo)
    for api in apis:
        if single_api and api != single_api:
            continue
        for docsection_type in apis[api]:
            if docsection_type != "codesample":
                # One table per section ("fields", "arguments", ...).
                html = writeHtml(api,docsection_type)
                html.open(docsection_type)
                for section_type, rows in apis[api][docsection_type].items():
                    header = headerRow(section_type,docsection_type)
                    if header is not None:
                        html.append(header)
                    for row in rows:
                        if "(" in row:
                            # A field with subfields: a title row for the
                            # field, then one indented row per subfield.
                            parent = row.split('(')[0]
                            html.append(getRow(parent,api,docsection_type,section_type,parentRow=True))
                            for subfield in parseSubfields(row):
                                html.append(getRow(subfield,api,docsection_type,section_type,parent=parent))
                        else:
                            html.append(getRow(row,api,docsection_type,section_type))
                html.close(docsection_type)
                html.file_insert()
            else:
                # One <pre> block per code sample, filled with the response
                # of the sample request.
                for samplenumber in apis[api][docsection_type]:
                    name = "%s%s" % (docsection_type,samplenumber)
                    html = writeHtml(api,name)
                    html.open(name)
                    preview = getSample(apis[api][docsection_type][samplenumber])
                    html.append(preview)
                    html.close(name)
                    print(preview)
                    html.file_insert()
# Name of the only API to generate, or None to generate all of them.
# Read by main(); set from the command line when run as a script.
single_api = None
if __name__ == '__main__':
    # parse args -- done here rather than at module level so that importing
    # this file does not consume the importing program's command line
    parser = argparse.ArgumentParser()
    parser.add_argument('-a', '--api', required=False, help="Pass a single API to generate that documentation only")
    args = parser.parse_args()
    single_api = args.api
    print('Executed from the command line')
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment