druwynings/docApis.txt Secret

## docApis.txt
product api-product_v3.html
fields_default: type,pageUrl,resolvedPageUrl,title,text,brand,offerPrice,regularPrice,shippingAmount,saveAmount,priceRange(minPrice,maxPrice),quantityPrices(minQuantity,price),offerPriceDetails,regularPriceDetails,saveAmountDetails,productId,upc,sku,mpn,isbn,specs,images(url,title,naturalHeight,naturalWidth,primary,xpath,diffbotUri),discussion,prefixCode,productOrigin,humanLanguage,diffbotUri
fields_optional: links,meta,querystring,breadcrumb
fields_experimental: availability,colors,size
arguments_default: token,url
arguments_optional: fields,discussion,timeout,callback
article api-article_v3.html
fields_default: type,title,text,html,date,estimatedDate,author,authorUrl,discussion,humanLanguage,numPages,nextPages,siteName,publisherRegion,publisherCountry,pageUrl,resolvedPageUrl,tags(label,count,prevalence,type,uri),images(url,title,height,width,naturalHeight,naturalWidth,primary,diffbotUri),videos(url,naturalHeight,naturalWidth,primary,diffbotUri),diffbotUri
fields_optional:sentiment,links,meta,querystring,breadcrumb
arguments_default: token,url
arguments_optional: fields,paging,maxTags,discussion,timeout,callback
codesample_1: http://api.diffbot.com/v3/article?token=john&url=http%3A%2F%2Fblog.diffbot.com%2Fdiffbots-new-product-api-teaches-robots-to-shop-online
image api-image_v3.html
fields_default: type,url,title,naturalHeight,naturalWidth,humanLanguage,anchorUrl,pageUrl,resolvedPageUrl,xpath,diffbotUri
fields_optional: displayHeight,displayWidth,links,meta,querystring,breadcrumb
fields_experimental: mentions,ocr,faces
arguments_default: token,url
arguments_optional: fields,timeout,callback
analyze api-analyze_v3.html
arguments_default: token,url
arguments_optional: fields,mode,discussion,timeout,callback
fields_default: title,type,humanLanguage
fields_optional: links,meta,querystring,breadcrumb
semantria api-semantria.html
arguments_default: textAnalysis,semantriaKey,semantriaSecret
discussion api-discussion_v3.html
arguments_default: token,url
arguments_optional: fields,timeout,callback,maxPages
fields_default: type,pageUrl,resolvedPageUrl,title,numPosts,posts(type,id,parentId,text,html,tags,humanLanguage,images,date,author,authorUrl,pageUrl,diffbotUri),tags,participants,numPages,nextPage,nextPages,provider,humanLanguage,rssUrl,diffbotUri
fields_optional: sentiment,links,meta,querystring,breadcrumb
codesample_1: http://api.diffbot.com/v3/discussion?version=3&token=john&url=https%3A%2F%2Fnews.ycombinator.com%2Fitem%3Fid%3D5608988
video api-video_v3.html
arguments_default: token,url
arguments_optional: fields,timeout,callback
fields_default: type,pageUrl,resolvedPageUrl,title,text,url,html,embedUrl,author,date,duration,viewCount,naturalHeight,naturalWidth,images(url,title),mime,humanLanguage,diffbotUri
fields_optional: links,meta,querystring,breadcrumb
bulk api-bulk.html
codesample_1: https://api.diffbot.com/v3/bulk?token=doctoken&name=bulkJob
crawl api-crawlbot_v2.html
codesample_1: https://api.diffbot.com/v3/crawl?token=doctoken&name=crawlJob

## docFields.txt
{
    "arguments": {
        "breadcrumb": {
            "default": "Returns a top-level array (<code>breadcrumb</code>) of URLs and link text from page breadcrumbs."
        },
        "callback": {
            "default": "Use for jsonp requests. Needed for cross-domain ajax."
        },
        "discussion": {
            "analyze": "Pass <code>discussion=false</code> to disable automatic extraction of comments or reviews from pages identified as articles or products. This will not affect pages identified as discussions.",
            "default": "Pass <code>discussion=false</code> to disable automatic extraction of article comments. See <a href='#discussion'>below</a>.",
            "product": "Pass <code>discussion=false</code> to disable automatic extraction of product reviews. See <a href='#discussion'>below</a>."
        },
        "fields": {
            "analyze": "Specify optional fields to be returned from any fully-extracted pages, e.g.: <code>&fields=querystring,links</code>.<br><br>See available fields within each API's individual documentation pages.",
            "default": "Used to specify optional fields to be returned by the {API} API. See the <a href=\"#fields\">Fields</a> section below."
        },
        "links": {
            "default": "Returns a top-level object (<code>links</code>) containing all hyperlinks found on the page."
        },
        "maxPages": {
            "default": "Set the maximum number of pages in a thread to automatically concatenate in a single response. Default = 1 (no concatenation). Set <code>maxPages=all</code> to retrieve all pages of a thread regardless of length. Each individual page will count as a separate API call."
        },
        "maxTags": {
            "default": "Set the maximum number of automatically-generated tags to return. By default a maximum of five tags will be returned."
        },
        "meta": {
            "default": "Returns a top-level object (<code>meta</code>) containing the full contents of page <code>meta</code> tags, including sub-arrays for <a href=\"http://ogp.me/\" target=\"_new\">OpenGraph</a> tags, <a href=\"https://dev.twitter.com/docs/cards/markup-reference\" target=\"_new\">Twitter Card</a> metadata, <a href=\"http://www.schema.org\" target=\"_new\">schema.org</a> microdata, and -- if available -- <a href=\"http://www.oembed.com\" target=\"_new\">oEmbed</a> metadata."
        },
        "mode": {
            "default": "By default the Analyze API will fully extract all pages that match an existing Automatic API -- articles, products or image pages. Set <code>mode</code> to a specific page-type (e.g., <code>mode=article</code>) to extract content only from that specific page-type. All other pages will simply return the default Analyze fields."
        },
        "optional": "Optional arguments",
        "paging": {
            "default": "Pass <code>paging=false</code> to disable automatic concatenation of multiple-page articles. (By default, Diffbot will concatenate up to 20 pages of a single article.) <a href=\"http://support.diffbot.com/automatic-apis/handling-multiple-page-articles\">More on automatic concatenation</a>."
        },
        "querystring": {
            "default": "Returns any key/value pairs present in the URL querystring. Items without a discrete value will be returned as <code>true</code>."
        },
        "semantriaKey": {
            "default": "Include your Semantria API key (available from your Semantria dashboard)."
        },
        "semantriaSecret": {
            "default": "Include your Semantria API secret (available from your Semantria dashboard)."
        },
        "stats": {
            "default": "Returns statistics on page classification, including the array of individual page-types that comprise the \"other\" category, and Diffbot confidence score for each type. <a href=\"#pagetypes\">See all page-types</a>."
        },
        "textAnalysis": {
            "default": "Pass either <code>&textAnalysis</code> or request the field <code>textAnalysis</code> to return the Semantria-powered object in your response."
        },
        "timeout": {
            "default": "Set a value in milliseconds to terminate the response. By default the {API} API has a 30-second (30000) timeout."
        },
        "token": {
            "default": "Developer token"
        },
        "url": {
            "default": "Web page URL of the {api} to process (URL encoded)"
        }
    },
    "fields": {
        "anchorUrl": {
            "default": "If the image is hyperlinked, returns the destination URL."
        },
        "attrAlt": {
            "default": "Contents of the image's <code>alt</code> attribute."
        },
        "attrTitle": {
            "default": "Contents of the image's <code>title</code> attribute."
        },
        "author": {
            "default": "{API} author.",
            "video": "{API} uploader or creator, if available."
        },
        "authorUrl": {
            "default": "URL of the author profile page, if available."
        },
        "availability": {
            "default": "Item's availability, either <code>true</code> or <code>false</code>."
        },
        "brand": {
            "default": "Item's brand name."
        },
        "breadcrumb": {
            "default": "Returns a top-level array (<code>breadcrumb</code>) of URLs and link text from page breadcrumbs."
        },
        "colors": {
            "default": "Returns an array of hex values of the dominant colors within the image.",
            "product": "Returns array of product color options."
        },
        "date": {
            "default": "Date of extracted {api}, normalized in most cases to <a href=\"http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.3\">RFC 1123 (HTTP/1.1)</a>.",
            "image": "Date associated with the image, if available in page content."
        },
        "diffbotUri": {
            "default": "Unique object ID. The <code>diffbotUri</code> is generated from the values of various {API} fields and uniquely identifies the object. This can be used for deduplication."
        },
        "discussion": {
            "default": "Article comments, as extracted by the Diffbot Discussion API. See <a href='#discussion'>below</a>.",
            "product": "Product reviews, as extracted by the Diffbot Discussion API. See <a href='#discussion'>below</a>."
        },
        "duration": {
            "default": "Duration in seconds of the {API}."
        },
        "embedUrl": {
            "default": "Embeddable URL, if available."
        },
        "estimatedDate": {
            "default": "If an article's date is ambiguous, Diffbot will attempt to estimate a more specific timestamp using various factors. This will not be generated for articles older than two days, or articles without an identified <code>date</code>."
        },
        "experimental": "The following fields are in an early beta stage:",
        "faces": {
            "default": "The x, y, height and width coordinates of human faces. Returns null if no faces are found."
        },
        "height": {
            "default": "Height of image as (re-)sized via browser/CSS."
        },
        "html": {
            "default": "Diffbot-normalized HTML of the extracted article. Please see the <a href=\"/dev/docs/article/html\">HTML Specification</a> for a breakdown of elements and attributes returned.",
            "video": "Embeddable HTML of the video (if available), typically an <code>IFRAME</code> or <code>VIDEO</code> object."
        },
        "humanLanguage": {
            "default": "Returns the (spoken/human) language of the submitted page, using two-letter <a href=\"http://en.wikipedia.org/wiki/List_of_ISO_639-1_codes\" target=\"_blank\">ISO 639-1 nomenclature</a>.",
            "discussion": "Spoken/human language of the discussion / comment thread, using two-letter <a href=\"http://en.wikipedia.org/wiki/List_of_ISO_639-1_codes\" target=\"_blank\">ISO 639-1 nomenclature</a>."
        },
        "images": {
            "article": "Array of images, if present within the article body.",
            "default": "Array of images, if present within the {api}.",
            "subfields": {
                "diffbotUri": {
                    "default": "Internal ID used for indexing."
                },
                "height": {
                    "default": "Height of image as (re-)sized via browser/CSS."
                },
                "naturalHeight": {
                    "default": "Raw image height, in pixels."
                },
                "naturalWidth": {
                    "default": "Raw image width, in pixels."
                },
                "primary": {
                    "default": "Returns <code>true</code> if image is identified as primary based on visual analysis."
                },
                "text": {
                    "default": "Diffbot-determined best caption for or description of the image."
                },
                "title": {
                    "default": "Description or caption of the image."
                },
                "url": {
                    "default": "Fully resolved link to image. If the image <code>SRC</code> is encoded as base64 data, the complete data URI will be returned."
                },
                "width": {
                    "default": "Width of image as (re-)sized via browser/CSS."
                },
                "xpath": {
                    "default": "XPath expression identifying the image node."
                }
            }
        },
        "isbn": {
            "default": "International Standard Book Number (ISBN), if available."
        },
        "links": {
            "default": "Returns a top-level object (<code>links</code>) containing all hyperlinks found on the page."
        },
        "mentions": {
            "default": "Array of articles upon which the same or similar image may be found."
        },
        "meta": {
            "default": "Returns a top-level object (<code>meta</code>) containing the full contents of page <code>meta</code> tags, including sub-arrays for <a href=\"http://ogp.me/\" target=\"_new\">OpenGraph</a> tags, <a href=\"https://dev.twitter.com/docs/cards/markup-reference\" target=\"_new\">Twitter Card</a> metadata, <a href=\"http://www.schema.org\" target=\"_new\">schema.org</a> microdata, and -- if available -- <a href=\"http://www.oembed.com\" target=\"_new\">oEmbed</a> metadata.",
            "image": "Comma-separated list of image-embedded metadata (e.g., EXIF, XMP, ICC Profile), if available within the image file."
        },
        "mime": {
            "default": "MIME type, if available, as specified by the {API}'s \"Content-Type.\""
        },
        "mpn": {
            "default": "Manufacturer's Product Number."
        },
        "naturalHeight": {
            "default": "Raw image height, in pixels.",
            "video": "Raw video height, if available, in pixels."
        },
        "naturalWidth": {
            "default": "Raw image width, in pixels.",
            "video": "Raw video width, if available, in pixels."
        },
        "displayHeight": {
            "default": "Height of image as presented in the browser (and as sized via browser/CSS, if resized)."
        },
        "displayWidth": {
            "default": "Width of image as presented in the browser (and as sized via browser/CSS, if resized)."
        },
        "nextPage": {
            "default": "If {api} spans multiple pages, <code>nextPage</code> will return the subsequent page URL."
        },
        "nextPages": {
            "default": "Array of all page URLs concatenated in a multipage {api}. <a href=\"http://support.diffbot.com/automatic-apis/handling-multiple-page-articles/\">More on automatic concatenation</a>."
        },
        "numPages": {
            "default": "Number of pages automatically concatenated to form the <code>text</code> or <code>html</code> response. By default, Diffbot will automatically concatenate up to 20 pages of an {api}. <a href=\"http://support.diffbot.com/automatic-apis/handling-multiple-page-articles/\">More on automatic concatenation</a>.",
            "discussion": "Number of pages in the thread concatenated to form the <code>posts</code> response. Use <code>maxPages</code> to define how many pages to concatenate. <a href=\"http://support.diffbot.com/automatic-apis/handling-multiple-page-articles/\">More on automatic concatenation</a>."
        },
        "numPosts": {
            "default": "Number of individual posts in the thread."
        },
        "ocr": {
            "default": "If text is identified within the image, we will attempt to recognize the text string."
        },
        "offerPrice": {
            "default": "Offer or actual/final price of the product."
        },
        "offerPriceDetails": {
            "default": "<code>offerPrice</code> separated into its constituent parts: <code>amount</code>, <code>symbol</code>, and full <code>text</code>."
        },
        "optional": "Optional fields, available using <code>fields=</code> argument",
        "pageUrl": {
            "default": "URL of submitted page / page from which the {api} is extracted."
        },
        "participants": {
            "default": "Number of unique participants in the discussion thread or comments."
        },
        "posts": {
            "default": "Array of individual posts.",
            "subfields": {
                "author": {
                    "default": "Name/username of the post author."
                },
                "authorUrl": {
                    "default": "URL of the author profile page, if available."
                },
                "date": {
                    "default": "Date of post, normalized in most cases to <a href=\"http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.3\">RFC 1123 (HTTP/1.1)</a>."
                },
                "diffbotUri": {
                    "default": "Internal ID used for indexing."
                },
                "html": {
                    "default": "Diffbot-normalized HTML of the extracted post. Please see the <a href=\"/dev/docs/article/html\">HTML Specification</a> for a breakdown of elements and attributes returned."
                },
                "humanLanguage": {
                    "default": "Spoken/human language of the post, using two-letter <a href=\"http://en.wikipedia.org/wiki/List_of_ISO_639-1_codes\" target=\"_blank\">ISO 639-1 nomenclature</a>."
                },
                "id": {
                    "default": "ID of the individual post. The first post of a thread will have an ID of 0."
                },
                "images": {
                    "default": "If any images are detected within post content, they will be returned in a separate array. Individual array fields are the same as the <a href='/dev/docs/article'>Article API's</a> <code>images</code> array."
                },
                "pageUrl": {
                    "default": "URL of the page on which the post was found."
                },
                "parentId": {
                    "default": "ID of the parent, if the post is a reply or response."
                },
                "sentiment": {
                    "default": "Returns a sentiment score from -1.0 (very negative) to 1.0 (very positive) for the individual post."
                },
                "tags": {
                    "default": "If the post is long enough, an array of tags generated from its specific content."
                },
                "text": {
                    "default": "Full text of the extracted post."
                },
                "type": {
                    "default": "Type of element (always <code>post</code>)."
                }
            }
        },
        "prefixCode": {
            "default": "Country of origin as identified by UPC/ISBN."
        },
        "priceRange": {
            "default": "If the product is available in a range of prices, the minimum and maximum values will be returned. The lowest price will also be returned as the <code>offerPrice</code>.",
            "subfields": {
                "maxPrice": {
                    "default": "The maximum price for the offered item."
                },
                "minPrice": {
                    "default": "The minimum price for the offered item."
                }
            }
        },
        "productId": {
            "default": "Diffbot-determined unique product ID. If <code>upc</code>, <code>isbn</code>, <code>mpn</code> or <code>sku</code> are identified on the page, <code>productId</code> will select from these values in the above order."
        },
        "productOrigin": {
            "default": "If available, two-character ISO country code where the product was produced."
        },
        "provider": {
            "default": "Discussion service provider (e.g., Disqus, Facebook), if known."
        },
        "publisherCountry": {
            "default": "If known, the country of the article publication."
        },
        "publisherRegion": {
            "default": "If known, the region of the article publication."
        },
        "quantityPrices": {
            "default": "If the product is available with quantity-based discounts, all identifiable price points will be returned. The lowest price will also be returned as the <code>offerPrice</code>.",
            "subfields": {
                "minQuantity": {
                    "default": "The minimum quantity required to purchase for the associated price."
                },
                "price": {
                    "default": "Price of the specific quantity level."
                }
            }
        },
        "querystring": {
            "default": "Returns any key/value pairs present in the URL querystring. Items without a discrete value will be returned as <code>true</code>."
        },
        "regularPrice": {
            "default": "Regular or original price of the product, if available."
        },
        "regularPriceDetails": {
            "default": "<code>regularPrice</code> separated into its constituent parts: <code>amount</code>, <code>symbol</code>, and full <code>text</code>."
        },
        "resolvedPageUrl": {
            "default": "Returned if the <code>pageUrl</code> redirects to another URL."
        },
        "rssUrl": {
            "default": "URL of the {api}'s RSS feed, if available."
        },
        "saveAmount": {
            "default": "Discount or amount saved off the regular price."
        },
        "saveAmountDetails": {
            "default": "<code>saveAmount</code> separated into its constituent parts: <code>amount</code>, <code>symbol</code>, full <code>text</code>, and whether or not it is a <code>percentage</code> value."
        },
        "sentiment": {
            "default": "Returns the sentiment score of the analyzed article text, a value ranging from -1.0 (very negative) to 1.0 (very positive).",
            "discussion": "Returns a sentiment score of each individual post, a value ranging from -1.0 (very negative) to 1.0 (very positive)."
        },
        "shippingAmount": {
            "default": "Shipping price."
        },
        "siteName": {
            "default": "The plain-text name of the site (e.g. <code>The New York Times</code> or <code>Diffbot</code>). If no site name is automatically determined, the root domain (<code>diffbot.com</code>) will be returned."
        },
        "size": {
            "default": "Size in bytes of the image file.",
            "product": "Size(s) available, if identified on the page."
        },
        "sku": {
            "default": "Stock Keeping Unit -- store/vendor inventory number or identifier."
        },
        "specs": {
            "default": "If a specifications table or similar data is available on the product page, individual specifications will be returned in the <code>specs</code> object as name/value pairs. Names will be normalized to lowercase with spaces replaced by underscores, e.g. <code>display_resolution</code>."
        },
        "stats": {
            "analyze": "If the <code>stats</code> argument is passed in the request, the <code>stats</code> object will be returned, including an array of page-types and associated confidence scores.",
            "default": "Returns statistics on the page evaluation and extraction."
        },
        "tags": {
            "article": "Array of tags/entities, generated from analysis of the extracted <code>text</code> and cross-referenced with <a href='http://wiki.dbpedia.org/About' target='_new'>DBpedia</a> and other data sources.",
            "default": "Array of tags/entities, generated from analysis of the extracted <code>text</code> and cross-referenced with <a href='http://wiki.dbpedia.org/About' target='_new'>DBpedia</a> and other data sources.",
            "discussion": "Array of tags/entities as generated from analysis of all extracted <code>posts</code> and cross-referenced with <a href='http://wiki.dbpedia.org/About' target='_new'>DBpedia</a> and other data sources.",
            "subfields": {
                "count": {
                    "default": "Number of appearances the entity makes within the text content."
                },
                "label": {
                    "default": "Name of the entity or tag."
                },
                "prevalence": {
                    "default": "Ratio of the number of appearances of this entity compared to those of all entities within the text content."
                },
                "type": {
                    "default": "Link to the entity type, if identified, most commonly at DBpedia."
                },
                "uri": {
                    "default": "Link to the entity at DBpedia or other data source."
                }
            }
        },
        "text": {
            "default": "Full text of the {api}.",
            "product": "Text description, if available, of the {api}.",
            "video": "Text description, if available, of the {api}."
        },
        "textNode": {
            "default": "MIME type, if available, as specified by the image's \"Content-Type.\""
        },
        "title": {
            "analyze": "Title of the page.",
            "default": "Title of the {api}.",
            "discussion": "Title of the discussion thread or parent post.",
            "image": "Title or caption of the image, if available."
        },
        "type": {
            "analyze": "Page-type of the submitted URL, either <code>article</code>, <code>image</code>, <code>product</code> or <code>other</code>.",
            "default": "Type of object (always <code>{api}</code>)."
        },
        "upc": {
            "default": "Universal Product Code (UPC/EAN), if available."
        },
        "url": {
            "default": "Direct link to image file.",
            "video": "Direct link to source video file, if available."
        },
        "videos": {
            "article": "Array of videos, if present within the article body.",
            "default": "Array of videos, if present within the {api}.",
            "subfields": {
                "diffbotUri": {
                    "default": "Internal ID used for indexing."
                },
                "naturalHeight": {
                    "default": "Source video height, in pixels, if available."
                },
                "naturalWidth": {
                    "default": "Source video width, in pixels, if available."
                },
                "primary": {
                    "default": "Returns <code>true</code> if video is identified as primary based on visual analysis."
                },
                "url": {
                    "default": "Fully resolved link to source video content."
                }
            }
        },
        "viewCount": {
            "default": "Number of {API} views, if available on the page."
        },
        "width": {
            "default": "Width of image as (re-)sized via browser/CSS."
        },
        "xpath": {
            "default": "XPath expression identifying the {api} node."
        }
    }
}

## docGenerator.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# write out HTML based on replacement code

import sys
sys.path.append('/Users/john/svn/Miscellaneous/john')
sys.path.append('/home/jdavi/svn/john')
from jdhelpers import *
import cgi
import collections
import codecs

# Load the field/argument descriptions, then rewrite the same file in a
# normalized form (4-space indent, sorted keys) so that it stays diff-friendly.
# NOTE(review): `json` is not imported explicitly in this file; presumably it
# arrives through the `from jdhelpers import *` star import -- confirm.
with open('docFields.txt','r+') as fields_file:
    table_types = json.load(fields_file)
    fields_file.seek(0)
    fields_file.write(json.dumps(table_types,indent=4,sort_keys=True))
    # The file is overwritten in place: when the normalized text is shorter
    # than the previous contents, the old tail would survive and leave the
    # file as invalid JSON. Cut the file off at the current write position.
    fields_file.truncate()

# Maps an API name to the HTML documentation file it is rendered into.
# Populated by parseLines() and read by writeHtml.file_insert().
api_files = {}

def parseFields(text):
    '''Split a comma-separated field list, keeping parenthesized groups intact.

    Commas that sit inside parentheses belong to a subfield list and do not
    separate top-level fields, so "type,images(url,title),text" yields
    ["type", "images(url,title)", "text"].

    A group with a single subfield ("images(url)") opens and closes within the
    same comma-separated piece and is returned as one field of its own.

    A group whose closing parenthesis never arrives is dropped from the
    result, together with everything that follows it.

    text    -- the raw field list (the part of a config line after the colon)
    returns -- list of field strings, in input order
    '''

    fields = []
    pending = []    # pieces of the group currently being reassembled
    depth = 0       # count of "(" seen that have not been closed yet
    for item in text.split(','):
        pending.append(item)
        depth += item.count('(') - item.count(')')
        if depth <= 0:
            # Parentheses are balanced again: the collected pieces form one field.
            fields.append(",".join(pending))
            pending = []
            depth = 0   # a stray ")" must not affect the fields that follow
    return fields

def parseLines(text):
    '''Parse the lines of the docApis.txt configuration into a nested dict.

    Three kinds of lines are recognized:

    * "<api> <filename>" (no colon) starts a new API section and records the
      filename in the module-level api_files mapping.
    * "<table>_<type>: a,b,c(d,e)" (exactly one colon) stores the parsed field
      list under apis[api][table][type].
    * "<name>_<slot>: <url>" (more than one colon, i.e. the value is a URL)
      stores the raw URL under apis[api][name][slot] and prints it.

    Spaces inside setting lines are removed before parsing. Blank lines are
    skipped.

    text    -- iterable of lines (e.g. an open file or the result of readlines())
    returns -- dict of the form {api: {table: {type: value}}}
    raises  -- ValueError if a setting line appears before any API header line
    '''

    apis = {}
    api = None
    for line in text:
        line = line.strip('\n')
        if not line.strip():
            # A blank line has no second token and would otherwise be taken
            # for a malformed API header.
            continue

        if ":" not in line:
            header = line.split()
            api = header[0]
            api_files[api] = header[1]
            apis[api] = {}
            continue

        if api is None:
            raise ValueError("setting line found before any API header: %r" % line)

        _line = line.replace(' ','')
        section, _, value = _line.partition(':')
        name_parts = section.split('_')
        group = apis[api].setdefault(name_parts[0], {})
        slot = name_parts[1]

        if _line.count(":") == 1:
            group[slot] = parseFields(value)
        else:
            # More than one colon means the value itself contains one (a URL),
            # so it is kept verbatim instead of being split into fields.
            print(value)
            group[slot] = value
    return apis

def parseSubfields(text):
    '''Return the subfield names listed inside a "parent(sub1,sub2)" entry.

    Trailing ")" characters are removed, the remainder is cut at every "(",
    and the segment following the first "(" is split on commas.
    '''
    without_closers = text.rstrip(')')
    segments = without_closers.split('(')
    subfield_csv = segments[1]
    return subfield_csv.split(',')

class writeHtml(object):
    '''Accumulates generated markup for one API table or code sample.

    Every piece of markup passed to append() is written to a standalone file
    named "<api>_<table_type>.html" and is also collected in memory, so that
    file_insert() can splice the complete result into the API's documentation
    page between the "<!--{table_type}-->" and "<!--{endtable_type}-->" markers.
    '''

    def __init__(self,api,table_type):
        self.filename = "%s_%s.html" % (api,table_type)
        # Opening in 'w' mode creates the file, or empties one left over from
        # an earlier run, before append() starts adding to it.
        with codecs.open(self.filename,'w',encoding='utf8'):
            pass

        self.html_holder = ""
        self.api = api
        self.table_type = table_type

    def append(self,markup):
        '''Add markup to the standalone file (newline-terminated) and to the
        in-memory copy (as given, without the newline).'''
        with codecs.open(self.filename,'a','utf-8') as fo:
            fo.write("%s\n" % markup)

        self.html_holder += markup

    def open(self,type):
        '''Emit the opening markup: <pre> for code samples, otherwise a table
        tag plus a header row titled after the (singularized) type.'''
        if "codesample" in type:
            self.append('<pre>')
        else:
            table_header = type.rstrip('s').capitalize()
            self.append('<table class="controls table table-bordered" id="%s" border="0" cellpadding="5">\n' % type)
            self.append('\t<thead><tr><th>%s</th><th>Description</th></tr></thead>\n' % table_header)

    def close(self,type):
        '''Emit the closing markup matching open().'''
        if "codesample" in type:
            self.append('</pre>')
        else:
            self.append("</table>")

    def file_insert(self):
        '''Replace the marked section of the API's documentation page with the
        collected markup.

        The page is looked up in the module-level api_files mapping. Nothing
        is written unless the start marker is present and the end marker
        follows it.
        '''
        filename = api_files[self.api]
        key_start = "<!--{%s}-->" % self.table_type
        key_end = "<!--{end%s}-->" % self.table_type

        with codecs.open(filename,'r',encoding='utf8') as fo:
            text = fo.read()

        before_text, found_start, remainder = text.partition(key_start)
        if not found_start:
            return
        # Search for the end marker only after the start marker; an end marker
        # placed earlier in the page would otherwise duplicate page content.
        _, found_end, after_text = remainder.partition(key_end)
        if not found_end:
            return

        text = before_text + key_start + self.html_holder + key_end + after_text
        # Reopen in 'w' mode so the page is truncated first: writing over the
        # old contents in place leaves a stale tail whenever the new page is
        # shorter than the old one.
        with codecs.open(filename,'w',encoding='utf8') as fo:
            fo.write(text)

def getRow(title,api,table_type,section_type,parent="",parentRow=False):
    '''Return the HTML table row documenting ``title``.

       The description is looked up in ``table_types[table_type]`` (under the
       parent's 'subfields' when ``parent`` is given), preferring the entry
       keyed by ``api`` and falling back to 'default'. "{api}" and "{API}"
       placeholders are filled with the API name and its capitalised form.

       The row is a regular row, a title row (``parentRow=True``, CSS class
       "parent") or an indented subfield row (CSS class "<parent> indent").
       Raises KeyError when neither an API-specific nor a default description
       exists.'''

    if parent != "":
        entry = table_types[table_type][parent]['subfields'][title]
        class_name = "%s indent" % parent   # set this for the class name
    else:
        entry = table_types[table_type][title]
        class_name = parent

    names = {'api': api, 'API': api.capitalize()}
    try:
        description = entry[api].format(**names)
    except KeyError:
        # no API-specific wording: use the shared description
        description = entry['default'].format(**names)

    if parentRow:
        class_name = "parent"

    html = '''
        <tr>
            <td class="{className}"><code>{title}</code></td>
            <td class="{className} {sectionType}"><div>{description}</div></td>
        </tr>'''.format(className=class_name,sectionType=section_type,title=title,description=description)

    return html

def headerRow(section_type,table_type):
    '''Return a full-width header row for ``section_type``, or None when
       ``table_types[table_type]`` defines no header text for that section.'''
    try:
        header_text = table_types[table_type][section_type]
    except KeyError:
        # only a missing entry means "no header"; anything else is a real error
        return None
    html = '''

        <tr>
            <td colspan="2" class="header">{header_text}</td>
        </tr>'''.format(header_text=header_text)

    return html

def cleanResponse(text):
    '''Sanitise a pretty-printed API response for publication.

       Swaps the documentation token and e-mail address for public
       placeholders, drops the "?mode=high-precision" query fragment, turns
       the JSON escape sequence for a right single quote into a plain
       apostrophe and HTML-escapes "&", "<" and ">" (quotes are left alone).
       The cleaned text is printed and returned.'''
    # cgi.escape was removed in Python 3.8; html.escape(quote=False) is its
    # drop-in equivalent, while Python 2 only ships the cgi variant.
    try:
        from html import escape as _escape
    except ImportError:
        from cgi import escape as _escape

    text = text.replace('doctoken','sampletoken')
    text = text.replace('john@diffbot.com','support@diffbot.com')
    text = text.replace('?mode=high-precision','')
    # the serialised JSON carries the six literal characters \u2019; spell the
    # backslash out so the match does not depend on how the interpreter reads
    # "\u" inside a plain string literal
    text = text.replace('\\u2019','\'')
    text = _escape(text, quote=False)
    print(text)
    return text

def getSample(url):
    '''Fetch ``url``, re-serialise its JSON body with two-space indentation
       (key order preserved) and return it after cleanResponse().

       When the URL contains "article" the pretty-printed JSON is also written
       to text.html.'''
    response = requests.get(url)
    payload = json.loads(response.text, object_pairs_hook=collections.OrderedDict)
    pretty = json.dumps(payload,indent=2)
    if "article" in url:
        with codecs.open('text.html','w','utf-8') as dump:
            dump.write(pretty)
    return cleanResponse(pretty)

def main():
    '''Generate every documentation fragment described in docApis.txt.

       For each API (or only the one selected through ``single_api``) every
       section other than "codesample" becomes an HTML table with one row per
       field, plus indented rows for grouped subfields; each "codesample"
       entry becomes a <pre> block holding the response fetched by
       getSample(). Every fragment is spliced into the API's documentation
       page with writeHtml.file_insert().'''

    # parseLines consumes the whole file before returning, so the handle can
    # be released straight away
    with open('docApis.txt','r') as fo:
        apis = parseLines(fo)

    for api, docsections in apis.items():

        if single_api and api != single_api:
            continue

        for docsection_type, sections in docsections.items():

            if docsection_type != "codesample":
                html = writeHtml(api,docsection_type)
                html.open(docsection_type)
                for section_type, rows in sections.items():

                    header = headerRow(section_type,docsection_type)
                    if header is not None:
                        html.append(header)

                    for row in rows:
                        if "(" in row:
                            # grouped field: one title row, then its subfields
                            parent = row.split('(')[0]
                            html.append(getRow(parent,api,docsection_type,section_type,parentRow=True))
                            for subfield in parseSubfields(row):
                                html.append(getRow(subfield,api,docsection_type,section_type,parent=parent))
                        else:
                            html.append(getRow(row,api,docsection_type,section_type))
                html.close(docsection_type)
                html.file_insert()
            else:
                for samplenumber, sample_url in sections.items():
                    name = "%s%s" % (docsection_type,samplenumber)
                    html = writeHtml(api,name)
                    html.open(name)
                    preview = getSample(sample_url)
                    html.append(preview)
                    html.close(name)
                    print(preview)
                    html.file_insert()

# Command-line handling: the optional -a/--api flag limits generation to one
# API. main() reads the result through the module-level name single_api.
parser = argparse.ArgumentParser()
parser.add_argument(
    '-a', '--api',
    required=False,
    help="Pass a single API to generate that documentation only",
)
args = parser.parse_args()
single_api = args.api

# Entry point when the file is run as a script.
if __name__ == '__main__':
    print('Executed from the command line')
    main()
	product api-product_v3.html
	fields_default: type,pageUrl,resolvedPageUrl,title,text,brand,offerPrice,regularPrice,shippingAmount,saveAmount,priceRange(minPrice,maxPrice),quantityPrices(minQuantity,price),offerPriceDetails,regularPriceDetails,saveAmountDetails,productId,upc,sku,mpn,isbn,specs,images(url,title,naturalHeight,naturalWidth,primary,xpath,diffbotUri),discussion,prefixCode,productOrigin,humanLanguage,diffbotUri
	fields_optional: links,meta,querystring,breadcrumb
	fields_experimental: availability,colors,size
	arguments_default: token,url
	arguments_optional: fields,discussion,timeout,callback
	article api-article_v3.html
	fields_default: type,title,text,html,date,estimatedDate,author,authorUrl,discussion,humanLanguage,numPages,nextPages,siteName,publisherRegion,publisherCountry,pageUrl,resolvedPageUrl,tags(label,count,prevalence,type,uri),images(url,title,height,width,naturalHeight,naturalWidth,primary,diffbotUri),videos(url,naturalHeight,naturalWidth,primary,diffbotUri),diffbotUri
	fields_optional:sentiment,links,meta,querystring,breadcrumb
	arguments_default: token,url
	arguments_optional: fields,paging,maxTags,discussion,timeout,callback
	codesample_1: http://api.diffbot.com/v3/article?token=john&url=http%3A%2F%2Fblog.diffbot.com%2Fdiffbots-new-product-api-teaches-robots-to-shop-online
	image api-image_v3.html
	fields_default: type,url,title,naturalHeight,naturalWidth,humanLanguage,anchorUrl,pageUrl,resolvedPageUrl,xpath,diffbotUri
	fields_optional: displayHeight,displayWidth,links,meta,querystring,breadcrumb
	fields_experimental: mentions,ocr,faces
	arguments_default: token,url
	arguments_optional: fields,timeout,callback
	analyze api-analyze_v3.html
	arguments_default: token,url
	arguments_optional: fields,mode,discussion,timeout,callback
	fields_default: title,type,humanLanguage
	fields_optional: links,meta,querystring,breadcrumb
	semantria api-semantria.html
	arguments_default: textAnalysis,semantriaKey,semantriaSecret
	discussion api-discussion_v3.html
	arguments_default: token,url
	arguments_optional: fields,timeout,callback,maxPages
	fields_default: type,pageUrl,resolvedPageUrl,title,numPosts,posts(type,id,parentId,text,html,tags,humanLanguage,images,date,author,authorUrl,pageUrl,diffbotUri),tags,participants,numPages,nextPage,nextPages,provider,humanLanguage,rssUrl,diffbotUri
	fields_optional: sentiment,links,meta,querystring,breadcrumb
	codesample_1: http://api.diffbot.com/v3/discussion?version=3&token=john&url=https%3A%2F%2Fnews.ycombinator.com%2Fitem%3Fid%3D5608988
	video api-video_v3.html
	arguments_default: token,url
	arguments_optional: fields,timeout,callback
	fields_default: type,pageUrl,resolvedPageUrl,title,text,url,html,embedUrl,author,date,duration,viewCount,naturalHeight,naturalWidth,images(url,title),mime,humanLanguage,diffbotUri
	fields_optional: links,meta,querystring,breadcrumb
	bulk api-bulk.html
	codesample_1: https://api.diffbot.com/v3/bulk?token=doctoken&name=bulkJob
	crawl api-crawlbot_v2.html
	codesample_1: https://api.diffbot.com/v3/crawl?token=doctoken&name=crawlJob
	{
	"arguments": {
	"breadcrumb": {
	"default": "Returns a top-level array (<code>breadcrumb</code>) of URLs and link text from page breadcrumbs."
	},
	"callback": {
	"default": "Use for jsonp requests. Needed for cross-domain ajax."
	},
	"discussion": {
	"analyze": "Pass <code>discussion=false</code> to disable automatic extraction of comments or reviews from pages identified as articles or products. This will not affect pages identified as discussions.",
	"default": "Pass <code>discussion=false</code> to disable automatic extraction of article comments. See <a href='#discussion'>below</a>.",
	"product": "Pass <code>discussion=false</code> to disable automatic extraction of product reviews. See <a href='#discussion'>below</a>."
	},
	"fields": {
	"analyze": "Specify optional fields to be returned from any fully-extracted pages, e.g.: <code>&fields=querystring,links</code>.<br><br>See available fields within each API's individual documentation pages.",
	"default": "Used to specify optional fields to be returned by the {API} API. See the <a href=\"#fields\">Fields</a> section below."
	},
	"links": {
	"default": "Returns a top-level object (<code>links</code>) containing all hyperlinks found on the page."
	},
	"maxPages": {
	"default": "Set the maximum number of pages in a thread to automatically concatenate in a single response. Default = 1 (no concatenation). Set <code>maxPages=all</code> to retrieve all pages of a thread regardless of length. Each individual page will count as a separate API call."
	},
	"maxTags": {
	"default": "Set the maximum number of automatically-generated tags to return. By default a maximum of five tags will be returned."
	},
	"meta": {
	"default": "Returns a top-level object (<code>meta</code>) containing the full contents of page <code>meta</code> tags, including sub-arrays for <a href=\"http://ogp.me/\" target=\"_new\">OpenGraph</a> tags, <a href=\"https://dev.twitter.com/docs/cards/markup-reference\" target=\"_new\">Twitter Card</a> metadata, <a href=\"http://www.schema.org\" target=\"_new\">schema.org</a> microdata, and -- if available -- <a href=\"http://www.oembed.com\" target=\"_new\">oEmbed</a> metadata."
	},
	"mode": {
	"default": "By default the Analyze API will fully extract all pages that match an existing Automatic API -- articles, products or image pages. Set <code>mode</code> to a specific page-type (e.g., <code>mode=article</code>) to extract content only from that specific page-type. All other pages will simply return the default Analyze fields."
	},
	"optional": "Optional arguments",
	"paging": {
	"default": "Pass <code>paging=false</code> to disable automatic concatenation of multiple-page articles. (By default, Diffbot will concatenate up to 20 pages of a single article.) <a href=\"http://support.diffbot.com/automatic-apis/handling-multiple-page-articles\">More on automatic concatenation</a>."
	},
	"querystring": {
	"default": "Returns any key/value pairs present in the URL querystring. Items without a discrete value will be returned as <code>true</code>."
	},
	"semantriaKey": {
	"default": "Include your Semantria API key (available from your Semantria dashboard)."
	},
	"semantriaSecret": {
	"default": "Include your Semantria API secret (available from your Semantria dashboard)."
	},
	"stats": {
	"default": "Returns statistics on page classification, including the array of individual page-types that comprise the \"other\" category, and Diffbot confidence score for each type. <a href=\"#pagetypes\">See all page-types</a>."
	},
	"textAnalysis": {
	"default": "Pass either <code>&textAnalysis</code> or request the field <code>textAnalysis</code> to return the Semantria-powered object in your response."
	},
	"timeout": {
	"default": "Set a value in milliseconds to terminate the response. By default the {API} API has a 30-second (30000) timeout."
	},
	"token": {
	"default": "Developer token"
	},
	"url": {
	"default": "Web page URL of the {api} to process (URL encoded)"
	}
	},
	"fields": {
	"anchorUrl": {
	"default": "If the image is hyperlinked, returns the destination URL."
	},
	"attrAlt": {
	"default": "Contents of the image's <code>alt</code> attribute."
	},
	"attrTitle": {
	"default": "Contents of the image's <code>title</code> attribute."
	},
	"author": {
	"default": "{API} author.",
	"video": "{API} uploader or creator, if available."
	},
	"authorUrl": {
	"default": "URL of the author profile page, if available."
	},
	"availability": {
	"default": "Item's availability, either <code>true</code> or <code>false</code>."
	},
	"brand": {
	"default": "Item's brand name."
	},
	"breadcrumb": {
	"default": "Returns a top-level array (<code>breadcrumb</code>) of URLs and link text from page breadcrumbs."
	},
	"colors": {
	"default": "Returns an array of hex values of the dominant colors within the image.",
	"product": "Returns array of product color options."
	},
	"date": {
	"default": "Date of extracted {api}, normalized in most cases to <a href=\"http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.3\">RFC 1123 (HTTP/1.1)</a>.",
	"image": "Date associated with the image, if available in page content."
	},
	"diffbotUri": {
	"default": "Unique object ID. The <code>diffbotUri</code> is generated from the values of various {API} fields and uniquely identifies the object. This can be used for deduplication."
	},
	"discussion": {
	"default": "Article comments, as extracted by the Diffbot Discussion API. See <a href='#discussion'>below</a>.",
	"product": "Product reviews, as extracted by the Diffbot Discussion API. See <a href='#discussion'>below</a>."
	},
	"duration": {
	"default": "Duration in seconds of the {API}."
	},
	"embedUrl": {
	"default": "Embeddable URL, if available."
	},
	"estimatedDate": {
	"default": "If an article's date is ambiguous, Diffbot will attempt to estimate a more specific timestamp using various factors. This will not be generated for articles older than two days, or articles without an identified <code>date</code>."
	},
	"experimental": "The following fields are in an early beta stage:",
	"faces": {
	"default": "The x, y, height and width of coordinates of human faces. Returns null if no faces are found."
	},
	"height": {
	"default": "Height of image as (re-)sized via browser/CSS."
	},
	"html": {
	"default": "Diffbot-normalized HTML of the extracted article. Please see the <a href=\"/dev/docs/article/html\">HTML Specification</a> for a breakdown of elements and attributes returned.",
	"video": "Embeddable HTML of the video (if available), typically an <code>IFRAME</code> or <code>VIDEO</code> object."
	},
	"humanLanguage": {
	"default": "Returns the (spoken/human) language of the submitted page, using two-letter <a href=\"http://en.wikipedia.org/wiki/List_of_ISO_639-1_codes\" target=\"_blank\">ISO 639-1 nomenclature</a>.",
	"discussion": "Spoken/human language of the discussion / comment thread, using two-letter <a href=\"http://en.wikipedia.org/wiki/List_of_ISO_639-1_codes\" target=\"_blank\">ISO 639-1 nomenclature</a>."
	},
	"images": {
	"article": "Array of images, if present within the article body.",
	"default": "Array of images, if present within the {api}.",
	"subfields": {
	"diffbotUri": {
	"default": "Internal ID used for indexing."
	},
	"height": {
	"default": "Height of image as (re-)sized via browser/CSS."
	},
	"naturalHeight": {
	"default": "Raw image height, in pixels."
	},
	"naturalWidth": {
	"default": "Raw image width, in pixels."
	},
	"primary": {
	"default": "Returns <code>true</code> if image is identified as primary based on visual analysis."
	},
	"text": {
	"default": "Diffbot-determined best caption for or description of the image."
	},
	"title": {
	"default": "Description or caption of the image."
	},
	"url": {
	"default": "Fully resolved link to image. If the image <code>SRC</code> is encoded as base64 data, the complete data URI will be returned."
	},
	"width": {
	"default": "Width of image as (re-)sized via browser/CSS."
	},
	"xpath": {
	"default": "XPath expression identifying the image node."
	}
	}
	},
	"isbn": {
	"default": "International Standard Book Number (ISBN), if available."
	},
	"links": {
	"default": "Returns a top-level object (<code>links</code>) containing all hyperlinks found on the page."
	},
	"mentions": {
	"default": "Array of articles upon which the same or similar image may be found."
	},
	"meta": {
	"default": "Returns a top-level object (<code>meta</code>) containing the full contents of page <code>meta</code> tags, including sub-arrays for <a href=\"http://ogp.me/\" target=\"_new\">OpenGraph</a> tags, <a href=\"https://dev.twitter.com/docs/cards/markup-reference\" target=\"_new\">Twitter Card</a> metadata, <a href=\"http://www.schema.org\" target=\"_new\">schema.org</a> microdata, and -- if available -- <a href=\"http://www.oembed.com\" target=\"_new\">oEmbed</a> metadata.",
	"image": "Comma-separated list of image-embedded metadata (e.g., EXIF, XMP, ICC Profile), if available within the image file."
	},
	"mime": {
	"default": "MIME type, if available, as specified by the {API}'s \"Content-Type.\""
	},
	"mpn": {
	"default": "Manufacturer's Product Number."
	},
	"naturalHeight": {
	"default": "Raw image height, in pixels.",
	"video": "Raw video height, if available, in pixels."
	},
	"naturalWidth": {
	"default": "Raw image width, in pixels.",
	"video": "Raw video width, if available, in pixels."
	},
	"displayHeight": {
	"default": "Height of image as presented in the browser (and as sized via browser/CSS, if resized)."
	},
	"displayWidth": {
	"default": "Width of image as presented in the browser (and as sized via browser/CSS, if resized)."
	},
	"nextPage": {
	"default": "If {api} spans multiple pages, <code>nextPage</code> will return the subsequent page URL."
	},
	"nextPages": {
	"default": "Array of all page URLs concatenated in a multipage {api}. <a href=\"http://support.diffbot.com/automatic-apis/handling-multiple-page-articles/\">More on automatic concatenation</a>."
	},
	"numPages": {
	"default": "Number of pages automatically concatenated to form the <code>text</code> or <code>html</code> response. By default, Diffbot will automatically concatenate up to 20 pages of an {api}. <a href=\"http://support.diffbot.com/automatic-apis/handling-multiple-page-articles/\">More on automatic concatenation</a>.",
	"discussion": "Number of pages in the thread concatenated to form the <code>posts</code> response. Use <code>maxPages</code> to define how many pages to concatenate. <a href=\"http://support.diffbot.com/automatic-apis/handling-multiple-page-articles/\">More on automatic concatenation</a>."
	},
	"numPosts": {
	"default": "Number of individual posts in the thread."
	},
	"ocr": {
	"default": "If text is identified within the image, we will attempt to recognize the text string."
	},
	"offerPrice": {
	"default": "Offer or actual/final price of the product."
	},
	"offerPriceDetails": {
	"default": "<code>offerPrice</code> separated into its constituent parts: <code>amount</code>, <code>symbol</code>, and full <code>text</code>."
	},
	"optional": "Optional fields, available using <code>fields=</code> argument",
	"pageUrl": {
	"default": "URL of submitted page / page from which the {api} is extracted."
	},
	"participants": {
	"default": "Number of unique participants in the discussion thread or comments."
	},
	"posts": {
	"default": "Array of individual posts.",
	"subfields": {
	"author": {
	"default": "Name/username of the post author."
	},
	"authorUrl": {
	"default": "URL of the author profile page, if available."
	},
	"date": {
	"default": "Date of post, normalized in most cases to <a href=\"http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.3\">RFC 1123 (HTTP/1.1)</a>."
	},
	"diffbotUri": {
	"default": "Internal ID used for indexing."
	},
	"html": {
	"default": "Diffbot-normalized HTML of the extracted post. Please see the <a href=\"/dev/docs/article/html\">HTML Specification</a> for a breakdown of elements and attributes returned."
	},
	"humanLanguage": {
	"default": "Spoken/human language of the post, using two-letter <a href=\"http://en.wikipedia.org/wiki/List_of_ISO_639-1_codes\" target=\"_blank\">ISO 639-1 nomenclature</a>."
	},
	"id": {
	"default": "ID of the individual post. The first post of a thread will have an ID of 0."
	},
	"images": {
	"default": "If any images are detected within post content, they will be returned in a separate array. Individual array fields are the same as the <a href='/dev/docs/article'>Article API's</a> <code>images</code> array."
	},
	"pageUrl": {
	"default": "URL of the page on which the post was found."
	},
	"parentId": {
	"default": "ID of the parent, if the post is a reply or response."
	},
	"sentiment": {
	"default": "Returns a sentiment score from -1.0 (very negative) to 1.0 (very positive) for the individual post."
	},
	"tags": {
	"default": "If the post is long enough, an array of tags generated from its specific content."
	},
	"text": {
	"default": "Full text of the extracted post."
	},
	"type": {
	"default": "Type of element (always <code>post</code>)."
	}
	}
	},
	"prefixCode": {
	"default": "Country of origin as identified by UPC/ISBN."
	},
	"priceRange": {
	"default": "If the product is available in a range of prices, the minimum and maximum values will be returned. The lowest price will also be returned as the <code>offerPrice</code>.",
	"subfields": {
	"maxPrice": {
	"default": "The maximum price for the offered item."
	},
	"minPrice": {
	"default": "The minimum price for the offered item."
	}
	}
	},
	"productId": {
	"default": "Diffbot-determined unique product ID. If <code>upc</code>, <code>isbn</code>, <code>mpn</code> or <code>sku</code> are identified on the page, <code>productId</code> will select from these values in the above order."
	},
	"productOrigin": {
	"default": "If available, two-character ISO country code where the product was produced."
	},
	"provider": {
	"default": "Discussion service provider (e.g., Disqus, Facebook), if known."
	},
	"publisherCountry": {
	"default": "If known, the country of the article publication."
	},
	"publisherRegion": {
	"default": "If known, the region of the article publication."
	},
	"quantityPrices": {
	"default": "If the product is available with quantity-based discounts, all identifiable price points will be returned. The lowest price will also be returned as the <code>offerPrice</code>.",
	"subfields": {
	"minQuantity": {
	"default": "The minimum quantity required to purchase for the associated price."
	},
	"price": {
	"default": "Price of the specific quantity level."
	}
	}
	},
	"querystring": {
	"default": "Returns any key/value pairs present in the URL querystring. Items without a discrete value will be returned as <code>true</code>."
	},
	"regularPrice": {
	"default": "Regular or original price of the product, if available."
	},
	"regularPriceDetails": {
	"default": "<code>regularPrice</code> separated into its constituent parts: <code>amount</code>, <code>symbol</code>, and full <code>text</code>."
	},
	"resolvedPageUrl": {
	"default": "Returned if the <code>pageUrl</code> redirects to another URL."
	},
	"rssUrl": {
	"default": "URL of the {api}'s RSS feed, if available."
	},
	"saveAmount": {
	"default": "Discount or amount saved off the regular price."
	},
	"saveAmountDetails": {
	"default": "<code>saveAmount</code> separated into its constituent parts: <code>amount</code>, <code>symbol</code>, full <code>text</code>, and whether or not it is a <code>percentage</code> value."
	},
	"sentiment": {
	"default": "Returns the sentiment score of the analyzed article text, a value randing from -1.0 (very negative) to 1.0 (very positive).",
	"discussion": "Returns a sentiment score of each individual post, a value ranging from -1.0 (very negative) to 1.0 (very positive)."
	},
	"shippingAmount": {
	"default": "Shipping price."
	},
	"siteName": {
	"default": "The plain-text name of the site (e.g. <code>The New York Times</code> or <code>Diffbot</code>). If no site name is automatically determined, the root domain (<code>diffbot.com</code>) will be returned."
	},
	"size": {
	"default": "Size in bytes of the image file.",
	"product": "Size(s) available, if identified on the page."
	},
	"sku": {
	"default": "Stock Keeping Unit -- store/vendor inventory number or identifier."
	},
	"specs": {
	"default": "If a specifications table or similar data is available on the product page, individual specifications will be returned in the <code>specs</code> object as name/value pairs. Names will be normalized to lowercase with spaces replaced by underscores, e.g. <code>display_resolution</code>."
	},
	"stats": {
	"analyze": "If the <code>stats</code> argument is passed in the request, the <code>stats</code> object will be returned, including an array of page-types and associated confidence scores.",
	"default": "Returns statistics on the page evaluation and extraction."
	},
	"tags": {
	"article": "Array of tags/entities, generated from analysis of the extracted <code>text</code> and cross-referenced with <a href='http://wiki.dbpedia.org/About' target='_new'>DBpedia</a> and other data sources.",
	"default": "Array of tags/entities, generated from analysis of the extracted <code>text</code> and cross-referenced with <a href='http://wiki.dbpedia.org/About' target='_new'>DBpedia</a> and other data sources.",
	"discussion": "Array of tags/entities as generated from analysis of all extracted <code>posts</code> and cross-referenced with <a href='http://wiki.dbpedia.org/About' target='_new'>DBpedia</a> and other data sources.",
	"subfields": {
	"count": {
	"default": "Number of appearances the entity makes within the text content."
	},
	"label": {
	"default": "Name of the entity or tag."
	},
	"prevalence": {
	"default": "Ratio of the number of appearances of this entity compared to those of all entities within the text content."
	},
	"type": {
	"default": "Link to the entity type, if identified, most commonly at DBpedia."
	},
	"uri": {
	"default": "Link to the entity at DBpedia or other data source."
	}
	}
	},
	"text": {
	"default": "Full text of the {api}.",
	"product": "Text description, if available, of the {api}.",
	"video": "Text description, if available, of the {api}."
	},
	"textNode": {
	"default": "MIME type, if available, as specified by the image's \"Content-Type.\""
	},
	"title": {
	"analyze": "Title of the page.",
	"default": "Title of the {api}.",
	"disussion": "Title of the discussion thread or parent post.",
	"image": "Title or caption of the image, if available."
	},
	"type": {
	"analyze": "Page-type of the submitted URL, either <code>article</code>, <code>image</code>, <code>product</code> or <code>other</code>.",
	"default": "Type of object (always <code>{api}</code>)."
	},
	"upc": {
	"default": "Universal Product Code (UPC/EAN), if available."
	},
	"url": {
	"default": "Direct link to image file.",
	"video": "Direct link to source video file, if available."
	},
	"videos": {
	"article": "Array of videos, if present within the article body.",
	"default": "Array of videos, if present within the {api}.",
	"subfields": {
	"diffbotUri": {
	"default": "Internal ID used for indexing."
	},
	"naturalHeight": {
	"default": "Source video height, in pixels, if available."
	},
	"naturalWidth": {
	"default": "Source video width, in pixels, if available."
	},
	"primary": {
	"default": "Returns <code>true</code> if video is identified as primary based on visual analysis."
	},
	"url": {
	"default": "Fully resolved link to source video content."
	}
	}
	},
	"viewCount": {
	"default": "Number of {API} views, if available on the page."
	},
	"width": {
	"default": "Width of image as (re-)sized via browser/CSS."
	},
	"xpath": {
	"default": "XPath expression identifying the {api} node."
	}
	}
	}
	#!/usr/bin/env python
	# -- coding: utf-8 --
	# write out HTML based on replacement code

	import sys
	sys.path.append('/Users/john/svn/Miscellaneous/john')
	sys.path.append('/home/jdavi/svn/john')
	from jdhelpers import *
	import cgi
	import collections
	import codecs

	# Load the field/argument descriptions and write them back normalised
	# (sorted keys, four-space indent) so docFields.txt stays diff-friendly.
	with open('docFields.txt','r+') as fields_file:
	    table_types = json.load(fields_file)
	    fields_file.seek(0)
	    fields_file.write(json.dumps(table_types,indent=4,sort_keys=True))
	    # r+ mode keeps the old tail when the normalised text is shorter
	    fields_file.truncate()

	# maps each API name to its documentation page; filled in by parseLines()
	api_files = {}

	def parseFields(text):
	    '''Split a comma-separated field list, keeping grouped subfields together.

	    "type,images(url,title),tags" yields
	    ['type', 'images(url,title)', 'tags']: commas inside parentheses do not
	    separate fields. A group whose parentheses open and close inside one
	    item ("foo(bar)") is a complete field on its own, and a group that is
	    never closed is returned as collected rather than dropped.
	    '''
	    fields = []
	    pending = None      # text of the group currently being collected
	    for item in text.split(','):
	        if pending is None:
	            if "(" in item and ")" not in item:
	                pending = item
	            else:
	                fields.append(item)
	        else:
	            pending = ",".join([pending,item])
	            if ")" in item:
	                fields.append(pending)
	                pending = None
	    if pending is not None:
	        fields.append(pending)
	    return fields

	def parseLines(text):
	    '''Parse the docApis.txt doc-configurator into a nested dictionary.

	    ``text`` is an iterable of lines. A line without a colon is a header,
	    "<api> <documentation page>": it starts a new API and records the page
	    in ``api_files``. Any other line is "<table>_<slot>: <value>" and is
	    stored as ``apis[api][table][slot]``; a value on a line with exactly one
	    colon is a field list handled by parseFields(), while a line with more
	    colons carries a URL that is kept verbatim (spaces removed). Blank lines
	    are ignored.

	    Raises ValueError when a section line appears before any header.
	    '''
	    apis = {}
	    api = None
	    for line in text:
	        line = line.rstrip('\r\n')
	        if not line.strip():
	            continue

	        if ":" not in line:
	            parts = line.split(' ')
	            api, api_filename = parts[0], parts[1]
	            apis[api] = {}
	            api_files[api] = api_filename
	            continue

	        if api is None:
	            raise ValueError("section line found before any API header: %r" % line)

	        _line = line.replace(' ','')
	        section, value = _line.split(':',1)
	        names = section.split('_')
	        table, slot = names[0], names[1]
	        entries = apis[api].setdefault(table, {})
	        if _line.count(":") == 1:
	            entries[slot] = parseFields(value)
	        else:
	            # parse a setting with a URL
	            print(value)
	            entries[slot] = value
	    return apis

	def parseSubfields(text):
	    '''Return the subfield names of a grouped field such as
	    "images(url,title)", which yields ['url', 'title'].'''
	    pieces = text.rstrip(')').split('(')
	    return pieces[1].split(',')

	class writeHtml(object):
	    '''Collects the markup generated for one API / table pair.

	    Every fragment handed to append() is written to a standalone
	    "<api>_<table_type>.html" file and also accumulated in ``html_holder``
	    so that file_insert() can splice it into the API's documentation page.
	    '''

	    def __init__(self,api,table_type):
	        '''Create (or empty) the standalone output file and reset the buffer.'''
	        self.filename = "%s_%s.html" % (api,table_type)
	        # opening in 'w' mode is enough to create/empty the file
	        with codecs.open(self.filename,'w',encoding='utf8'):
	            pass

	        self.html_holder = ""
	        self.api = api
	        self.table_type = table_type

	    def append(self,markup):
	        '''Append ``markup`` to the standalone file (newline-terminated) and
	        to the in-memory buffer (without the extra newline).'''
	        with codecs.open(self.filename,'a','utf-8') as fo:
	            fo.write("%s\n" % markup)

	        self.html_holder += markup

	    def open(self,type):
	        '''Emit the opening markup: <pre> for code samples, otherwise a table
	        tag plus a header row titled after ``type`` (trailing "s" removed).'''
	        if "codesample" in type:
	            self.append('<pre>')
	        else:
	            table_header = type.rstrip('s').capitalize()
	            self.append('<table class="controls table table-bordered" id="%s" border="0" cellpadding="5">\n' % type)
	            self.append('\t<thead><tr><th>%s</th><th>Description</th></tr></thead>\n' % table_header)

	    def close(self,type):
	        '''Emit the closing tag matching what open() wrote for ``type``.'''
	        if "codesample" in type:
	            self.append('</pre>')
	        else:
	            self.append("</table>")

	    def file_insert(self):
	        '''Replace everything between the "<!--{name}-->" and
	        "<!--{endname}-->" markers of the API's documentation page with the
	        buffered markup. The page is left untouched when either marker is
	        missing.'''
	        filename = api_files[self.api]
	        key_start = "<!--{%s}-->" % self.table_type
	        key_end = "<!--{end%s}-->" % self.table_type
	        with codecs.open(filename,'r+',encoding='utf8') as fo:
	            text = fo.read()
	            if key_start in text and key_end in text:
	                before_text = text.partition(key_start)[0]
	                after_text = text.partition(key_end)[2]
	                text = before_text + key_start + self.html_holder + key_end + after_text
	                fo.seek(0)
	                fo.write(text)
	                # r+ mode: without this the tail of the old page survives
	                # whenever the new page is shorter
	                fo.truncate()

	def getRow(title,api,table_type,section_type,parent="",parentRow=False):
	    '''Return the HTML table row documenting ``title``.

	    The description is looked up in ``table_types[table_type]`` (under the
	    parent's 'subfields' when ``parent`` is given), preferring the entry
	    keyed by ``api`` and falling back to 'default'. "{api}" and "{API}"
	    placeholders are filled with the API name and its capitalised form.

	    The row is a regular row, a title row (``parentRow=True``, CSS class
	    "parent") or an indented subfield row (CSS class "<parent> indent").
	    Raises KeyError when neither an API-specific nor a default description
	    exists.
	    '''
	    if parent != "":
	        entry = table_types[table_type][parent]['subfields'][title]
	        class_name = "%s indent" % parent # set this for the class name
	    else:
	        entry = table_types[table_type][title]
	        class_name = parent

	    names = {'api': api, 'API': api.capitalize()}
	    try:
	        description = entry[api].format(**names)
	    except KeyError:
	        # no API-specific wording: use the shared description
	        description = entry['default'].format(**names)

	    if parentRow:
	        class_name = "parent"

	    # assembled line by line so the row's own indentation is explicit
	    html = (
	        '\n'
	        '        <tr>\n'
	        '            <td class="{className}"><code>{title}</code></td>\n'
	        '            <td class="{className} {sectionType}"><div>{description}</div></td>\n'
	        '        </tr>'
	    ).format(className=class_name,sectionType=section_type,title=title,description=description)

	    return html

	def headerRow(section_type,table_type):
	    '''Return a full-width header row for ``section_type``, or None when
	    ``table_types[table_type]`` defines no header text for that section.'''
	    try:
	        header_text = table_types[table_type][section_type]
	    except KeyError:
	        # only a missing entry means "no header"; anything else is a real error
	        return None

	    # assembled line by line so the row's own indentation is explicit
	    html = (
	        '\n'
	        '\n'
	        '        <tr>\n'
	        '            <td colspan="2" class="header">{header_text}</td>\n'
	        '        </tr>'
	    ).format(header_text=header_text)

	    return html

	def cleanResponse(text):
	    '''Sanitise a pretty-printed API response for publication.

	    Swaps the documentation token and e-mail address for public
	    placeholders, drops the "?mode=high-precision" query fragment, turns
	    the JSON escape sequence for a right single quote into a plain
	    apostrophe and HTML-escapes "&", "<" and ">" (quotes are left alone).
	    The cleaned text is printed and returned.
	    '''
	    # cgi.escape was removed in Python 3.8; html.escape(quote=False) is its
	    # drop-in equivalent, while Python 2 only ships the cgi variant.
	    try:
	        from html import escape as _escape
	    except ImportError:
	        from cgi import escape as _escape

	    text = text.replace('doctoken','sampletoken')
	    text = text.replace('john@diffbot.com','support@diffbot.com')
	    text = text.replace('?mode=high-precision','')
	    # the serialised JSON carries the six literal characters \u2019; spell
	    # the backslash out so the match does not depend on how the interpreter
	    # reads "\u" inside a plain string literal
	    text = text.replace('\\u2019','\'')
	    text = _escape(text, quote=False)
	    print(text)
	    return text

	def getSample(url):
	    '''Fetch ``url``, re-serialise its JSON body with two-space indentation
	    (key order preserved) and return it after cleanResponse().

	    When the URL contains "article" the pretty-printed JSON is also written
	    to text.html.
	    '''
	    r = requests.get(url)
	    j = json.loads(r.text, object_pairs_hook=collections.OrderedDict)
	    text = json.dumps(j,indent=2)
	    if "article" in url:
	        with codecs.open('text.html','w','utf-8') as fo:
	            fo.write(text)
	    return cleanResponse(text)

	def main():
	    '''Generate every documentation fragment described in docApis.txt.

	    For each API (or only the one selected through ``single_api``) every
	    section other than "codesample" becomes an HTML table with one row per
	    field, plus indented rows for grouped subfields; each "codesample"
	    entry becomes a <pre> block holding the response fetched by
	    getSample(). Every fragment is spliced into the API's documentation
	    page with writeHtml.file_insert().
	    '''
	    # parseLines consumes the whole file before returning, so the handle
	    # can be released straight away
	    with open('docApis.txt','r') as fo:
	        apis = parseLines(fo)

	    for api, docsections in apis.items():

	        if single_api and api != single_api:
	            continue

	        for docsection_type, sections in docsections.items():

	            if docsection_type != "codesample":
	                html = writeHtml(api,docsection_type)
	                html.open(docsection_type)
	                for section_type, rows in sections.items():

	                    header = headerRow(section_type,docsection_type)
	                    if header is not None:
	                        html.append(header)

	                    for row in rows:
	                        if "(" in row:
	                            # grouped field: one title row, then its subfields
	                            parent = row.split('(')[0]
	                            html.append(getRow(parent,api,docsection_type,section_type,parentRow=True))
	                            for subfield in parseSubfields(row):
	                                html.append(getRow(subfield,api,docsection_type,section_type,parent=parent))
	                        else:
	                            html.append(getRow(row,api,docsection_type,section_type))
	                html.close(docsection_type)
	                html.file_insert()
	            else:
	                for samplenumber, sample_url in sections.items():
	                    name = "%s%s" % (docsection_type,samplenumber)
	                    html = writeHtml(api,name)
	                    html.open(name)
	                    preview = getSample(sample_url)
	                    html.append(preview)
	                    html.close(name)
	                    print(preview)
	                    html.file_insert()

	# parse args: the optional -a/--api flag limits generation to one API;
	# main() reads the result through the module-level name single_api
	parser = argparse.ArgumentParser()
	parser.add_argument('-a', '--api', required=False, help="Pass a single API to generate that documentation only")
	args = parser.parse_args()
	single_api = args.api

	if __name__ == '__main__':
	    print('Executed from the command line')
	    main()