@nmilford
Last active June 6, 2018 18:05
Sample script to build a report from PerformLine API and Export
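Usage: pass a PerformLine API token and the path to the extracted source-code export, e.g. `python report.py -t <API_TOKEN> -p ./export` (the script filename and the ./export path are illustrative; both flags come from the argparse block at the bottom of the script).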
#!/usr/bin/env python
import performline.embedded.stdlib.clients.rest.exceptions
from performline.client import Client
from glob import iglob
import argparse
import json
import sys


class PlineExportReport(object):
    def __init__(self, token=None, path='.'):
        """
        The PerformLine Compliance API can provide most of the page data
        available in the UI. You can install the client via the public PyPI
        repository: `pip install -U performline`
        see: https://github.com/PerformLine/python-performline-client

        Initialize a connection to PerformLine's API, load WHOIS data from
        file, and collect page_ids from a glob of the exported HTML files.
        """
        self.path = path
        self.report_dict = {}
        self.report_file = "{}/report.json".format(self.path)
        self.whois_json = "{}/whois_export.json".format(self.path)
        if token:
            self.performline = Client(token)
        else:
            print "No API token provided."
            sys.exit(-1)
        try:
            with open(self.whois_json, 'r') as f:
                self.whois_data = json.load(f)
        except (IOError, ValueError):
            print "Problem opening whois data at {}.".format(self.whois_json)
            sys.exit(-1)
        self.__get_page_ids_from_files()
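
    # Illustrative only -- the extracted export directory passed in via
    # `path` is assumed to look roughly like this (filenames made up, except
    # whois_export.json and report.json, which the script uses by name):
    #
    #   ./export/
    #       whois_export.json
    #       3480374.html
    #       3480375.html
    #       ...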

    def __get_page_ids_from_files(self):
        """
        A page_id is the primary identifier for any page object. The source
        code export functionality in PerformLine will give you a zip file
        with the page source files named $page_id.html.

        Below we just glob for anything in the path that ends in *.html and
        strip the leading path and the trailing file extension to determine
        all of the page_ids in this export, returning them as a list of ints.
        """
        self.page_ids = []
        html_glob = iglob("{}/*.html".format(self.path))
        for page in html_glob:
            # e.g. "./export/3480374.html" -> 3480374
            self.page_ids.append(int(page.split('/')[-1].split('.')[0]))
        return self.page_ids

    def __get_whois_data(self, page_id):
        """
        The whois data provided as part of the export is a series of domains
        and their related whois output, each with an array of associated
        page_ids.

        Below we just iterate through the domains, looking to see if one is
        associated with the given page_id, then return the whois data.
        """
        for domain in self.whois_data.keys():
            if page_id in self.whois_data[domain]['page_ids']:
                return self.whois_data[domain]['whois']
        return "No Whois"

    def __read_source_from_file(self, page_id):
        """
        Reads the page source code into a string from disk.
        """
        try:
            with open("{}/{}.html".format(self.path, page_id), 'r') as f:
                return f.read()
        except IOError:
            print "Problem reading HTML source file {}.html".format(page_id)
            return "No Content"

    def build(self):
        """
        Build a dict of data from the API, the whois JSON and the page
        source, then write it to a file.
        """
        for page_id in self.page_ids:
            print "Preparing report entry for {}".format(page_id)
            report_entry = {}
            try:
                """
                You can explore the available attributes like so:
                In [1]: from performline.client import Client
                In [2]: c = Client('tokenca6e5897e27d1b43906469134b1c3eb0424')
                In [3]: c.webpages(id=3480374)
                Out[3]:
                {
                    "LastScoredAt": "2018-06-05T10:46:53.693483-04:00",
                    "CompanyId": 501,
                    "Url": "http://www.guidetoonlineschools.com/online-schools?lvl=8",
                    "CampaignId": 4330,
                    "TrafficSourceId": 7670,
                    "BrandId": 458,
                    "Score": 10,
                    "Type": "web",
                    "Id": 3480374,
                    "CreatedAt": "2015-06-16T16:46:06.824781-04:00"
                }
                In [4]: c.webpages(id=3480374).LastScoredAt
                Out[4]: u'2018-06-05T10:46:53.693483-04:00'
                In [5]: c.webpages(id=3480374).Url
                Out[5]: u'http://www.guidetoonlineschools.com/online-schools?lvl=8'
                In [6]: c.webpages(id=3480374).Score
                Out[6]: 10
                """
                page = self.performline.webpages(id=page_id)
                report_entry['score'] = page.score
                report_entry['url'] = page.url
            except performline.embedded.stdlib.clients.rest.exceptions.NotFound:
                # The page is missing from the API; fall through and keep the
                # whois and source data we have on disk.
                print "ERROR: {} was not found.".format(page_id)
            report_entry['whois'] = self.__get_whois_data(page_id)
            report_entry['source'] = self.__read_source_from_file(page_id)
            self.report_dict[page_id] = report_entry
        try:
            print "Writing report to {}".format(self.report_file)
            with open(self.report_file, 'w') as f:
                json.dump(self.report_dict, f)
        except IOError:
            print "ERROR: Could not write report to file."


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Build a report from PerformLine API and Export.'
    )
    parser.add_argument('-t', '--token', action='store', dest='token',
                        required=True, help='PerformLine API Token')
    parser.add_argument('-p', '--path', action='store', dest='path',
                        required=True, help='Path of extracted export contents.')
    # With no arguments at all, print the full help text instead of the terse
    # "argument required" error that argparse would raise from parse_args().
    if len(sys.argv[1:]) == 0:
        parser.print_help()
        sys.exit(1)
    args = parser.parse_args()
    report = PlineExportReport(args.token, args.path)
    report.build()
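
For reference, a minimal sketch of consuming the finished report, assuming the export was extracted to ./export (the path is illustrative; report.json lands wherever -p pointed):

    import json

    with open('./export/report.json') as f:
        report = json.load(f)

    for page_id, entry in report.items():
        # 'whois' and 'source' are always present; 'score' and 'url' are only
        # set when the page was found via the API.
        print page_id, entry.get('score'), entry.get('url')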