jvwong/README.md

## README.md

      
    Raw
  

              README.md
            
          
    Pathway Commons Web Service Test Suite

This is a sample test suite for the Pathway Commons web service, written in Python.
Description

Each example query on the web service description page is called programmatically and a sanity test is performed on each response. A .csv report summarizing these responses (named results_<YYYY-MM-DD>.csv) is written to the directory that contains base.py:
type,date,method,status_code,path,query,elapsed,content-type,records,error
Get,2016-08-07 14:17:21,GET,200,/pc2/get,uri=http://identifiers.org/uniprot/Q06609,0.22710490226745605,application/vnd.biopax.rdf+xml;charset=UTF-8,33095,
Get,2016-08-07 14:17:22,GET,200,/pc2/get,uri=COL5A1,0.6672849655151367,application/vnd.biopax.rdf+xml;charset=UTF-8,2930,
Get,2016-08-07 14:17:22,GET,200,/pc2/get,uri=http://identifiers.org/reactome/R-HSA-201451,0.7181229591369629,application/vnd.biopax.rdf+xml;charset=UTF-8,712076,
Graph,2016-08-07 14:17:32,GET,200,/pc2/graph,source=P20908&kind=neighborhood,9.318971157073975,application/vnd.biopax.rdf+xml;charset=UTF-8,19377310,
Graph,2016-08-07 14:17:55,GET,200,/pc2/graph,source=COL5A1&kind=neighborhood,23.3385009765625,application/vnd.biopax.rdf+xml;charset=UTF-8,19377310,
...
Requirements

You will need Python version 3.3 or earlier installed, for compatibility with the XML-related library.
Installation

Use a Python environment tool to install Python and the associated packages declared in conda-requirements.txt.
If using conda:
$ conda create --name pctest --file conda-requirements.txt
$ source activate pctest
Run the tests from the command line (alter the path to point to your Anaconda Python distribution):
$ ~/anaconda/envs/pctest/bin/python3.3 -m unittest discover -v -s .
or using the supplied bash script (you'll need to update this script to point to your anaconda python distribution):
$ ./test.sh

  
## base.py
import unittest
from requests.exceptions import Timeout, RequestException
import requests
import os
from urllib import parse
import csv
import time
from bs4 import BeautifulSoup

# Timeout passed to every requests.get() call made by the suite.
TIMEOUT_LIMIT = 120

# Absolute path of the directory holding this module.
BASE_DIR = os.path.abspath(
    os.path.dirname(__file__)
)
# Reports are written next to this module ('.' relative to BASE_DIR).
REPORTS_DIR = os.path.join(BASE_DIR, '.')

# Make sure the report directory is there before any test class needs it.
if not os.path.exists(REPORTS_DIR):
    os.makedirs(REPORTS_DIR)


class BaseTest(unittest.TestCase):
    """Shared harness for the Pathway Commons web service checks.

    Subclasses call :meth:`get_test` with a query relative to ``cls.url``.
    Every request, whether it succeeded or failed, is appended as one row to
    a per-day CSV report (``results_<YYYY-MM-DD>.csv`` in ``REPORTS_DIR``).
    """

    @classmethod
    def setUpClass(cls):
        """Set the shared configuration and make sure the report has a header."""
        super(BaseTest, cls).setUpClass()

        # One report per calendar day; later runs on the same day append to it.
        cls.filename = os.path.abspath(
            os.path.join(REPORTS_DIR, 'results_' + time.strftime("%Y-%m-%d") + '.csv'))
        # csv file header names
        cls.fieldnames = ['type', 'date', 'method', 'status_code',
                          'path', 'query', 'elapsed', 'content-type', 'records', 'error']
        # webservice url
        cls.url = 'http://www.pathwaycommons.org/pc2/'

        # content-type headers
        cls.headers_json = {'Content-Type': 'application/json'}
        cls.headers_xml = {'Content-Type': 'application/xml'}
        cls.headers_biopax = {'Content-Type': 'application/vnd.biopax.rdf+xml'}
        cls.headers_plain = {'Content-Type': 'text/plain'}

        # Write the header when the report is missing, and also when an
        # earlier run left behind an empty file.
        needs_header = (not os.path.isfile(cls.filename)
                        or os.path.getsize(cls.filename) == 0)
        if needs_header:
            # newline='' leaves line endings to the csv module (avoids blank
            # rows on platforms that translate '\n').
            with open(cls.filename, 'w', newline='') as csvfile:
                csv.DictWriter(csvfile, fieldnames=cls.fieldnames).writeheader()

    @classmethod
    def tearDownClass(cls):
        """Delegate to the parent class; nothing extra to clean up."""
        super(BaseTest, cls).tearDownClass()

    def setUp(self):
        """Delegate to the parent class; no per-test fixtures."""
        super(BaseTest, self).setUp()

    def tearDown(self):
        """Delegate to the parent class; no per-test cleanup."""
        super(BaseTest, self).tearDown()

    def get_test(self, type, query):
        """Issue a GET for ``query`` and log the outcome to the report.

        :param type: label written to the report's ``type`` column.
        :param query: path and query string, resolved against ``self.url``.
        """
        path = parse.urljoin(self.url, query)
        start = time.time()
        # Only the network call sits inside the try block, so an error raised
        # while processing the response is never mislabelled as a request
        # failure.
        try:
            response = requests.get(path, timeout=TIMEOUT_LIMIT)
        except Timeout:
            self.onFailure(type, 'GET', path, time.time() - start, 'Timeout')
        except RequestException:
            self.onFailure(type, 'GET', path, time.time() - start, 'Request Failure')
        else:
            self.onSuccess(type, 'GET', response, time.time() - start)

    def onFailure(self, type, method, path, elapsed, error):
        """Log a request that produced no response.

        :param type: label written to the ``type`` column.
        :param method: HTTP method name.
        :param path: full URL that was requested.
        :param elapsed: seconds spent before the failure.
        :param error: short description stored in the ``error`` column.
        """
        # extract the parsed request url
        rurl = parse.urlparse(path)
        self.writeout({
            'type': type,
            'date': time.strftime("%Y-%m-%d %H:%M:%S"),
            'method': method,
            'path': rurl.path,
            'query': rurl.query,
            'elapsed': elapsed,
            'error': error,
        })

    def onSuccess(self, type, method, response, elapsed):
        """Log a request that returned a response (whatever its status code).

        A row is always written. Responses whose content type is not
        recognised or whose body cannot be decoded are logged with an empty
        ``records`` column (and a note in ``error`` for decode failures)
        instead of being dropped from the report.

        :param type: label written to the ``type`` column; ``'Search'`` and
            ``'Traverse'`` also select how XML bodies are counted.
        :param method: HTTP method name.
        :param response: response object exposing ``url``, ``status_code``,
            ``headers``, ``content``, ``text`` and ``json()``.
        :param elapsed: seconds the request took.
        """
        # extract the parsed response url
        rurl = parse.urlparse(response.url)
        # A missing header must not abort logging, hence .get().
        content_type = response.headers.get('Content-Type', '')

        results = {
            'type': type,
            'date': time.strftime("%Y-%m-%d %H:%M:%S"),
            'method': method,
            'path': rurl.path,
            'query': rurl.query,
            'status_code': response.status_code,
            'elapsed': elapsed,
            'content-type': content_type,
        }

        records, error = self._count_records(type, response, content_type)
        if records is not None:
            results['records'] = records
        if error:
            results['error'] = error
        self.writeout(results)

    def _count_records(self, type, response, content_type):
        """Return ``(records, error)`` for a response body.

        ``records`` is ``None`` when no count could be determined; ``error``
        is ``None`` unless decoding the body failed.
        For more info on response content - pathwaycommons.org/pc2/help/schema
        """
        # application/json
        if self.headers_json['Content-Type'] in content_type:
            try:
                payload = response.json()
            except ValueError:
                print('Error decoding JSON')
                return None, 'Error decoding JSON'
            if isinstance(payload, dict):
                return payload.get('numHits'), None
            return None, None

        # application/xml - TraverseResponse; SearchResponse
        if self.headers_xml['Content-Type'] in content_type:
            try:
                soup = BeautifulSoup(response.text, 'xml')
            except ValueError:
                print('Error decoding XML')
                return None, 'Error decoding XML'
            if type == 'Search':
                # The element or its attribute may be absent (e.g. an error
                # document); report "unknown" rather than raising.
                node = soup.find('searchResponse')
                return (node.get('numHits') if node is not None else None), None
            if type == 'Traverse':
                return len(soup.find_all('traverseResponse', limit=1)), None
            return None, None

        # application/vnd.biopax.rdf+xml and the catch all - text/plain
        if (self.headers_biopax['Content-Type'] in content_type
                or self.headers_plain['Content-Type'] in content_type):
            length = response.headers.get('Content-Length')
            if length is None:
                # No Content-Length header (e.g. chunked transfer): fall back
                # to the size of the received body.
                length = len(response.content)
            return length, None

        return None, None

    def writeout(self, results):
        """Append one row to the CSV report.

        :param results: mapping of column name to value; columns missing from
            the mapping are left empty.
        """
        with open(self.filename, 'a', newline='') as csvfile:
            csv.DictWriter(csvfile, fieldnames=self.fieldnames).writerow(results)


## conda-requirements.txt
# This file may be used to create an environment using:
# $ conda create --name <env> --file <this file>
# platform: osx-64
beautiful-soup=4.3.2=py33_0
libxml2=2.9.2=0
libxslt=1.1.28=2
lxml=3.4.4=py33_0
openssl=1.0.1k=1
pip=8.0.3=py33_0
python=3.3.5=3
readline=6.2=2
requests=2.9.1=py33_0
setuptools=20.1.1=py33_0
sqlite=3.9.2=0
tk=8.5.18=0
wheel=0.29.0=py33_0
xz=5.0.5=1
zlib=1.2.8=0

## test.sh
#!/bin/bash

# Runs unittest discovery (verbose) starting in the current directory.
# Alter this file to point to your python distribution.
# The path is quoted so a home directory containing spaces still works.
"$HOME/anaconda/envs/pctest/bin/python3.3" -m unittest discover -v -s .

## test_api.py
from base import BaseTest


class Search(BaseTest):
    """Runs the example ``search`` queries of the web service."""

    def test_search(self):
        """Request each example search query through ``get_test``."""
        label = self.__class__.__name__
        queries = [
            'search.xml?q=Q06609',
            'search.json?q=Q06609',
            'search?q=xrefid:Q06609',
            'search.json?q=Q06609&type=pathway',
            'search?q=brca2&type=proteinreference&organism=homo%20sapiens&datasource=pid',
            "search.xml?q=name:'col5a1'&type=proteinreference&organism=9606",
            'search?q=brc*&type=control&organism=9606&datasource=reactome',
            'search?q=a*&page=3',
            'search?q=+binding%20NOT%20transcription*&type=control&page=0',
            'search?q=pathway:immune*&type=conversion',
            'search?q=*&type=pathway&datasource=reactome',
            'search?q=*&type=biosource',
        ]
        for query in queries:
            self.get_test(label, query)


class Get(BaseTest):
    """Runs the example ``get`` queries of the web service."""

    def test_get(self):
        """Request each example get query through ``get_test``."""
        label = self.__class__.__name__
        queries = [
            'get?uri=http://identifiers.org/uniprot/Q06609',
            'get?uri=COL5A1',
            'get?uri=http://identifiers.org/reactome/R-HSA-201451',
        ]
        for query in queries:
            self.get_test(label, query)


class Graph(BaseTest):
    """Runs the example ``graph`` queries of the web service."""

    def test_graph(self):
        """Request each example graph query through ``get_test``."""
        label = self.__class__.__name__
        queries = [
            'graph?source=P20908&kind=neighborhood',
            'graph?source=COL5A1&kind=neighborhood',
            'graph?source=http://identifiers.org/uniprot/P20908&kind=neighborhood&format=BINARY_SIF',
        ]
        for query in queries:
            self.get_test(label, query)


class Traverse(BaseTest):
    """Runs the example ``traverse`` queries of the web service."""

    def test_traverse(self):
        """Request each example traverse query through ``get_test``."""
        label = self.__class__.__name__
        queries = [
            'traverse?uri=http://identifiers.org/uniprot/P38398&path=ProteinReference/organism/displayName',
            'traverse?uri=http://identifiers.org/uniprot/P38398&uri=http://identifiers.org/uniprot/Q06609&path=ProteinReference/organism',
            'traverse?uri=http://identifiers.org/uniprot/Q06609&path=ProteinReference/entityReferenceOf:Protein/name',
            'traverse?uri=http://identifiers.org/uniprot/P38398&path=ProteinReference/entityReferenceOf:Protein',
            'traverse?uri=http://identifiers.org/uniprot/P38398&uri=http://identifiers.org/taxonomy/9606&path=Named/name',
        ]
        for query in queries:
            self.get_test(label, query)


class Top(BaseTest):
    """Runs the example ``top_pathways`` queries of the web service."""

    def test_top(self):
        """Request each example top_pathways query through ``get_test``."""
        label = self.__class__.__name__
        queries = [
            'top_pathways',
            'top_pathways.json',
        ]
        for query in queries:
            self.get_test(label, query)
	import unittest
	from requests.exceptions import Timeout, RequestException
	import requests
	import os
	from urllib import parse
	import csv
	import time
	from bs4 import BeautifulSoup

	# Absolute path of the directory holding this module.
	BASE_DIR = os.path.abspath(os.path.dirname(__file__))
	# Reports are written next to this module ('.' relative to BASE_DIR).
	REPORTS_DIR = os.path.join(BASE_DIR, '.')
	# Timeout passed to every requests.get() call made by the suite.
	TIMEOUT_LIMIT = 120

	# Make sure the report directory is there before any test class needs it.
	# (The makedirs call had lost its indentation under the ``if``.)
	if not os.path.exists(REPORTS_DIR):
	    os.makedirs(REPORTS_DIR)


	class BaseTest(unittest.TestCase):
	    """Shared harness for the Pathway Commons web service checks.

	    Subclasses call :meth:`get_test` with a query relative to ``cls.url``.
	    Every request, whether it succeeded or failed, is appended as one row to
	    a per-day CSV report (``results_<YYYY-MM-DD>.csv`` in ``REPORTS_DIR``).
	    """

	    @classmethod
	    def setUpClass(cls):
	        """Set the shared configuration and make sure the report has a header."""
	        super(BaseTest, cls).setUpClass()

	        # One report per calendar day; later runs on the same day append to it.
	        cls.filename = os.path.abspath(
	            os.path.join(REPORTS_DIR, 'results_' + time.strftime("%Y-%m-%d") + '.csv'))
	        # csv file header names
	        cls.fieldnames = ['type', 'date', 'method', 'status_code',
	                          'path', 'query', 'elapsed', 'content-type', 'records', 'error']
	        # webservice url
	        cls.url = 'http://www.pathwaycommons.org/pc2/'

	        # content-type headers
	        cls.headers_json = {'Content-Type': 'application/json'}
	        cls.headers_xml = {'Content-Type': 'application/xml'}
	        cls.headers_biopax = {'Content-Type': 'application/vnd.biopax.rdf+xml'}
	        cls.headers_plain = {'Content-Type': 'text/plain'}

	        # Write the header when the report is missing, and also when an
	        # earlier run left behind an empty file.
	        needs_header = (not os.path.isfile(cls.filename)
	                        or os.path.getsize(cls.filename) == 0)
	        if needs_header:
	            # newline='' leaves line endings to the csv module (avoids blank
	            # rows on platforms that translate '\n').
	            with open(cls.filename, 'w', newline='') as csvfile:
	                csv.DictWriter(csvfile, fieldnames=cls.fieldnames).writeheader()

	    @classmethod
	    def tearDownClass(cls):
	        """Delegate to the parent class; nothing extra to clean up."""
	        super(BaseTest, cls).tearDownClass()

	    def setUp(self):
	        """Delegate to the parent class; no per-test fixtures."""
	        super(BaseTest, self).setUp()

	    def tearDown(self):
	        """Delegate to the parent class; no per-test cleanup."""
	        super(BaseTest, self).tearDown()

	    def get_test(self, type, query):
	        """Issue a GET for ``query`` and log the outcome to the report.

	        :param type: label written to the report's ``type`` column.
	        :param query: path and query string, resolved against ``self.url``.
	        """
	        path = parse.urljoin(self.url, query)
	        start = time.time()
	        # Only the network call sits inside the try block, so an error raised
	        # while processing the response is never mislabelled as a request
	        # failure.
	        try:
	            response = requests.get(path, timeout=TIMEOUT_LIMIT)
	        except Timeout:
	            self.onFailure(type, 'GET', path, time.time() - start, 'Timeout')
	        except RequestException:
	            self.onFailure(type, 'GET', path, time.time() - start, 'Request Failure')
	        else:
	            self.onSuccess(type, 'GET', response, time.time() - start)

	    def onFailure(self, type, method, path, elapsed, error):
	        """Log a request that produced no response.

	        :param type: label written to the ``type`` column.
	        :param method: HTTP method name.
	        :param path: full URL that was requested.
	        :param elapsed: seconds spent before the failure.
	        :param error: short description stored in the ``error`` column.
	        """
	        # extract the parsed request url
	        rurl = parse.urlparse(path)
	        self.writeout({
	            'type': type,
	            'date': time.strftime("%Y-%m-%d %H:%M:%S"),
	            'method': method,
	            'path': rurl.path,
	            'query': rurl.query,
	            'elapsed': elapsed,
	            'error': error,
	        })

	    def onSuccess(self, type, method, response, elapsed):
	        """Log a request that returned a response (whatever its status code).

	        A row is always written. Responses whose content type is not
	        recognised or whose body cannot be decoded are logged with an empty
	        ``records`` column (and a note in ``error`` for decode failures)
	        instead of being dropped from the report.

	        :param type: label written to the ``type`` column; ``'Search'`` and
	            ``'Traverse'`` also select how XML bodies are counted.
	        :param method: HTTP method name.
	        :param response: response object exposing ``url``, ``status_code``,
	            ``headers``, ``content``, ``text`` and ``json()``.
	        :param elapsed: seconds the request took.
	        """
	        # extract the parsed response url
	        rurl = parse.urlparse(response.url)
	        # A missing header must not abort logging, hence .get().
	        content_type = response.headers.get('Content-Type', '')

	        results = {
	            'type': type,
	            'date': time.strftime("%Y-%m-%d %H:%M:%S"),
	            'method': method,
	            'path': rurl.path,
	            'query': rurl.query,
	            'status_code': response.status_code,
	            'elapsed': elapsed,
	            'content-type': content_type,
	        }

	        records, error = self._count_records(type, response, content_type)
	        if records is not None:
	            results['records'] = records
	        if error:
	            results['error'] = error
	        self.writeout(results)

	    def _count_records(self, type, response, content_type):
	        """Return ``(records, error)`` for a response body.

	        ``records`` is ``None`` when no count could be determined; ``error``
	        is ``None`` unless decoding the body failed.
	        For more info on response content - pathwaycommons.org/pc2/help/schema
	        """
	        # application/json
	        if self.headers_json['Content-Type'] in content_type:
	            try:
	                payload = response.json()
	            except ValueError:
	                print('Error decoding JSON')
	                return None, 'Error decoding JSON'
	            if isinstance(payload, dict):
	                return payload.get('numHits'), None
	            return None, None

	        # application/xml - TraverseResponse; SearchResponse
	        if self.headers_xml['Content-Type'] in content_type:
	            try:
	                soup = BeautifulSoup(response.text, 'xml')
	            except ValueError:
	                print('Error decoding XML')
	                return None, 'Error decoding XML'
	            if type == 'Search':
	                # The element or its attribute may be absent (e.g. an error
	                # document); report "unknown" rather than raising.
	                node = soup.find('searchResponse')
	                return (node.get('numHits') if node is not None else None), None
	            if type == 'Traverse':
	                return len(soup.find_all('traverseResponse', limit=1)), None
	            return None, None

	        # application/vnd.biopax.rdf+xml and the catch all - text/plain
	        if (self.headers_biopax['Content-Type'] in content_type
	                or self.headers_plain['Content-Type'] in content_type):
	            length = response.headers.get('Content-Length')
	            if length is None:
	                # No Content-Length header (e.g. chunked transfer): fall back
	                # to the size of the received body.
	                length = len(response.content)
	            return length, None

	        return None, None

	    def writeout(self, results):
	        """Append one row to the CSV report.

	        :param results: mapping of column name to value; columns missing from
	            the mapping are left empty.
	        """
	        with open(self.filename, 'a', newline='') as csvfile:
	            csv.DictWriter(csvfile, fieldnames=self.fieldnames).writerow(results)
	# This file may be used to create an environment using:
	# $ conda create --name <env> --file <this file>
	# platform: osx-64
	beautiful-soup=4.3.2=py33_0
	libxml2=2.9.2=0
	libxslt=1.1.28=2
	lxml=3.4.4=py33_0
	openssl=1.0.1k=1
	pip=8.0.3=py33_0
	python=3.3.5=3
	readline=6.2=2
	requests=2.9.1=py33_0
	setuptools=20.1.1=py33_0
	sqlite=3.9.2=0
	tk=8.5.18=0
	wheel=0.29.0=py33_0
	xz=5.0.5=1
	zlib=1.2.8=0
	#!/bin/bash

	# Runs unittest discovery (verbose) starting in the current directory.
	# Alter this file to point to your python distribution.
	# The path is quoted so a home directory containing spaces still works.
	"$HOME/anaconda/envs/pctest/bin/python3.3" -m unittest discover -v -s .
	from base import BaseTest


	class Search(BaseTest):
	    """Runs the example ``search`` queries of the web service."""

	    def test_search(self):
	        """Request each example search query through ``get_test``."""
	        paths = (
	            'search.xml?q=Q06609',
	            'search.json?q=Q06609',
	            'search?q=xrefid:Q06609',
	            'search.json?q=Q06609&type=pathway',
	            'search?q=brca2&type=proteinreference&organism=homo%20sapiens&datasource=pid',
	            "search.xml?q=name:'col5a1'&type=proteinreference&organism=9606",
	            'search?q=brc*&type=control&organism=9606&datasource=reactome',
	            'search?q=a*&page=3',
	            'search?q=+binding%20NOT%20transcription*&type=control&page=0',
	            'search?q=pathway:immune*&type=conversion',
	            'search?q=*&type=pathway&datasource=reactome',
	            'search?q=*&type=biosource'
	        )

	        for path in paths:
	            self.get_test(self.__class__.__name__, path)


	class Get(BaseTest):
	    """Runs the example ``get`` queries of the web service."""

	    def test_get(self):
	        """Request each example get query through ``get_test``."""
	        paths = (
	            'get?uri=http://identifiers.org/uniprot/Q06609',
	            'get?uri=COL5A1',
	            'get?uri=http://identifiers.org/reactome/R-HSA-201451'
	        )

	        for path in paths:
	            self.get_test(self.__class__.__name__, path)


	class Graph(BaseTest):
	    """Runs the example ``graph`` queries of the web service."""

	    def test_graph(self):
	        """Request each example graph query through ``get_test``."""
	        paths = (
	            'graph?source=P20908&kind=neighborhood',
	            'graph?source=COL5A1&kind=neighborhood',
	            'graph?source=http://identifiers.org/uniprot/P20908&kind=neighborhood&format=BINARY_SIF',
	        )

	        for path in paths:
	            self.get_test(self.__class__.__name__, path)


	class Traverse(BaseTest):
	    """Runs the example ``traverse`` queries of the web service."""

	    def test_traverse(self):
	        """Request each example traverse query through ``get_test``."""
	        paths = (
	            'traverse?uri=http://identifiers.org/uniprot/P38398&path=ProteinReference/organism/displayName',
	            'traverse?uri=http://identifiers.org/uniprot/P38398&uri=http://identifiers.org/uniprot/Q06609&path=ProteinReference/organism',
	            'traverse?uri=http://identifiers.org/uniprot/Q06609&path=ProteinReference/entityReferenceOf:Protein/name',
	            'traverse?uri=http://identifiers.org/uniprot/P38398&path=ProteinReference/entityReferenceOf:Protein',
	            'traverse?uri=http://identifiers.org/uniprot/P38398&uri=http://identifiers.org/taxonomy/9606&path=Named/name'
	        )

	        for path in paths:
	            self.get_test(self.__class__.__name__, path)


	class Top(BaseTest):
	    """Runs the example ``top_pathways`` queries of the web service."""

	    def test_top(self):
	        """Request each example top_pathways query through ``get_test``."""
	        paths = (
	            'top_pathways',
	            'top_pathways.json'
	        )

	        for path in paths:
	            self.get_test(self.__class__.__name__, path)