Last active
September 20, 2018 06:51
-
-
Save datacorner/47f99b6674fbe033a0f82e0405a94dc3 to your computer and use it in GitHub Desktop.
This function scrapes one HTML page by gathering the XPath data (using the tags array) and gives back a Pandas DataFrame
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import lxml.html as lh | |
import pandas as pd | |
# Target URL to scrape (placeholder — set to the real page address)
url = '...'

# XPath expressions to collect from the page, one per output column
tags = [
    '//a[@class="XX"]',
    '//p[@class="XX"]',
    '//span[@class="XX"]',
    '//span[@class="XX"]',
    '//span[@class="XX"]',
]

# Column names for the resulting DataFrame (one per XPath expression).
# NOTE: the original listed 'col4' twice — a copy-paste typo that would
# produce colliding column labels in the later join; fixed to 'col5'.
cols = [
    'col1',
    'col2',
    'col3',
    'col4',
    'col5',
]
# This function scrapes one HTML page by gathering the XPath data (using the tags array) and gives back a Pandas DataFrame
def scrapHtmlPage(url):
    """Scrape one HTML page and return its XPath contents as a DataFrame.

    Fetches *url*, evaluates every XPath expression in the module-level
    ``tags`` list, and returns a pandas DataFrame with one column per
    expression, named after the module-level ``cols`` list. Rows are
    aligned positionally (i-th match of each tag goes on row i).

    BUG FIX: the original inner comprehension iterated over
    ``range(len(content[i]))`` where ``i`` was the stale index left over
    from the previous loop — i.e. always the match count of the LAST tag —
    so columns were built with the wrong number of elements. Iterating the
    match list directly removes the indexing mistake entirely.
    """
    page = requests.get(url)
    doc = lh.fromstring(page.content)
    # Evaluate each XPath expression; one list of matched elements per tag
    content = [doc.xpath(tag) for tag in tags]
    # Build one single-column DataFrame per tag, with a positional 'key'
    # column used to align the columns afterwards
    df_liste = []
    for col, elements in zip(cols, content):
        tmp = pd.DataFrame(
            [element.text_content().strip() for element in elements],
            columns=[col],
        )
        tmp['key'] = tmp.index
        df_liste.append(tmp)
    # Left-join every per-tag frame onto the first one via the positional
    # key; the suffixed duplicate key columns are dropped after each join
    liste = df_liste[0]
    for other in df_liste[1:]:
        liste = liste.join(other, on='key', how='left',
                           lsuffix='_l', rsuffix='_r')
        liste['key'] = liste.index
        del liste['key_l']
        del liste['key_r']
    return liste
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This function scrapes one HTML page by gathering the XPath data (using the tags array) and gives back a Pandas DataFrame