tony/chineseradicals.py

## chineseradicals.py
#!/usr/bin/env python
# -*- coding: utf8 -*-
"""Pull Chinese Radical information from HTML tables on the internet.

:license: MIT License
:author: Tony Narlock

Requirements:

- lxml, html5lib, beautifulsoup4
- pandas
- requests

"""

from __future__ import absolute_import, division, print_function, \
    with_statement, unicode_literals

import os
import pandas
import requests
from bs4 import BeautifulSoup


def radicals_archchinese():
    """Return pandas DataFrame for archchinese.com radicals.

    Downloads the radicals page, pulls out the ``<table>`` whose CSS class
    is ``radicaltable`` and hands its markup to pandas, skipping the first
    two rows and using the first column as the index.

    :returns: the first table pandas parses out of the extracted markup
    :rtype: pandas.DataFrame
    :raises requests.exceptions.RequestException: if the request fails,
        times out, or the server answers with an HTTP error status
    :raises ValueError: if the page contains no ``radicaltable`` table
    """
    # Imported locally so the function does not depend on a new
    # module-level import.
    from io import StringIO

    url = 'http://www.archchinese.com/arch_chinese_radicals.html'
    # Without a timeout a stalled server would block the call forever.
    r = requests.get(url, timeout=30)
    # Fail on 4xx/5xx here rather than trying to parse an error page.
    r.raise_for_status()
    # Decode the body as UTF-8 regardless of what requests guessed.
    r.encoding = 'utf-8'

    # Name the parser explicitly so results do not vary with whatever
    # happens to be installed; lxml is listed in the module requirements.
    soup = BeautifulSoup(r.text, 'lxml')
    table = soup.find("table", class_="radicaltable")
    if table is None:
        raise ValueError(
            'no <table class="radicaltable"> found at %s' % url
        )

    # read_html expects a path, URL or file-like object; wrap the markup
    # instead of passing it as a literal string.
    df = pandas.read_html(
        io=StringIO(table.prettify()),
        skiprows=2,
        index_col=0
    )[0]

    return df


def radicals_yellowbridge():
    """Return pandas DataFrame from yellowbridge.com radicals.

    Downloads the radicals page, pulls out the ``<table>`` whose CSS class
    is ``sortable`` and hands its markup to pandas, using the first row as
    the header and the first six columns as a multi-level index.

    :returns: the first table pandas parses out of the extracted markup
    :rtype: pandas.DataFrame
    :raises requests.exceptions.RequestException: if the request fails,
        times out, or the server answers with an HTTP error status
    :raises ValueError: if the page contains no ``sortable`` table
    """
    # Imported locally so the function does not depend on a new
    # module-level import.
    from io import StringIO

    url = 'http://www.yellowbridge.com/chinese/radicals.php'
    # Without a timeout a stalled server would block the call forever.
    r = requests.get(url, timeout=30)
    # Fail on 4xx/5xx here rather than trying to parse an error page.
    r.raise_for_status()
    # Decode the body as UTF-8 regardless of what requests guessed.
    r.encoding = 'utf-8'

    # Name the parser explicitly so results do not vary with whatever
    # happens to be installed; lxml is listed in the module requirements.
    soup = BeautifulSoup(r.text, 'lxml')
    table = soup.find("table", class_="sortable")
    if table is None:
        raise ValueError(
            'no <table class="sortable"> found at %s' % url
        )

    # read_html expects a path, URL or file-like object; wrap the markup
    # instead of passing it as a literal string.
    df = pandas.read_html(
        io=StringIO(table.prettify()),
        index_col=[0, 1, 2, 3, 4, 5],
        header=0
    )[0]

    return df


if __name__ == '__main__':
    # Only touch the network when run as a script, never on import.
    # Each source is printed in turn: keeping just the last result would
    # throw away the first download without ever showing it.
    for fetch in (radicals_archchinese, radicals_yellowbridge):
        df = fetch()
        print(df)
	#!/usr/bin/env python
	# -*- coding: utf8 -*-
	"""Pull Chinese Radical information from HTML tables on the internet.

	:license: MIT License
	:author: Tony Narlock

	Requirements:

	- lxml, html5lib, beautifulsoup4
	- pandas
	- requests

	"""

	from __future__ import absolute_import, division, print_function, \
	with_statement, unicode_literals

	import os
	import pandas
	import requests
	from bs4 import BeautifulSoup


	def radicals_archchinese():
	    """Return pandas DataFrame for archchinese.com radicals.

	    Downloads the radicals page, pulls out the ``<table>`` whose CSS class
	    is ``radicaltable`` and hands its markup to pandas, skipping the first
	    two rows and using the first column as the index.

	    :returns: the first table pandas parses out of the extracted markup
	    :rtype: pandas.DataFrame
	    :raises requests.exceptions.RequestException: if the request fails,
	        times out, or the server answers with an HTTP error status
	    :raises ValueError: if the page contains no ``radicaltable`` table
	    """
	    # Imported locally so the function does not depend on a new
	    # module-level import.
	    from io import StringIO

	    url = 'http://www.archchinese.com/arch_chinese_radicals.html'
	    # Without a timeout a stalled server would block the call forever.
	    r = requests.get(url, timeout=30)
	    # Fail on 4xx/5xx here rather than trying to parse an error page.
	    r.raise_for_status()
	    # Decode the body as UTF-8 regardless of what requests guessed.
	    r.encoding = 'utf-8'

	    # Name the parser explicitly so results do not vary with whatever
	    # happens to be installed; lxml is listed in the module requirements.
	    soup = BeautifulSoup(r.text, 'lxml')
	    table = soup.find("table", class_="radicaltable")
	    if table is None:
	        raise ValueError(
	            'no <table class="radicaltable"> found at %s' % url
	        )

	    # read_html expects a path, URL or file-like object; wrap the markup
	    # instead of passing it as a literal string.
	    df = pandas.read_html(
	        io=StringIO(table.prettify()),
	        skiprows=2,
	        index_col=0
	    )[0]

	    return df


	def radicals_yellowbridge():
	    """Return pandas DataFrame from yellowbridge.com radicals.

	    Downloads the radicals page, pulls out the ``<table>`` whose CSS class
	    is ``sortable`` and hands its markup to pandas, using the first row as
	    the header and the first six columns as a multi-level index.

	    :returns: the first table pandas parses out of the extracted markup
	    :rtype: pandas.DataFrame
	    :raises requests.exceptions.RequestException: if the request fails,
	        times out, or the server answers with an HTTP error status
	    :raises ValueError: if the page contains no ``sortable`` table
	    """
	    # Imported locally so the function does not depend on a new
	    # module-level import.
	    from io import StringIO

	    url = 'http://www.yellowbridge.com/chinese/radicals.php'
	    # Without a timeout a stalled server would block the call forever.
	    r = requests.get(url, timeout=30)
	    # Fail on 4xx/5xx here rather than trying to parse an error page.
	    r.raise_for_status()
	    # Decode the body as UTF-8 regardless of what requests guessed.
	    r.encoding = 'utf-8'

	    # Name the parser explicitly so results do not vary with whatever
	    # happens to be installed; lxml is listed in the module requirements.
	    soup = BeautifulSoup(r.text, 'lxml')
	    table = soup.find("table", class_="sortable")
	    if table is None:
	        raise ValueError(
	            'no <table class="sortable"> found at %s' % url
	        )

	    # read_html expects a path, URL or file-like object; wrap the markup
	    # instead of passing it as a literal string.
	    df = pandas.read_html(
	        io=StringIO(table.prettify()),
	        index_col=[0, 1, 2, 3, 4, 5],
	        header=0
	    )[0]

	    return df


	if __name__ == '__main__':
	    # Only touch the network when run as a script, never on import.
	    # Each source is printed in turn: keeping just the last result would
	    # throw away the first download without ever showing it.
	    for fetch in (radicals_archchinese, radicals_yellowbridge):
	        df = fetch()
	        print(df)