# intrd/int_wiki.py

## int_wiki.py
## Wikipedia parser (birth data extractor)
# @author intrd - http://dann.com.br/ (based on @JBernardo's suggestion http://stackoverflow.com/a/12250675)
# @license Creative Commons Attribution-ShareAlike 4.0 International License - http://creativecommons.org/licenses/by-sa/4.0/

import re, requests
from bs4 import BeautifulSoup

def wikiGet(name):
	"""Return the birth year found in the lead section of a Wikipedia article.

	Fetches the wikitext of section 0 of the English Wikipedia page ``name``
	through the MediaWiki API, locates the first "Birth date...}}" template
	and picks the year out of its pipe-separated fields.

	The selection rule is: if the third field is exactly four characters long
	it is returned, otherwise the second field is returned (presumably this
	skips a leading option such as ``df=yes`` -- confirm against the
	template documentation).

	:param name: page title, e.g. ``"Albert_Einstein"``. It is passed as a
		query parameter, so it should not be percent-encoded by the caller.
	:returns: the selected field as text, or ``None`` when the response has
		no revision content, no "Birth date" template is found, or the
		template carries no fields at all.
	"""
	# Let requests build the query string so that characters such as '&' or
	# '#' in the title are encoded instead of corrupting the request.
	query = {
		'action': 'query',
		'prop': 'revisions',
		'rvprop': 'content',
		'rvsection': '0',
		'titles': name,
		'format': 'xml',
	}
	res = requests.get('https://en.wikipedia.org/w/api.php', params=query)
	soup = BeautifulSoup(res.text, "xml")

	# No <revisions> element in the response (e.g. unknown title): nothing to parse.
	revisions = soup.revisions
	if revisions is None:
		return None

	birth_re = re.search(r'(Birth date(.*?)}})', revisions.getText())
	if birth_re is None:
		return None

	birth_data = birth_re.group(0).split('|')

	# Diagnostic output kept from the original implementation; the
	# parenthesised form behaves the same on Python 2 and Python 3.
	print(birth_data)
	if len(birth_data) > 2:
		print(len(birth_data[2]))

	# The match always ends with the template's closing "}}"; drop it so a
	# value sitting in the last field is compared and returned cleanly.
	birth_data[-1] = birth_data[-1][:-2]

	if len(birth_data) > 2 and len(birth_data[2]) == 4:
		return birth_data[2]
	if len(birth_data) > 1:
		return birth_data[1]
	return None

#dyear = wikiGet("Albert_Einstein")
#dyear = wikiGet("Daniel_Bleichenbacher")
#print dyear
	## Wikipedia parser (birth data extractor)
	# @author intrd - http://dann.com.br/ (based on @JBernardo's suggestion http://stackoverflow.com/a/12250675)
	# @license Creative Commons Attribution-ShareAlike 4.0 International License - http://creativecommons.org/licenses/by-sa/4.0/

	import re, requests
	from bs4 import BeautifulSoup

	def wikiGet(name):
		"""Return the birth year found in the lead section of a Wikipedia article.

		Fetches the wikitext of section 0 of the English Wikipedia page
		``name`` through the MediaWiki API, locates the first
		"Birth date...}}" template and picks the year out of its
		pipe-separated fields.

		The selection rule is: if the third field is exactly four characters
		long it is returned, otherwise the second field is returned
		(presumably this skips a leading option such as ``df=yes`` -- confirm
		against the template documentation).

		:param name: page title, e.g. ``"Albert_Einstein"``. It is passed as a
			query parameter, so it should not be percent-encoded by the caller.
		:returns: the selected field as text, or ``None`` when the response
			has no revision content, no "Birth date" template is found, or
			the template carries no fields at all.
		"""
		# NOTE(review): this definition repeats the wikiGet defined earlier in
		# this file; confirm whether the second copy is needed at all.

		# Let requests build the query string so that characters such as '&'
		# or '#' in the title are encoded instead of corrupting the request.
		query = {
			'action': 'query',
			'prop': 'revisions',
			'rvprop': 'content',
			'rvsection': '0',
			'titles': name,
			'format': 'xml',
		}
		res = requests.get('https://en.wikipedia.org/w/api.php', params=query)
		soup = BeautifulSoup(res.text, "xml")

		# No <revisions> element in the response (e.g. unknown title): nothing to parse.
		revisions = soup.revisions
		if revisions is None:
			return None

		birth_re = re.search(r'(Birth date(.*?)}})', revisions.getText())
		if birth_re is None:
			return None

		# Split on a bare pipe: the previous '\|' separator is a literal
		# backslash followed by a pipe, which never occurs in the template.
		birth_data = birth_re.group(0).split('|')

		# Diagnostic output kept from the original implementation; the
		# parenthesised form behaves the same on Python 2 and Python 3.
		print(birth_data)
		if len(birth_data) > 2:
			print(len(birth_data[2]))

		# The match always ends with the template's closing "}}"; drop it so
		# a value sitting in the last field is compared and returned cleanly.
		birth_data[-1] = birth_data[-1][:-2]

		if len(birth_data) > 2 and len(birth_data[2]) == 4:
			return birth_data[2]
		if len(birth_data) > 1:
			return birth_data[1]
		return None

	#dyear = wikiGet("Albert_Einstein")
	#dyear = wikiGet("Daniel_Bleichenbacher")
	#print dyear