sincerefly/get_weather.py

## get_weather.py
#!/usr/bin/python
# -*- coding: utf-8 -*-

from bs4 import BeautifulSoup
import requests


def get_html(url, data=None):
    """Download a page and return its body as text decoded as UTF-8.

    Args:
        url: Address of the page to fetch.
        data: Accepted for backward compatibility; it is not sent with
            the request.

    Returns:
        The response body as text.

    Raises:
        requests.RequestException: If the request fails, exceeds the
            10 second timeout, or the server answers with an HTTP error
            status (4xx/5xx).
    """
    response = requests.get(url, timeout=10)
    # Fail loudly on 4xx/5xx instead of handing an error page to the parser.
    response.raise_for_status()
    # Decode the body as UTF-8 regardless of the charset requests inferred
    # from the response headers.
    response.encoding = 'utf-8'

    return response.text


def parser_data(html_text):
    """Extract the per-day forecast entries from a forecast page.

    Looks for the ``<div id="7d">`` container and reads one entry from each
    ``<li>`` inside its first ``<ul>``.

    Args:
        html_text: HTML source of the page.

    Returns:
        A list of ``(date, weather, temperature_highest, temperature_lowest)``
        tuples of UTF-8 encoded byte strings.  The '℃' sign is stripped from
        both temperatures, and ``temperature_highest`` is empty when the
        entry has no ``<span>``.  The list is empty when the page has no
        ``<div id="7d">`` container or the container has no ``<ul>``.
    """
    result = []
    soup = BeautifulSoup(html_text, "html.parser")

    day7 = soup.find('div', {'id': '7d'})
    forecast_list = day7.find('ul') if day7 is not None else None
    if forecast_list is None:
        # No forecast markup (e.g. empty input): nothing to report.
        return result

    for day in forecast_list('li'):
        # Look the paragraphs up once: [0] is read as the weather text,
        # [1] holds the temperature tags.
        paragraphs = day.find_all('p')
        temperatures = paragraphs[1]

        date = day.find('h1').text.encode('utf-8')
        weather = paragraphs[0].text.encode('utf-8')

        highest_tag = temperatures.find('span')
        highest_text = highest_tag.text if highest_tag is not None else u''
        lowest_text = temperatures.find('i').text

        # Strip the unit while the value is still text and encode afterwards:
        # calling replace() on the encoded bytes with a plain string literal
        # raises TypeError on Python 3.
        temperature_highest = highest_text.replace(u'℃', u'').encode('utf-8')
        temperature_lowest = lowest_text.replace(u'℃', u'').encode('utf-8')

        result.append((date, weather, temperature_highest, temperature_lowest))

    return result

if __name__ == '__main__':

    url ='http://www.weather.com.cn/weather/101010100.shtml'

    html = get_html(url)

    result = parser_data(html)

    for r in result:
        print r[0], r[1], r[2], r[3]
	#!/usr/bin/python
	# -*- coding: utf-8 -*-

	from bs4 import BeautifulSoup
	import requests


	def get_html(url, data=None):
	    """Download a page and return its body as text decoded as UTF-8.

	    Args:
	        url: Address of the page to fetch.
	        data: Accepted for backward compatibility; it is not sent with
	            the request.

	    Returns:
	        The response body as text.

	    Raises:
	        requests.RequestException: If the request fails, exceeds the
	            10 second timeout, or the server answers with an HTTP error
	            status (4xx/5xx).
	    """
	    response = requests.get(url, timeout=10)
	    # Fail loudly on 4xx/5xx instead of handing an error page to the parser.
	    response.raise_for_status()
	    # Decode the body as UTF-8 regardless of the charset requests inferred
	    # from the response headers.
	    response.encoding = 'utf-8'

	    return response.text


	def parser_data(html_text):
	    """Extract the per-day forecast entries from a forecast page.

	    Looks for the ``<div id="7d">`` container and reads one entry from each
	    ``<li>`` inside its first ``<ul>``.

	    Args:
	        html_text: HTML source of the page.

	    Returns:
	        A list of ``(date, weather, temperature_highest, temperature_lowest)``
	        tuples of UTF-8 encoded byte strings.  The '℃' sign is stripped from
	        both temperatures, and ``temperature_highest`` is empty when the
	        entry has no ``<span>``.  The list is empty when the page has no
	        ``<div id="7d">`` container or the container has no ``<ul>``.
	    """
	    result = []
	    soup = BeautifulSoup(html_text, "html.parser")

	    day7 = soup.find('div', {'id': '7d'})
	    forecast_list = day7.find('ul') if day7 is not None else None
	    if forecast_list is None:
	        # No forecast markup (e.g. empty input): nothing to report.
	        return result

	    for day in forecast_list('li'):
	        # Look the paragraphs up once: [0] is read as the weather text,
	        # [1] holds the temperature tags.
	        paragraphs = day.find_all('p')
	        temperatures = paragraphs[1]

	        date = day.find('h1').text.encode('utf-8')
	        weather = paragraphs[0].text.encode('utf-8')

	        highest_tag = temperatures.find('span')
	        highest_text = highest_tag.text if highest_tag is not None else u''
	        lowest_text = temperatures.find('i').text

	        # Strip the unit while the value is still text and encode afterwards:
	        # calling replace() on the encoded bytes with a plain string literal
	        # raises TypeError on Python 3.
	        temperature_highest = highest_text.replace(u'℃', u'').encode('utf-8')
	        temperature_lowest = lowest_text.replace(u'℃', u'').encode('utf-8')

	        result.append((date, weather, temperature_highest, temperature_lowest))

	    return result

	if __name__ == '__main__':

	url ='http://www.weather.com.cn/weather/101010100.shtml'

	html = get_html(url)

	result = parser_data(html)

	for r in result:
	print r[0], r[1], r[2], r[3]