Skip to content

Instantly share code, notes, and snippets.

@zqqf16
Last active December 28, 2015 07:59
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zqqf16/7468434 to your computer and use it in GitHub Desktop.
Save zqqf16/7468434 to your computer and use it in GitHub Desktop.
获取天弘基金以及华夏基金最近5天收益信息
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import re
import requests
class Spider(object):
    """Download one page and extract data from it with a compiled regex.

    Args:
        url: Address of the page to fetch.
        pattern: Compiled regular expression; its ``findall`` result is what
            ``crawl`` returns.
        encoding: Text encoding forced onto the response before matching.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely.
    """

    def __init__(self, url, pattern, encoding, timeout=10):
        self.url = url
        self.pattern = pattern
        self.encoding = encoding
        self.timeout = timeout

    def crawl(self):
        """Fetch ``self.url`` and return every match of ``self.pattern``.

        Returns:
            The list produced by ``self.pattern.findall`` on the decoded
            response body. An empty list is returned when the server answers
            with a status other than 200, so callers can always slice or
            iterate the result.

        Raises:
            Whatever ``requests.get`` raises (connection errors, timeouts);
            those are not caught here.
        """
        r = requests.get(self.url, timeout=self.timeout)
        if r.status_code != 200:
            print('HTTP error({})!'.format(r.status_code))
            # Keep the return type consistent: an empty list rather than None.
            return []
        # Override the detected encoding so r.text is decoded as configured.
        r.encoding = self.encoding
        return self.pattern.findall(r.text)
# Row pattern for the Tianhong page. Each match is one <tr> whose cells hold,
# in order: a dash-separated numeric date, a plain number, and a number
# followed by '%'. findall() therefore yields 3-tuples of strings.
# (presumably the two numbers are the fund's income/yield columns - TODO confirm
# against the live page.)
# re.X makes the line breaks inside the pattern insignificant; re.S lets '.'
# match across newlines in the HTML.
th_re = re.compile(r'''<tr>.+?
<td>(\d+-\d+-\d+)</td>.+?
<td>([\d.]+?)</td>.+?
<td>([\d.]+?)%</td>.+?
</tr>''', re.S|re.X)
# Positional arguments for Spider(url, pattern, encoding); unpacked with '*'.
tianhong = ['http://www.thfund.com.cn/website/hd/zlb/newzlbrev2.jsp', th_re, 'utf-8']
# Row pattern for the Huaxia (ChinaAMC) page. The '<tr.+?>' / '<td.+?>' forms
# require at least one character after the tag name, i.e. tags that carry
# attributes. A matching row has five cells: the first three are captured
# (dash-separated numeric date, then two plain numbers); the last two are
# matched but not captured. findall() therefore yields 3-tuples of strings.
# re.X makes the line breaks inside the pattern insignificant; re.S lets '.'
# match across newlines in the HTML.
hx_re = re.compile(r'''<tr.+?>.+?
<td.+?>(\d+-\d+-\d+)</td>.+?
<td.+?>([\d.]+?)</td>.+?
<td.+?>([\d.]+?)</td>.+?
<td.+?>.+?</td>.+?
<td.+?>.+?</td>.+?
</tr>''', re.S|re.X)
# Positional arguments for Spider(url, pattern, encoding); unpacked with '*'.
# This page is decoded as gb2312 rather than utf-8.
huaxia = ['http://www.chinaamc.com/fund/003003/index.shtml', hx_re, 'gb2312']
if __name__ == '__main__':
    # Script entry point: crawl both fund pages and print up to five rows
    # side by side, Huaxia values in green and Tianhong values in red.
    import colorit

    # crawl() may yield nothing (HTTP error or no regex match); fall back to
    # an empty list so the slice below cannot fail on None.
    hx = (Spider(*huaxia).crawl() or [])[:5]
    th = (Spider(*tianhong).crawl() or [])[:5]

    green = colorit.paint('green')
    red = colorit.paint('red')

    print('{} {}'.format(green('huaxia'), red('tianhong')))
    # zip() stops at the shorter list, so fewer than five rows from either
    # site no longer raises IndexError. The date column comes from Huaxia.
    for hx_row, th_row in zip(hx, th):
        print('{} {}|{} {}|{}'.format(
            hx_row[0],
            green(hx_row[1]), red(th_row[1]),
            green(hx_row[2]), red(th_row[2])))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment