Skip to content

Instantly share code, notes, and snippets.

@Harryyan
Created April 26, 2014 05:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Harryyan/11312714 to your computer and use it in GitHub Desktop.
Save Harryyan/11312714 to your computer and use it in GitHub Desktop.
模拟chrome浏览器搜索关键字,返回第一页url
#!/usr/bin/python
#coding:utf-8
import urllib,urllib2,cookielib,re,sys,os,time,random
# Cookie jar handed to HTTPCookieProcessor by searchkey() and getSize(), so
# all requests made by this script share one set of cookies.
cj = cookielib.CookieJar()

# Search phrases used as the initial content of the keyword lists below.
str1 = 'Apple-Mon identifiant Apple'  #0
str2 = 'Woolworths - Customer Satisfaction Survey'  #0

# Keyword lists: the phrases that get sent to the search engine.
vibramkey = [str1]
beatskey = [str2]

# Domain lists. NOTE(review): presumably these are the sites whose ranking is
# tracked for the matching keyword list (the disabled driver code assigns one
# of them to `pinpai`, which searchkey() iterates) -- confirm.
vibramweb = ['mugentrading.net']
beatsweb = ['vafaelion.com']
#allweb=['vibramweb','beatsweb']
def searchkey(key,start,keydict,times):
    """Fetch one Google results page for *key* and log where tracked domains rank.

    The page is downloaded, saved to the file ``google`` in the current
    directory, and the contents of its ``<cite>`` elements are extracted with
    ``grep``/``sed`` (intermediate file ``tmp``).  For every domain in the
    module-level global ``pinpai`` that occurs in an extracted line, one line
    is appended to ``index3.html`` giving the page number, the rank and a
    rank ratio.

    ``pinpai`` is not a parameter: it must be defined at module level before
    this function is called.

    Args:
        key: Search phrase.  ``+`` may be used for spaces; it is turned back
            into a space in the report line.
        start: Result offset sent as Google's ``start`` parameter
            (0, 10, 20, 30 map to pages 1-4; anything else is reported as
            page 5).
        keydict: Maps each key to a list of per-page result counts, as built
            by getSize().
        times: Number of leading entries of ``keydict[key]`` whose counts are
            added to the on-page position to obtain the overall rank.

    Returns:
        None.  If fetching or extracting fails, changeip() is called and
        nothing is written to the report.

    Raises:
        KeyError: if *key* is missing from *keydict*.
        NameError: if the global ``pinpai`` has not been defined.
    """
    # Percent-encode the phrase so spaces and other reserved characters do not
    # end up raw in the request line.  '+' and '%' are left untouched so keys
    # that are already encoded keep working unchanged.
    query = urllib.quote(key, safe="+%")
    url = "http://www.google.com/search?hl=en&q=%s&revid=33815775&sa=X&ei=X6CbT4GrIoOeiQfth43GAw&ved=0CIgBENUCKAY&start=%s" % (query, start)
    try:
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
        opener.addheaders = [('User-agent', 'Mozilla/5.0')]
        urllib2.install_opener(opener)
        response = urllib2.urlopen(urllib2.Request(url))
        try:
            content = response.read()
        finally:
            response.close()
        # The file must be closed (flushed) before grep reads it; otherwise
        # grep can see an empty or truncated page.
        with open('google', 'w') as page_file:
            page_file.write(content)
        # NOTE(review): lowercase `-p` is accepted by BSD grep but does not
        # appear to be a GNU grep option -- confirm against the target platform.
        os.system("grep -iop '<cite>.*</cite>' google | sed 's/^<cite>//'>tmp")
        sed_out = os.popen(r"sed 's/<\/cite>$//' tmp")
        try:
            tiqu = sed_out.readlines()
        finally:
            sed_out.close()
    except Exception:
        # Best effort: any failure while fetching is treated as "blocked" and
        # answered by switching route.  `Exception` (rather than a bare
        # except) lets KeyboardInterrupt / SystemExit through.
        changeip()
        return

    total_size = sum(keydict[key])
    print(keydict[key])
    print("the size is:  %s" % (total_size,))

    page = {0: 1, 10: 2, 20: 3, 30: 4}.get(start, 5)
    lastkey = key.replace("+", " ")
    # Results counted on the first `times` pages come before this page.
    offset = sum(keydict[key][0:times])

    for domain in pinpai:
        print("domain is: %s" % (domain,))
        for position, topDomain in enumerate(tiqu, 1):
            # Containment test: a match at index 0 (the line starts with the
            # domain) counts too, which `find(...) > 0` used to miss.
            if domain not in topDomain:
                continue
            rank = position + offset
            # Guard against an empty/zero count list instead of crashing with
            # ZeroDivisionError; the ratio is reported as 0 in that case.
            if total_size:
                ratio = float(total_size + 1 - rank) / total_size
            else:
                ratio = 0.0
            xinxi = "%s\t\t %s\t\t Page%s,rank%s,Rank Ratio:\t%f\n" % (topDomain, lastkey, page, rank, ratio)
            with open('index3.html', 'a') as xinxifile:
                xinxifile.write(xinxi)
def changeip():
    """Replace the host route for google.com with one through ``eth1:<n>``.

    ``n`` is drawn uniformly from 0, 1, 2.  The existing host route is deleted
    and a new one added by shelling out to ``route``; the chosen number is then
    printed.  NOTE(review): presumably each ``eth1:<n>`` carries a different
    source address, so this changes the IP the requests come from -- confirm
    against the host's network setup.
    """
    chosen = random.randint(0, 2)
    commands = (
        "route delete -host google.com",
        "route add -host google.com eth1:%s" % chosen,
    )
    for command in commands:
        os.system(command)
    print("changip to %s" % chosen)
def readUrl(filename,webSites):
    """Append every line of *filename* to *webSites*, minus its newline.

    Args:
        filename: Path of a text file with one entry per line.
        webSites: List that is extended in place; blank lines are appended
            as empty strings.

    Returns:
        None.
    """
    # `with` guarantees the handle is closed even if reading fails; the
    # previous version never closed it.
    with open(filename) as myfile:
        webSites.extend(line.strip('\n') for line in myfile)
def getSize(key,start,totalCount):
    """Fetch one Google results page for *key* and record how many results it has.

    The page is downloaded, saved to the file ``google`` in the current
    directory, and the contents of its ``<cite>`` elements are extracted with
    ``grep``/``sed`` (intermediate files ``tmp2`` and ``tmp3``).  The number of
    extracted lines is appended to *totalCount*.

    Args:
        key: Search phrase (``+`` may be used for spaces).
        start: Result offset sent as Google's ``start`` parameter.
        totalCount: List that receives the count for this page; it is left
            untouched when the fetch fails.

    Returns:
        None.  If fetching or extracting fails, changeip() is called instead
        of appending a count.
    """
    # Percent-encode the phrase so spaces and other reserved characters do not
    # end up raw in the request line.  '+' and '%' are left untouched so keys
    # that are already encoded keep working unchanged.
    query = urllib.quote(key, safe="+%")
    url = "http://www.google.com/search?hl=en&q=%s&revid=33815775&sa=X&ei=X6CbT4GrIoOeiQfth43GAw&ved=0CIgBENUCKAY&start=%s" % (query, start)
    try:
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
        opener.addheaders = [('User-agent', 'Mozilla/5.0')]
        urllib2.install_opener(opener)
        response = urllib2.urlopen(urllib2.Request(url))
        try:
            content = response.read()
        finally:
            response.close()
        # The file must be closed (flushed) before grep reads it; otherwise
        # grep can see an empty or truncated page.
        with open('google', 'w') as page_file:
            page_file.write(content)
        # NOTE(review): lowercase `-p` is accepted by BSD grep but does not
        # appear to be a GNU grep option -- confirm against the target platform.
        os.system("grep -iop '<cite>.*</cite>' google | sed 's/^<cite>//'>tmp2")
        # `> tmp3` sent all of sed's output to the file, so readlines() always
        # returned [] and every count was 0.  `tee` still writes tmp3 but also
        # passes the lines through to us.
        sed_out = os.popen(r"sed 's/<\/cite>$//' tmp2 | tee tmp3")
        try:
            tiqu = sed_out.readlines()
        finally:
            sed_out.close()
        print("i am here")
    except Exception:
        # Best effort: any failure while fetching is treated as "blocked" and
        # answered by switching route.  `Exception` (rather than a bare
        # except) lets KeyboardInterrupt / SystemExit through.
        changeip()
    else:
        totalCount.append(len(tiqu))
if __name__ == '__main__':
    # Script entry point: for every keyword in `vibramkey`, ask getSize() for
    # the result count of each page offset in `pageSearch` and store the list
    # of counts in `keydict` under that keyword.
    pinpaiid = 0  # only read by the disabled ranking pass below
    keydict = {}
    pageSearch = [0]  # Google `start` offsets to query; 0 is the first page

    for key in vibramkey:
        print(key)
        totalCount = []
        for start in pageSearch:
            getSize(key, start, totalCount)
        keydict[key] = totalCount

    # Disabled ranking pass, kept for reference.
    # #search key
    # for x in vibramkey:
    #     if pinpaiid == 0:
    #         pinpai=vibramweb
    #     elif pinpaiid == 1:
    #         pinpai=beatsweb
    #     pinpaiid=pinpaiid+1
    #     for key in x:
    #         times = 0
    #         for start in pageSearch:
    #             searchkey(key,start,keydict,times)
    #             times += 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment