Skip to content

Instantly share code, notes, and snippets.

@smartm13
Created May 16, 2017 22:46
Show Gist options
  • Save smartm13/435a404d2a6415c8b1a4b0302560527e to your computer and use it in GitHub Desktop.
Save smartm13/435a404d2a6415c8b1a4b0302560527e to your computer and use it in GitHub Desktop.
A SERP script that finds the rank of a keyword for a particular website; `beauty` is the final callable.
def getval(s, f, t):
    """Return the part of ``s`` that lies between the markers ``f`` and ``t``.

    The end marker is searched for only after the first occurrence of the
    start marker.

    Args:
        s: String to search.
        f: Start marker; the result begins immediately after it.
        t: End marker; the result stops just before it.

    Returns:
        The text between the markers. If ``t`` does not occur after ``f``
        the whole remainder of ``s`` is returned; if ``f`` does not occur
        at all the result is empty.
    """
    begin = s.find(f)
    if begin < 0:
        # Start marker missing: nothing to extract (empty value of s's type).
        return s[:0]
    begin += len(f)
    end = s.find(t, begin)
    if end < 0:
        # No terminator: slicing with find()'s -1 would drop the last char.
        return s[begin:]
    return s[begin:end]
def dsp(zrc):
    """Debug hook for a fetched page; currently does nothing.

    The file dump that used to live here (writing ``zrc`` to a local HTML
    file) is disabled, so the argument is ignored and ``None`` is returned.
    """
    return None
proxies = []  # proxy URLs produced by the most recent hidemyass() call


def _hma_visible_ip(cell):
    """Rebuild the IP text of one table cell, skipping its hidden pieces."""
    ip = ""
    hidden = []  # CSS class names whose rule in the inline <style> mentions 'none'
    for node in cell.span.contents:
        if not node.name:
            # Bare text node: always part of the address.
            ip += str(node).strip()
        elif node.name == 'style':
            hidden += [getval(rule, '.', '{')
                       for rule in node.text.strip().split('\n')
                       if 'none' in rule]
        elif node.name in ('div', 'span'):
            if 'style' in node.attrs and 'none' in str(node['style']):
                continue
            if 'class' in node.attrs and node['class'][0] in hidden:
                continue
            ip += node.text.strip()
    return ip


def hidemyass(config='1649468'):
    """Scrape the hidemyass proxy list and return it as proxy URLs.

    Every run of 8 ``<td>`` cells is treated as one table row: cell 1 holds
    the IP, cell 2 the port and cell 6 the protocol. Inside the IP cell,
    elements whose inline style or CSS class is marked 'none' are skipped.

    Args:
        config: Search id inserted into the list URL (``search-<config>``).

    Returns:
        A list of ``"<protocol>://<ip>:<port>"`` strings. The same list is
        also stored in the module-level ``proxies``.
    """
    global proxies
    import requests
    from bs4 import BeautifulSoup

    page = requests.get('http://proxylist.hidemyass.com/search-' + config + '#listable').content
    cells = BeautifulSoup(page, 'html.parser').find_all('td')

    found = []
    # Walk whole rows only: a row is usable when its protocol cell (base + 6)
    # exists, so a truncated final row is ignored instead of raising IndexError.
    for base in range(0, len(cells) - 6, 8):
        ip_cell = cells[base + 1]
        if ip_cell.span is None:
            # Not a proxy row (no <span> wrapper around the address).
            continue
        ip = _hma_visible_ip(ip_cell)
        port = cells[base + 2].text.strip()
        scheme = cells[base + 6].text.strip().lower()
        found.append(str(scheme) + "://" + str(ip) + ":" + str(port))

    proxies = found
    return proxies
cache = {}  # proxy mapping most recently produced by getprxy()
prxoff = 0  # proxy mode set by beauty(): 0 = off, 1 = gimmeproxy, n >= 2 = hidemyass proxy number n-1


def getprxy(new=0, tm=500, hidemyassth=1):
    """Return a ``{"http": proxy_url}`` mapping, or ``{}`` when none is available.

    Args:
        new: Falsy to reuse the cached mapping (a fresh one is fetched only
            if the cache is empty and ``prxoff`` is non-zero); truthy to
            always fetch a fresh proxy.
        tm: Value substituted into gimmeproxy's ``maxCheckPeriod`` parameter.
        hidemyassth: 1-based index into the ``hidemyass()`` list; ``0``
            selects the gimmeproxy API instead.

    Returns:
        The proxy mapping, which is also stored in the module-level
        ``cache``. Any failure while fetching yields ``{}``.
    """
    global cache
    if not new:
        if cache:
            return cache
        if not prxoff:
            return {}
        return getprxy(1, tm, abs(prxoff - 1))

    import json
    import requests
    try:
        if hidemyassth:
            cache = {"http": hidemyass()[hidemyassth - 1]}
        else:
            reply = requests.get('http://gimmeproxy.com/api/getProxy?anonymityLevel=1&supportsHttps=true&protocol=http&get=true&cookies=true&maxCheckPeriod={}'.format(tm)).content
            cache = {"http": json.loads(reply)['curl']}
    except Exception:
        # Best effort: fall back to "no proxy", but let KeyboardInterrupt and
        # SystemExit through (a bare except would swallow them).
        cache = {}
    return cache
def gs(q, india=1, num=10, start=0):
    """Fetch one Google results page and return the raw response body.

    Args:
        q: Search query; it is URL-encoded before being sent.
        india: Truthy to query www.google.co.in, falsy for www.google.com.
            For google.com the cookies returned by ``/ncr`` are fetched
            first and sent along with the search request.
        num: Value of the ``num`` query parameter.
        start: Value of the ``start`` query parameter.

    Returns:
        The response ``content`` of the search request. The request is sent
        directly; no proxy is applied.
    """
    import requests
    try:
        from urllib import quote_plus        # Python 2
    except ImportError:
        from urllib.parse import quote_plus  # Python 3

    gdomain = 'www.google.co.in' if india else 'www.google.com'
    uac = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36'}
    if india:
        cookies = {}
    else:
        cookies = requests.get('http://' + gdomain + '/ncr', headers=uac).cookies.get_dict()

    url = ('http://' + gdomain + '/search?q=' + quote_plus(q)
           + '&num=' + str(num) + '&start=' + str(start))
    return requests.get(url, headers=uac, cookies=cookies).content
def gl(q, india=1, num=10, start=0):
    """Return the result URLs found on one Google results page.

    Args:
        q: Search query.
        india: Passed through to ``gs`` (selects the Google domain).
        num: Passed through to ``gs``.
        start: Passed through to ``gs``.

    Returns:
        * ``"CAPTCHA#FAILED"`` when the page contains Google's
          unusual-traffic notice;
        * otherwise a list of target URLs in page order. The list is empty
          when the expected markup (``div#ires`` / ``div.g`` / ``h3.r``)
          cannot be parsed; a ``dlocha`` diagnostic is printed in that case.
    """
    from bs4 import BeautifulSoup

    page = gs(q, india, num, start)
    dsp(page)

    captcha_marker = 'detected unusual traffic from your computer network'
    if isinstance(page, bytes) and not isinstance(captcha_marker, bytes):
        # A bytes body needs a bytes needle on Python 3.
        captcha_marker = captcha_marker.encode('ascii')
    if captcha_marker in page:
        return "CAPTCHA#FAILED"

    soup = BeautifulSoup(page, 'html.parser')
    try:
        results = soup.find_all('div', id='ires')[0]
        hrefs = []
        for cite in results.find_all('cite'):
            block = cite.find_parent('div', attrs={'class': 'g'})
            if block is None:
                continue
            heading = block.find_all('h3', attrs={'class': 'r'})[0]
            hrefs.append(heading.find_all('a')[0]['href'])
    except Exception:
        # Unexpected markup: report it and behave as "no results" instead of
        # failing later on an unbound variable.
        print('dlocha: q={},num={},start={},india={}'.format(q, num, start, india))
        return []

    # Links wrapped as ".../url?q=<target>&..." are unwrapped; any other href
    # is returned unchanged.
    return [getval(href, 'url?q=', '&') if 'url?q=' in href else href
            for href in hrefs]
def pup(keyword, domain, india=1, num=10, start=0):
    """Find where ``domain`` appears on one results page for ``keyword``.

    Args:
        keyword: Search query.
        domain: Text looked for inside each result URL's network location.
        india: Passed through to ``gl``.
        num: Results requested per page.
        start: Offset of the first result of this page.

    Returns:
        The failure string from ``gl`` unchanged (e.g. ``"CAPTCHA#FAILED"``),
        or a list of ``(position, page_number, url)`` tuples, one per
        matching result. ``position`` is 1-based within this page,
        ``page_number`` is ``1 + int(start / num)`` and ``url`` is reduced
        to its ASCII characters.
    """
    found = gl(keyword, india, num, start)
    if isinstance(found, str):
        return found

    try:
        from urlparse import urlparse      # Python 2
    except ImportError:
        from urllib.parse import urlparse  # Python 3

    matches = []
    for position, url in enumerate(found, 1):
        if domain in urlparse(url).netloc:
            # Drop non-ASCII characters but keep a native str on both
            # Python 2 and Python 3 (a plain .encode() gives bytes on 3).
            ascii_url = str(url.encode('ascii', 'ignore').decode('ascii'))
            matches.append((position, 1 + int(start / num), ascii_url))
    return matches
def rank1st(keyword, domain, india=1, gap=10, pgnostop=15):
    """Scan successive result pages until ``pup`` reports something.

    Pages of ``gap`` results are requested starting at offset 0. Scanning
    stops as soon as ``pup`` returns a non-empty value (a list of matches
    or a failure string) or after ``pgnostop`` pages.

    Returns:
        The last value returned by ``pup``, or ``[]`` when no page was
        scanned or nothing was found.
    """
    hits = []
    offset = 0
    pages_left = pgnostop
    while pages_left and not hits:
        hits = pup(keyword, domain, india, gap, offset)
        offset += gap
        pages_left -= 1
    return hits
def beauty(keyword, domain, india=1, gap=10, pgnostop=15, sep='\t', fast=0, prxy=0, isbeauty=1):
    """Report the search rank of ``domain`` for ``keyword``.

    The search is done in blocks of 100 results (up to ``pgnostop`` blocks);
    the first match is then translated into a position on pages of ``gap``
    results.

    Args:
        keyword: Search query.
        domain: Domain text to look for in the result URLs.
        india: Passed through to the search (selects the Google domain).
        gap: Number of results per page used for the "rank on page" figures.
        pgnostop: Maximum number of 100-result blocks to scan.
        sep: Separator between the fields of the formatted string.
        fast: Unused; kept for backward compatibility.
        prxy: Stored in the module-level ``prxoff``
            (0 = off, 1 = on, 2... = hidemyass proxy number).
        isbeauty: Truthy for a formatted string, falsy for a dict.

    Returns:
        With ``isbeauty`` truthy, the string
        ``"<AbsRank><sep><Rank> on <PageNo><sep><url>"``; ``AbsRank`` is
        prefixed with ``~`` when the match was not in the first 100-result
        block. With ``isbeauty`` falsy, ``{'rank': ..., 'url': ...}``.
        When nothing is found the string carries zeros plus a message and
        the dict carries rank ``'-2'``. A failure string from the search
        (e.g. ``"CAPTCHA#FAILED"``) is always reported in the string form.

    Raises:
        ValueError: if ``gap`` is not positive.
    """
    if gap <= 0:
        # Previously a zero gap surfaced as a misleading "no result" message.
        raise ValueError('gap must be a positive number of results per page')

    global prxoff  # prxy: 0 = off, 1 = on, 2... = hidemyass proxy number
    prxoff = prxy

    scanned = 100 * pgnostop
    hits = rank1st(keyword, domain, india, 100, pgnostop)
    if isinstance(hits, str):
        return "{0}{sep}{0} on {0}{sep}{1}".format(0, hits, sep=sep)
    if not hits:
        if isbeauty:
            return "{0}{sep}{0} on {0}{sep}{1}".format(0, 'Locha: No result in first {} pages ({} results scanned)'.format(pgnostop, scanned), sep=sep)
        return {'rank': '-2', 'url': 'not found in 1st {} results'.format(scanned)}

    position, block_no, url = hits[0]
    # 1-based rank over all results: position inside its 100-result block
    # plus 100 for every earlier block.
    absr = position + 100 * (block_no - 1)

    if not isbeauty:
        # NOTE(review): url[2:] drops the first two characters of the URL;
        # kept exactly as before -- confirm whether that is intended.
        return {'rank': str(absr), 'url': url[2:]}

    # Convert the 1-based absolute rank to a 1-based page and slot, so that
    # e.g. rank 10 with gap 10 is "10 on 1" rather than "0 on 2".
    page_index, slot = divmod(absr - 1, gap)
    pgno = page_index + 1
    rrank = slot + 1
    # '~' marks a rank taken from a later 100-result block.
    arank = absr if block_no == 1 else '~' + str(absr)
    return "{0}{sep}{1} on {2}{sep}{3}".format(arank, rrank, pgno, url, sep=sep)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment