Skip to content

Instantly share code, notes, and snippets.

@grt1st grt1st/api_spider.py Secret
Created Oct 15, 2017

Embed
What would you like to do?
A spider that crawls GitHub code search for leaked Shodan API keys.
#coding:utf-8
import requests
import re
from lxml import etree
import os
import io
import pickle
import threading
import warnings
# Silence warnings globally (e.g. urllib3 InsecureRequestWarning when the
# commented-out verify=False requests below are re-enabled).
warnings.filterwarnings('ignore')
# One shared HTTP session so the GitHub login cookies persist across requests
# and across the worker threads started at the bottom of the script.
session = requests.Session()
# Browser-like headers so GitHub serves the normal HTML search pages.
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:55.0) Gecko/20100101 Firefox/55.0',
'Referer': 'https://github.com/',
'Host': 'github.com',
'Upgrade-Insecure-Requests': '1',
}
# GitHub login form fields; the placeholder credentials must be replaced
# with a real account before running.
payload = {'commit': 'Sign in', 'login': 'xxxxx@xxx.xxx', 'password': 'xxxxxx'}
# Optional local intercepting proxy (e.g. Burp on 8080) — only used if the
# commented-out proxies= arguments in the request calls are re-enabled.
proxies = {
'http': 'http://127.0.0.1:8080',
'https': 'http://127.0.0.1:8080',
}
def see(text):
    """Dump *text* to ./t.html for manual inspection (debug helper).

    The dump file is opened with an explicit UTF-8 encoding so non-ASCII
    page content cannot raise UnicodeEncodeError on platforms whose
    default text encoding is narrower (the original relied on the
    platform default).
    """
    with open("./t.html", "w", encoding="utf-8") as f:
        f.write(text)
def get_token(text):
    """Extract GitHub's CSRF token from the login-page HTML.

    The login form embeds it as:
        <input name="authenticity_token" value="..." type="hidden">

    Parameters
    ----------
    text : str or bytes
        Raw HTML of https://github.com/login (etree.HTML accepts both).

    Returns
    -------
    The value attribute of the authenticity_token input.

    Raises
    ------
    SystemExit
        If the token input is missing or unreadable.  (BUG FIX: the
        original called ``os.exit()``, which does not exist — it would
        have crashed with AttributeError instead of exiting cleanly.)
    """
    html = etree.HTML(text)
    inputs = html.xpath("//input[@name='authenticity_token']")
    try:
        token = inputs[0].get('value')
    except IndexError:
        print("[+] Error: can't get login token, exit...")
        raise SystemExit(1)
    except Exception as e:
        print(e)
        raise SystemExit(1)
    return token
def get_cookie(session):
    """Ensure *session* is authenticated with GitHub and return it.

    First run (no ./cookies.txt): fetches the login form, scrapes its
    CSRF token via get_token(), and POSTs the module-level `payload`
    credentials; the resulting cookies live on *session* and are
    persisted by the script's tail end.

    Later runs: restores the pickled cookie dict from ./cookies.txt into
    the session's cookie jar.  A cookie file that fails to unpickle is
    deleted and the function retries, falling through to the login path.

    NOTE(review): unpickling is only safe because ./cookies.txt is a
    file this script itself wrote — never point pickle.load at
    untrusted data, it can execute arbitrary code.
    """
    if not os.path.exists("./cookies.txt"):
        # Fresh login: GET the form for its CSRF token, then POST it.
        r = session.get("https://github.com/login", headers=headers)  # , verify=False, proxies=proxies)
        payload['authenticity_token'] = get_token(r.content)
        r = session.post("https://github.com/session", headers=headers, data=payload)  # , verify=False, proxies=proxies)
        # see(r.text)  # uncomment to inspect the post-login page
    else:
        with open('./cookies.txt', 'rb') as f:
            try:
                cookies = requests.utils.cookiejar_from_dict(pickle.load(f))
            except TypeError:
                # Corrupt/legacy cookie file: discard it and retry, which
                # will take the login branch above.
                os.remove("./cookies.txt")
                return get_cookie(session)
        session.cookies = cookies
    return session
def _print_assignment(name, snippet):
    """Print the value assigned to *name* inside *snippet*.

    Matches on the first 6 characters of *name* followed by '=' or ':',
    strips whitespace from the snippet first, and prints the candidate
    value split on double quotes (mirroring the original output format).
    """
    pattern0 = re.compile("%s[=|:](.*)[\"|']" % name[:6])
    found = re.findall(pattern0, snippet.replace(' ', ''))
    if found:
        print(found[0].split('\'')[0].split('"'))


def search(url, session):
    """Fetch one GitHub code-search result page and print candidate API keys.

    Scrapes every code snippet on the page, stitches each file's lines
    back into one string, then hunts for `key=<value>` assignments and
    prints the extracted values to stdout.  Returns nothing.

    Fixes vs. the original:
    - `lines` is initialised up front, so the count-mismatch branch no
      longer leaves it unbound (NameError).
    - the final accumulator is only appended when a snippet was actually
      collected (the original could append None).
    - BUG FIX: the `%(name)s` placeholder branch assigned to a typo'd
      `resulresults` variable and then iterated the stale `results`.

    NOTE(review): assumes etree.tostring(..., method='text') yields str
    (Python 2-era lxml behaviour); under Python 3 it returns bytes and
    the later .replace(' ', '') would fail — confirm target runtime.
    """
    item_xpath = "//div[@class='code-list-item col-12 py-4 code-list-item-public ']"
    r = session.get(url, headers=headers)  # , verify=False, proxies=proxies)
    html = etree.HTML(r.text)
    codes = html.xpath(item_xpath + "/div[@class='file-box blob-wrapper']/table[@class='highlight']/tr/td[@class='blob-code blob-code-inner']")
    nums = html.xpath(item_xpath + "/div[@class='file-box blob-wrapper']/table[@class='highlight']/tr/td[@class='blob-num']/a")

    lines = []  # one entry per snippet: its source lines joined with ' \n '
    if len(codes) == len(nums):
        strs = None  # accumulator for the snippet currently being stitched
        for i in range(len(nums)):
            try:
                text = etree.tostring(codes[i], method='text')
            except UnicodeEncodeError:
                continue
            if nums[i].text == '1':
                # Line number 1 starts a new snippet; flush the previous one.
                if strs is not None:
                    lines.append(strs)
                strs = text
            else:
                strs = "%s \\n %s" % (strs, text)
        if strs is not None:
            lines.append(strs)
    else:
        print("[+] Error: wrong number get for codes lines, exit")

    pattern = re.compile(r'key=(.*)[&|"|\']')    # value following 'key='
    pattern1 = re.compile(r"\w+")                # first identifier-ish token
    pattern2 = re.compile(r'%\([\w|\.|,]+')      # %(name or %(a,b placeholders
    for a in lines:
        strs = re.findall(pattern, str(a))
        if not strs:
            continue
        # Trim the match at the first quote/ampersand delimiter.
        results = strs[0].split('"')[0].split('&')[0].split('\'')[0]
        if results == '':
            continue
        try:
            data = re.findall(pattern1, results)[0]
        except IndexError:
            print(results)
            continue
        if data == 's':
            # `key=%(name)s` style: resolve each placeholder name, then
            # look up its assignment elsewhere in the same snippet.
            results = re.findall(pattern2, a.replace(' ', ''))
            names = []
            for placeholder in results:
                names.extend(placeholder.replace('%(', '').split(','))
            for name in set(names):
                _print_assignment(name, a)
        elif len(data) < 32:
            # Short token: probably a variable name, not the key itself —
            # chase its assignment in the snippet.
            _print_assignment(data, a)
        else:
            # Long enough to be the literal key: print it directly.
            print(data)
# Search query: code that builds Shodan host-API URLs with an inline key.
# NOTE(review): the query is interpolated into the URL unescaped, exactly as
# the original did — GitHub tolerates it, but urllib quoting would be safer.
words = "https://api.shodan.io/shodan/host/ key="
session = get_cookie(session)


def _crawl_pages(first, last):
    """Crawl result pages [first, last) concurrently, one thread per page,
    and block until every page has been processed."""
    threads = []
    for page in range(first, last):
        url = "https://github.com/search?p=%i&q=%s&type=Code" % (page, words)
        t = threading.Thread(target=search, args=(url, session))
        t.start()
        threads.append(t)
    for t in threads:
        t.join()


# Two batches of 20 pages each (the original duplicated this loop inline).
_crawl_pages(1, 21)
_crawl_pages(21, 41)

# Persist the authenticated cookies so the next run skips the login flow.
with open('./cookies.txt', 'wb') as f:
    pickle.dump(requests.utils.dict_from_cookiejar(session.cookies), f)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.