-
-
Save grt1st/c21d24d9fb3e9eefae50cc37be284c97 to your computer and use it in GitHub Desktop.
A spider that crawls GitHub code search looking for leaked Shodan API keys.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#coding:utf-8 | |
import requests | |
import re | |
from lxml import etree | |
import os | |
import io | |
import pickle | |
import threading | |
import warnings | |
warnings.filterwarnings('ignore') | |
# Shared HTTP session so the login cookies persist across all requests.
session = requests.Session()

# Browser-like headers; GitHub rejects requests without a realistic User-Agent.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:55.0) Gecko/20100101 Firefox/55.0',
    'Referer': 'https://github.com/',
    'Host': 'github.com',
    'Upgrade-Insecure-Requests': '1',
}

# Form fields POSTed to https://github.com/session during login.
# NOTE(review): placeholder credentials — must be replaced before running.
payload = {'commit': 'Sign in', 'login': 'xxxxx@xxx.xxx', 'password': 'xxxxxx'}

# Optional local debugging proxy (e.g. Burp/mitmproxy). Currently unused:
# every request below has the proxies= argument commented out.
proxies = {
    'http': 'http://127.0.0.1:8080',
    'https': 'http://127.0.0.1:8080',
}
def see(text):
    """Dump *text* to ./t.html for eyeballing a fetched page while debugging."""
    # BUG FIX: opening without an explicit encoding uses the platform default,
    # which can raise UnicodeEncodeError when the page contains non-ASCII text.
    with open("./t.html", "w", encoding="utf-8") as f:
        f.write(text)
def get_token(text):
    """Extract the hidden CSRF token from a GitHub login page.

    GitHub embeds the token as
    <input name="authenticity_token" value="..." type="hidden">;
    the POST to /session is rejected without it.
    Terminates the program if the token cannot be found.
    """
    html = etree.HTML(text)
    t = html.xpath("//input[@name='authenticity_token']")
    try:
        token = t[0].get('value')
    except IndexError:
        # No matching <input> on the page — login flow cannot continue.
        # BUG FIX: os.exit() does not exist (it raised AttributeError instead
        # of exiting); SystemExit terminates cleanly with a non-zero status.
        print("[+] Error: can't get login token, exit...")
        raise SystemExit(1)
    except Exception as e:
        print(e)
        raise SystemExit(1)
    return token
def get_cookie(session):
    # Log the session in (or restore a previously saved login) and return it.
    #
    # First run: no ./cookies.txt on disk, so perform the full login
    # handshake — GET /login for the CSRF token, then POST the credentials
    # to /session.  The resulting cookie jar is pickled to ./cookies.txt
    # by the main script once the crawl finishes.
    if not os.path.exists("./cookies.txt"):
        r = session.get("https://github.com/login", headers=headers)#, verify=False, proxies=proxies)
        payload['authenticity_token'] = get_token(r.content)
        r = session.post("https://github.com/session", headers=headers, data=payload)#, verify=False, proxies=proxies)
        #print(r.cookies.get_dict())
        #see(r.text)
    else:
        # Subsequent runs: restore the pickled cookie dict.  A corrupt or
        # incompatible pickle raises TypeError; delete the stale file and
        # recurse, which falls through to the fresh-login branch above.
        with open('./cookies.txt', 'rb') as f:
            try:
                cookies = requests.utils.cookiejar_from_dict(pickle.load(f))
            except TypeError:
                os.remove("./cookies.txt")
                return get_cookie(session)
        session.cookies=cookies
    return session
def search(url, session):
    """Fetch one GitHub code-search result page and print candidate API keys.

    Scrapes each result snippet, joins its numbered code lines into one
    string per snippet, then greps for ``key=...`` assignments and prints
    anything that looks like a hard-coded key.  All output goes to stdout.
    """
    r = session.get(url, headers=headers)#, verify=False, proxies=proxies)
    html = etree.HTML(r.text)
    block = html.xpath("//div[@class='code-list-item col-12 py-4 code-list-item-public ']")
    #print("[+] Info: get item: %i" % len(block))
    # Parallel lists: one <td> of code text and one line-number <a> per line.
    codes = html.xpath("//div[@class='code-list-item col-12 py-4 code-list-item-public ']/div[@class='file-box blob-wrapper']/table[@class='highlight']/tr/td[@class='blob-code blob-code-inner']")
    nums = html.xpath("//div[@class='code-list-item col-12 py-4 code-list-item-public ']/div[@class='file-box blob-wrapper']/table[@class='highlight']/tr/td[@class='blob-num']/a")
    if len(codes) != len(nums):
        # BUG FIX: the original fell through here with `lines` undefined,
        # crashing with NameError below; bail out of this page instead.
        print("[+] Error: wrong number get for codes lines, exit")
        return
    # Re-assemble snippets: a line number of '1' marks the start of a new
    # snippet; later lines are appended with a literal ' \n ' separator.
    lines = []
    strs = None
    for i in range(len(nums)):
        #print(etree.tostring(codes[i], method='text'))
        try:
            text = etree.tostring(codes[i], method='text')
        except UnicodeEncodeError:
            # Skip lines lxml cannot serialize as text.
            continue
        if nums[i].text == '1':
            if strs is not None:
                lines.append(strs)
            strs = text
        else:
            strs = "%s \\n %s" % (strs, text)
    lines.append(strs)
    pattern = re.compile('key=(.*)[&|"|\']')     # value after a key= assignment
    pattern1 = re.compile("\w+")                 # first identifier-ish token
    pattern2 = re.compile('%\([\w|\.|,]+')       # %(name) format placeholders
    for a in lines:
        #a = a.replace(' ','')
        strs = re.findall(pattern, str(a))
        if len(strs) > 0:
            # Trim the captured value at the first quote or '&'.
            results = strs[0].split('"')[0]
            results = results.split('&')[0]
            results = results.split('\'')[0]
            if results == '':
                continue
            try:
                data = re.findall(pattern1, results)[0]
            except IndexError:
                print(results)
                continue
            if data == 's':
                # key=%s style: the key lives in a format argument; pull the
                # %(name) placeholders and grep for each name's assignment.
                # BUG FIX: the original assigned to a typo'd `resulresults`
                # and then iterated the stale `results` string character by
                # character; iterate the placeholder matches as intended.
                results = re.findall(pattern2, a.replace(' ',''))
                lists = []
                for i in results:
                    i = i.replace('%(', '')
                    i = i.split(',')
                    lists.extend(i)
                lists = set(lists)
                for i in lists:
                    pattern0 = re.compile("%s[=|:](.*)[\"|']" % i[:6])
                    results = re.findall(pattern0, a.replace(' ',''))
                    if len(results) > 0:
                        results = results[0].split('\'')[0]
                        print(results.split('"'))
                        #print(a)
            elif len(data) < 32:
                # Token too short to be a key itself — treat it as a variable
                # name and search the snippet for its assignment.
                pattern0 = re.compile("%s[=|:](.*)[\"|']" % data[:6])
                results = re.findall(pattern0, a.replace(' ',''))
                if len(results) > 0:
                    results = results[0].split('\'')[0]
                    print(results.split('"'))
                    #print(a)
            else:
                # 32+ chars: plausibly the literal key; print it directly.
                print(data)
# ---------------------------------------------------------------------------
# Main script.
# Search query: GitHub code search for Shodan API usage with a literal
# "key=" nearby — i.e. a likely hard-coded API key.
words = "https://api.shodan.io/shodan/host/ key="
session = get_cookie(session)

# GitHub paginates code search; crawl result pages 1-20 concurrently,
# one thread per page.
threads = []
for i in range(1, 21):
    url = "https://github.com/search?p=%i&q=%s&type=Code" % (i, words)
    t=threading.Thread(target = search, args = (url, session))
    t.start()
    threads.append(t)
for t in threads:
    t.join()

# Second batch (pages 21-40), kept separate so at most 20 threads run at once.
threads = []
for i in range(21, 41):
    url = "https://github.com/search?p=%i&q=%s&type=Code" % (i, words)
    t=threading.Thread(target = search, args = (url, session))
    t.start()
    threads.append(t)
for t in threads:
    t.join()

# Persist the authenticated cookie jar so later runs can skip the login.
with open('./cookies.txt', 'wb') as f:
    pickle.dump(requests.utils.dict_from_cookiejar(session.cookies), f)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment