Skip to content

Instantly share code, notes, and snippets.

@fy0
Created December 26, 2021 07:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save fy0/1635173e74f04431b648f92af9911e35 to your computer and use it in GitHub Desktop.
Save fy0/1635173e74f04431b648f92af9911e35 to your computer and use it in GitHub Desktop.
幻塔抽卡概率统计 v1.0
# 起因:https://nga.178.com/read.php?tid=30008586
# 依赖库:pip install requests lxml cssselect
import re
import requests
from lxml import etree
# Browser-like request headers sent with every page fetch.
headers = {
    # Target host and cache control.
    'authority': 'nga.178.com',
    'pragma': 'no-cache',
    'cache-control': 'no-cache',
    'upgrade-insecure-requests': '1',

    # Client identification and content negotiation.
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.192 Safari/537.36 OPR/74.0.3911.218',
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'accept-language': 'zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7',

    # Fetch metadata and referer of the thread being scraped.
    'sec-fetch-site': 'same-origin',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-user': '?1',
    'sec-fetch-dest': 'document',
    'referer': 'https://nga.178.com/read.php?tid=30008586&_ff=836',

    # Fill in your own cookies here before running.
    'cookie': '',
}
# 单条数据处理
def result_solve(x):
    """Convert a sequence of numeric strings into a list of ints.

    Args:
        x: An iterable of values accepted by ``int`` (e.g. the tuple returned
            by ``re.Match.groups()``), or a falsy value such as ``None``.

    Returns:
        A list with every element of ``x`` converted by ``int``, or ``None``
        when ``x`` is falsy.
    """
    if not x:
        return None
    return [int(part) for part in x]
# Patterns for "N draws, M gold" statements, tried in order; the first one
# that matches wins.
# - Capture groups are greedy so multi-digit counts are captured in full
#   (a trailing lazy ``\d+?`` stops after a single digit).
# - ``[抽发]`` replaces ``[抽|发]``, which also matched a literal ``|``.
# - Both the ASCII and the full-width colon/comma are accepted after a label.
_DRAW_PATTERNS = tuple(re.compile(pattern) for pattern in (
    r'抽卡[::]?(\d+)[抽发]\s?(\d+)金',
    r'金[核河][::]?\s?(\d+)[抽发]\s?(\d+)金?',
    r'金[::](\d+)\s(\d+)',
    r'金[核河](\d+)[抽发][,,](\d+)金?',
    r'金\s[::]\s(\d+)\s(\d+)',
    r'(\d+)[抽发]\s?(\d+)(?:金|ssr)',
    r'金核[::]?(\d+)\s?(\d+)金?',
))


def match_pattern1(txt):
    """Extract the (draw count, SSR count) pair from a post's text.

    Args:
        txt: The text to search. Non-string input is treated as "no match".

    Returns:
        A tuple of two digit strings ``(draws, ssr)`` taken from the first
        pattern in ``_DRAW_PATTERNS`` that matches, or ``None`` if none does.
    """
    if not isinstance(txt, str):
        # Keep the original best-effort behavior: bad input means no data.
        return None
    for pattern in _DRAW_PATTERNS:
        found = pattern.search(txt)
        if found is not None:
            return found.groups()
    return None
# Patterns for "N pages of black cores, M gold" statements, tried in order;
# the first one that matches wins.
# - Capture groups are greedy so multi-digit counts are captured in full
#   (a trailing lazy ``\d+?`` stops after a single digit).
# - Both the ASCII and the full-width colon/comma are accepted after a label.
_BLACK_PATTERNS = tuple(re.compile(pattern) for pattern in (
    r'黑核[::]?(\d+)页\s?(\d+)金?',
    r'黑[::]?(\d+)\s(\d+)',
    r'黑核[::]?(\d+)页出了\s?(\d+)金?',
    r'黑核[::]?\s?(\d+)页\s?(\d+)金?',
    r'黑\s[::]\s(\d+)\s(\d+)',
    r'黑[::]?\s?(\d+)页(\d+)',
    r'黑核(\d+)页[,,](\d+)金?',
    r'黑核[::]?刚到(\d+)页\s?(\d+)金?',
))


def match_pattern2(txt):
    """Extract the (black-core pages, SSR count) pair from a post's text.

    Args:
        txt: The text to search. Non-string input is treated as "no match".

    Returns:
        A tuple of two digit strings ``(pages, ssr)`` taken from the first
        pattern in ``_BLACK_PATTERNS`` that matches, or ``None`` if none does.
    """
    if not isinstance(txt, str):
        # Keep the original best-effort behavior: bad input means no data.
        return None
    for pattern in _BLACK_PATTERNS:
        found = pattern.search(txt)
        if found is not None:
            return found.groups()
    return None
def solve_item(item):
    """Turn one post-content element into a result record.

    Args:
        item: An element whose ``id`` attribute is ``'postcontent'`` followed
            by the floor number, and which has a grandparent element
            (presumably an lxml element; see the caller).

    Returns:
        A dict with keys ``floor`` (int), ``draw_ssr`` and ``black_ssr``
        (two-int lists, ``[0, 0]`` when nothing matched), ``attach`` (bool,
        whether the grandparent has a child span whose id contains
        ``postattach``) and ``is_data`` (bool, whether either pair matched).
    """
    body_text = '\n'.join(item.itertext())
    # The id looks like "postcontent<N>"; strip the prefix to get the floor.
    prefix_len = len('postcontent')
    floor = int(item.get('id')[prefix_len:])

    draw_ssr = result_solve(match_pattern1(body_text))
    if not draw_ssr:
        draw_ssr = [0, 0]
    black_ssr = result_solve(match_pattern2(body_text))
    if not black_ssr:
        black_ssr = [0, 0]

    container = item.getparent().getparent()
    attach = bool(container.xpath("span[contains(@id, 'postattach')]"))

    # A post counts as data when at least one of the two pairs was found.
    is_data = draw_ssr != [0, 0] or black_ssr != [0, 0]
    if is_data:
        print(f'{floor}楼: {draw_ssr[0]}抽{draw_ssr[1]}金,{black_ssr[0]}页黑核{black_ssr[1]}金,图 {"有" if attach else "无"}')

    return {
        'floor': floor,
        'draw_ssr': draw_ssr,
        'black_ssr': black_ssr,
        'attach': attach,
        'is_data': is_data,
    }
# 每页抓取
def fetch_page(page, timeout=10):
    """Download one page of the thread and parse every post on it.

    Args:
        page: Page number passed as the ``page`` query parameter.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error. Without it a stalled connection would block the
            whole script indefinitely.

    Returns:
        A list with one ``solve_item`` record per ``span.postcontent``
        element found on the page.
    """
    params = (
        ('tid', '30008586'),
        ('_ff', '836'),
        ('page', page),
    )
    resp = requests.get(
        'https://nga.178.com/read.php',
        headers=headers,
        params=params,
        timeout=timeout,
    )
    # Use a separate name for the parsed tree instead of rebinding ``page``.
    tree = etree.HTML(resp.text)
    return [solve_item(node) for node in tree.cssselect("span.postcontent")]
# main
# Collect the records of pages 1 through 7 into one flat list.
all_data = [
    record
    for page_no in range(1, 8)
    for record in fetch_page(page_no)
]
# 导出excel
import csv
import time
# Timestamped output name so repeated runs do not overwrite each other.
csv_name = '幻塔概率统计-%s.csv' % time.strftime('%Y-%m-%d %H_%M_%S', time.localtime())
# utf_8_sig writes a UTF-8 BOM so Excel does not garble the Chinese text.
with open(csv_name, 'w', newline='', encoding='utf_8_sig') as f:
    writer = csv.writer(f)
    writer.writerow(['楼层', '抽卡(金/红)', '抽卡SSR数', '黑核抽数', '黑核SSR数', '带图', '存在数据'])
    writer.writerows(
        [
            row['floor'],
            row['draw_ssr'][0],
            row['draw_ssr'][1],
            row['black_ssr'][0],
            row['black_ssr'][1],
            row['attach'],
            row['is_data'],
        ]
        for row in all_data
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment