Skip to content

Instantly share code, notes, and snippets.

@gougou6423
Created November 23, 2019 06:56
Show Gist options
  • Save gougou6423/818d6a40d20574e79ff7cd693534e18d to your computer and use it in GitHub Desktop.
Save gougou6423/818d6a40d20574e79ff7cd693534e18d to your computer and use it in GitHub Desktop.
import base64, math, re
from html.parser import HTMLParser
class MyHTMLParser(HTMLParser):
    """Collect the data needed to put a saved page's paragraphs in order.

    While a document is fed in, three attributes are filled:

    * ``table`` -- list of strings produced by base64-decoding the second
      attribute value of a ``<meta>`` tag whose first attribute value is
      ``'client'`` and splitting the decoded text on ``[A-Z]+%``.
    * ``line`` -- text chunks seen while the most recent start tag is
      exactly ``<p>``; non-breaking spaces become plain spaces and a
      newline is appended to every chunk.
    * ``c`` -- integer taken from a ``codeurl="<digits>"`` assignment
      found inside a ``<script type="text/javascript">`` element
      (stays ``0`` when no such assignment is seen).
    """

    # Raw-string patterns, compiled once: ``\d`` in a normal string literal
    # is an invalid escape sequence and triggers a warning on newer Pythons.
    _TABLE_SEPARATOR = re.compile(r'[A-Z]+%')
    _CODEURL = re.compile(r'codeurl="([\d]+)"')

    def __init__(self):
        super().__init__()
        self.line = []
        self.table = []
        self.c = 0

    def handle_starttag(self, tag, attrs):
        """Extract the ordering table from the matching ``<meta>`` tag.

        The tag is recognised positionally: the first attribute's value
        must be ``'client'`` and the second attribute's value carries the
        base64 payload. Tags with fewer than two attributes, or with a
        valueless second attribute, are ignored instead of raising.
        """
        if tag != 'meta' or len(attrs) < 2:
            return
        if attrs[0][1] != 'client':
            return
        encoded = attrs[1][1]
        if encoded is None:
            # Attribute written without a value, e.g. ``<meta ... content>``.
            return
        decoded = base64.b64decode(encoded).decode()
        self.table = self._TABLE_SEPARATOR.split(decoded)

    def handle_data(self, data):
        """Record paragraph text and the ``codeurl`` number.

        The decision is based on the text of the most recently opened
        start tag, so the tag must match the literal strings below exactly.
        NOTE(review): text that follows a closing ``</p>`` before the next
        start tag presumably also lands in ``line`` -- confirm against the
        real input.
        """
        start_tag = self.get_starttag_text()
        if start_tag == '<p>':
            self.line.append(data.replace('\xa0', ' ') + '\n')
        elif start_tag == '<script type="text/javascript">' and 'codeurl' in data:
            match = self._CODEURL.search(data)
            # The script may mention "codeurl" without the expected
            # ``codeurl="<digits>"`` form; keep the previous value then.
            if match is not None:
                self.c = int(match.group(1))
def decode(table, c, line):
    """Return the entries of ``line`` rearranged according to ``table``.

    For the entry at zero-based position ``i`` of ``table``, ``line[i]`` is
    stored at index ``int(table[i]) - ceil((i + 1) % c)`` of the result.
    The result has the same length as ``line``; slots that no table entry
    points at stay as empty strings.

    Raises ``ZeroDivisionError`` when ``c`` is ``0`` and ``table`` is not
    empty, and ``ValueError`` when a table entry is not an integer literal.
    """
    restored = [''] * len(line)
    # Count positions from 1 because the offset is derived from ``i + 1``.
    for position, raw_code in enumerate(table, start=1):
        shift = math.ceil(position % c)
        target = int(raw_code) - shift
        restored[target] = line[position - 1]
    return restored
# Driver: parse the saved page 'test.html', reorder the collected paragraph
# text with the table and codeurl value the parser found, and write the
# result to 'test.txt'.
parser = MyHTMLParser()
with open('test.html') as f:
    html_text = f.read()
parser.feed(html_text)
line = decode(parser.table, parser.c, parser.line)
with open('test.txt', 'w') as f:
    f.write(''.join(line))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment