Skip to content

Instantly share code, notes, and snippets.

@ckhung
Last active June 24, 2023 09:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ckhung/e2f5e1a13f27a2b4b0fa66c3c3fad471 to your computer and use it in GitHub Desktop.
Save ckhung/e2f5e1a13f27a2b4b0fa66c3c3fad471 to your computer and use it in GitHub Desktop.
line inspector w/ selenium
# Usage: start a python3 interpreter, then run:
#     exec(open('linespector.py').read())
# Then try, line by line, the commented-out code at the end of this file.
# Wonderful reference:
# https://cosmocode.io/how-to-connect-selenium-to-an-existing-browser-that-was-opened-manually/
# ( found from here: https://stackoverflow.com/a/70088095 )
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
from datetime import datetime
import copy, re, base64, magic
def init(debugger_address="127.0.0.1:9222", chrome_driver='/usr/bin/chromedriver'):
    """Attach Selenium to an already-running Chrome and store it in ``driver``.

    The resulting WebDriver is bound to the module-level global ``driver``,
    which the other helpers in this file read.

    Args:
        debugger_address: ``host:port`` passed to Chrome's ``debuggerAddress``
            experimental option.
        chrome_driver: Filesystem path of the chromedriver executable.
    """
    global driver
    # Local import keeps this function self-contained; selenium is already a
    # dependency of this file.
    from selenium.webdriver.chrome.service import Service

    chrome_options = Options()
    chrome_options.add_experimental_option("debuggerAddress", debugger_address)
    # The driver path must be wrapped in a Service object: recent Selenium 4
    # releases no longer accept the executable path as the first positional
    # argument of webdriver.Chrome().
    driver = webdriver.Chrome(service=Service(chrome_driver), options=chrome_options)
    # print(driver.title)
# mime = magic.Magic(mime=True)
def get_all_tabs():
    """Return a dict mapping each open window's title to its window handle.

    Reads the module-level ``driver``. Every handle in
    ``driver.window_handles`` is switched to in turn so that its title can be
    read; the driver is left focused on the last handle visited. When several
    windows share a title, the handle visited last is the one kept.
    """
    def title_of(handle):
        # The title is only readable for the currently focused window.
        driver.switch_to.window(handle)
        return driver.title

    return {title_of(handle): handle for handle in driver.window_handles}
def print_all_msg(chat):
    """Print every timestamped message found under ``chat``.

    ``chat`` is searched for ``div`` elements carrying a ``data-timestamp``
    attribute, and the matches are printed in the reverse of the order
    ``find_all`` returns them. Three shapes are handled:

    * date separators (class ``messageDate-module__date_wrap__I4ily``) are
      printed as ``### <data-message-content> ###``;
    * messages carrying ``data-message-content-prefix`` are printed as
      ``HH:MM [username] text``, with ``*`` used when no username element is
      present;
    * anything else is dumped with ``prettify()`` so it can be inspected.

    Args:
        chat: A BeautifulSoup element containing the message list.
    """
    # (tag name, CSS class or None) of elements whose text must not end up in
    # the printed message body. Only the first match of each is removed.
    decorations = (
        ('button', None),
        ('span', 'metaInfo-module__read_count__8-U6j'),
        ('time', None),
    )
    msg_list = chat.find_all('div', {'data-timestamp': True})[::-1]
    for msg in msg_list:
        # data-timestamp is integer-divided by 1000 before being handed to
        # fromtimestamp(), i.e. it is treated as milliseconds; the result is
        # a naive datetime in local time.
        time_stamp = datetime.fromtimestamp(int(msg['data-timestamp']) // 1000)
        # .get() avoids a KeyError on a div that has no class attribute.
        if 'messageDate-module__date_wrap__I4ily' in (msg.get('class') or []):
            print('### {} ###'.format(msg['data-message-content']))
        elif 'data-message-content-prefix' in msg.attrs:
            # Work on a copy so the caller's parse tree is left intact.
            msg2 = copy.copy(msg)
            for tag_name, css_class in decorations:
                attrs = {'class': css_class} if css_class else {}
                to_del = msg2.find(tag_name, attrs)
                if to_del:
                    to_del.decompose()
            name_tag = msg2.find('pre', {'class': 'username-module__username__vGQGj'})
            if name_tag:
                uname = name_tag.text
                name_tag.decompose()
            else:
                uname = '*'
            print('{} [{}] {}'.format(time_stamp.strftime('%H:%M'), uname, msg2.text))
        else:
            # Failed to recognize this element: dump the element itself.
            # (msg2 is only bound in the branch above, so it must not be
            # referenced here.)
            print(msg.prettify())
# https://stackoverflow.com/a/47425305
def get_file_content_chrome(driver, uri):
    """Fetch ``uri`` from inside the browser and return its content as bytes.

    A script is injected through ``driver.execute_async_script``. It issues an
    XMLHttpRequest GET for ``uri`` with an ``arraybuffer`` response type,
    base64-encodes the response in JavaScript and hands the text back via the
    async callback; the text is decoded back to bytes here.

    Args:
        driver: Object exposing ``execute_async_script(script, *args)``.
        uri: Address to fetch; it is passed to ``xhr.open('GET', uri)``.

    Returns:
        The fetched content as ``bytes``.

    Raises:
        Exception: If the request's ``onerror`` handler fired; the handler
            reports ``xhr.status``, which is included in the message.
    """
    result = driver.execute_async_script("""
        var uri = arguments[0];
        var callback = arguments[1];
        var toBase64 = function(buffer){for(var r,n=new Uint8Array(buffer),t=n.length,a=new Uint8Array(4*Math.ceil(t/3)),i=new Uint8Array(64),o=0,c=0;64>c;++c)i[c]="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".charCodeAt(c);for(c=0;t-t%3>c;c+=3,o+=4)r=n[c]<<16|n[c+1]<<8|n[c+2],a[o]=i[r>>18],a[o+1]=i[r>>12&63],a[o+2]=i[r>>6&63],a[o+3]=i[63&r];return t%3===1?(r=n[t-1],a[o]=i[r>>2],a[o+1]=i[r<<4&63],a[o+2]=61,a[o+3]=61):t%3===2&&(r=(n[t-2]<<8)+n[t-1],a[o]=i[r>>10],a[o+1]=i[r>>4&63],a[o+2]=i[r<<2&63],a[o+3]=61),new TextDecoder("ascii").decode(a)};
        var xhr = new XMLHttpRequest();
        xhr.responseType = 'arraybuffer';
        xhr.onload = function(){ callback(toBase64(xhr.response)) };
        xhr.onerror = function(){ callback(xhr.status) };
        xhr.open('GET', uri);
        xhr.send();
        """, uri)
    # Success yields a base64 string; the error path yields a numeric status.
    if isinstance(result, int):
        raise Exception("Request failed with status %s" % result)
    return base64.b64decode(result)
def save_blob_as(blob, filepath):
    """Download ``blob`` through the browser and write it to ``filepath``.

    Uses the module-level ``driver`` together with
    ``get_file_content_chrome`` to obtain the bytes.

    Args:
        blob: Address of the resource to fetch (passed on unchanged).
        filepath: Destination path; it is opened in binary write mode, so an
            existing file is overwritten.
    """
    # Fetch before opening the destination: if the download raises, no empty
    # file is created and an existing file is not truncated.
    blob_content = get_file_content_chrome(driver, blob)
    # print(magic.from_buffer(blob_content))
    # https://github.com/ahupp/python-magic
    with open(filepath, 'wb') as f:
        f.write(blob_content)
def save_all_blobs(chat, path):
    """Save every image under ``chat`` whose ``src`` ends in a long identifier.

    An image qualifies when its ``src`` ends with ``/`` followed by at least
    30 word or ``-`` characters; that trailing identifier becomes the file
    name. Images without a ``src`` or with a non-matching ``src`` are
    skipped.

    Args:
        chat: A BeautifulSoup element to search for ``img`` tags.
        path: Directory to write into; created on demand if missing.
    """
    import os

    blob_id = re.compile(r'/([\w-]{30,})$')
    for img in chat.find_all('img'):
        # .get() rather than ['src']: an <img> without src must not abort
        # the whole run with a KeyError.
        blob = img.get('src')
        if not blob:
            continue
        m = blob_id.search(blob)
        if m is None:
            continue
        # Created lazily so nothing is touched when there is nothing to save.
        os.makedirs(path, exist_ok=True)
        # NOTE: the .jpg extension is hard-coded; the actual content type is
        # not inspected.
        save_blob_as(blob, '{}/{}.jpg'.format(path, m.group(1)))
# Example interactive session. It is wrapped in a bare string literal so that
# exec()-ing this file only defines the functions above and runs none of the
# statements below; paste them into the interpreter one line at a time.
'''
init()
all_tabs = get_all_tabs()
line_handle = all_tabs['LINE']
driver.switch_to.window(line_handle)
# After every switch to a new chat:
page_soup = BeautifulSoup(driver.page_source, 'html.parser')
# with open('a.htm', 'w') as f: f.write(page_soup.prettify())
all_chats = page_soup.find_all('div', {'class': 'chatlistItem-module__chatlist_item__MOwxh'})
current_chat = page_soup.find_all('div', {'class': 'message_list'})
assert 1==len(current_chat)
print_all_msg(current_chat[0])
save_all_blobs(current_chat[0], '/tmp/linespector')
'''
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment