Skip to content

Instantly share code, notes, and snippets.

@bjctw
Last active March 31, 2022 02:59
Show Gist options
  • Save bjctw/4854e67b9888c0f5686aa50cb3276046 to your computer and use it in GitHub Desktop.
Save bjctw/4854e67b9888c0f5686aa50cb3276046 to your computer and use it in GitHub Desktop.
Download Niconama timeshift comments
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Download comments from Niconama
#
# Modified from @panam510's and @tomo0611's programs
#https://gist.github.com/panam510/c5d0fd8cd969e2809f87ced217a4f6d8
#https://gist.github.com/tomo0611/68bda43be6574182b2f58473eb577c78
import sys
import requests
import html
import json
import re
import websocket
try:
import thread
except ImportError:
import _thread as thread
import time
import random
import xml.etree.ElementTree as ET
from xml.dom.minidom import parseString
from datetime import datetime
import os.path
from datetime import datetime, timedelta, timezone
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Non-zero turns on the verbose traces printed by the WebSocket callbacks.
is_debug = 0
# Non-zero deletes the timeshift reservation after its comments were written.
is_remove_after_download = 1
class bcolors:
    """Terminal escape sequences used to decorate console output.

    Each attribute is a complete escape sequence; ``ENDC`` resets the
    attributes again and is appended by ``print_color`` after every message.
    """

    # Colours.
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKCYAN = '\033[96m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'

    # Reset and text attributes.
    ENDC = '\033[0m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'
def print_color(s, color=bcolors.OKGREEN):
    """Print *s* surrounded by *color* and the ``bcolors.ENDC`` reset.

    Args:
        s: The message. Any object is accepted; it is formatted with ``str()``
            (plain concatenation would raise TypeError for non-strings).
        color: Escape sequence (or concatenation of several) written before
            the message. Defaults to ``bcolors.OKGREEN``.
    """
    print(f"{color}{s}{bcolors.ENDC}")
def print_err(s):
    """Print *s* as an error, i.e. through ``print_color`` in ``bcolors.FAIL``."""
    print_color(s, color=bcolors.FAIL)
# Translation table used by xmlesc(): the five XML special characters are
# replaced by their predefined entities.
table = str.maketrans({
    "<": "&lt;",
    ">": "&gt;",
    "&": "&amp;",
    "'": "&apos;",
    '"': "&quot;",
})
# Characters that XML 1.0 forbids outright (C0 controls other than TAB/LF/CR,
# surrogate code points, U+FFFE/U+FFFF) cannot be escaped, so they are dropped;
# a single one of them would make the generated document unparsable.
table.update(dict.fromkeys(
    [c for c in range(0x20) if c not in (0x09, 0x0A, 0x0D)]
    + list(range(0xD800, 0xE000))
    + [0xFFFE, 0xFFFF]
))


def xmlesc(txt):
    """Return *txt* made safe for use as XML text or attribute content.

    ``< > & ' "`` become entities and characters that are illegal in XML 1.0
    are removed. All other characters are returned unchanged.
    """
    return txt.translate(table)
print_color("\n\n=== Download Niconama timeshift comments ===\n", bcolors.HEADER + bcolors.UNDERLINE)

# Locations of chromedriver and of the browser profile that holds the
# logged-in session. These are the native-Linux defaults.
webdriver_path = "/home/ben/sf_share_ubuntu/selenium/chromedriver"
user_data_path = "/home/ben/snap/chromium/common/chromium"
# Bare profile name only: the "profile-directory=" switch is prepended below
# (the value used to contain the switch as well, producing a doubled prefix).
profile_path = "Default"
pathname = "/mnt/c/Users/bchen/Documents/nico/"  # only on WSL
if os.path.isdir(pathname):
    # Windows platform
    webdriver_path = "/mnt/c/Users/bchen/share_ubuntu/selenium/chromedriver.exe"
    #user_data_path = "/mnt/c/Users/bchen/AppData/Local/Google/Chrome/User Data"
    user_data_path = "C:\\Users\\bchen\\AppData\\Local\\Google\\Chrome\\selenium_user_data"
    profile_path = "Profile 2"

ser = Service(webdriver_path)
chrome_options = Options()
opt = "user-data-dir=" + user_data_path
chrome_options.add_argument(opt)
opt = "profile-directory=" + profile_path
chrome_options.add_argument(opt)
driver = webdriver.Chrome(service=ser, options=chrome_options)
driver.minimize_window()
driver.get("https://www.nicovideo.jp/my/timeshift-reservations")

# Wait (timeout 3000, polling every 10) until the element with id
# "UserPage-app" is present; give up and quit the browser otherwise.
try:
    element = WebDriverWait(driver, 3000, poll_frequency=10).until(
        EC.presence_of_element_located((By.ID, "UserPage-app"))
    )
except Exception:
    # Exception rather than a bare except, so Ctrl-C is not reported as a
    # login timeout.
    driver.quit()
    sys.exit("Login timeout!\n")
#print('Got Live Items!')

# Re-use the browser's identity in a plain requests session.
ses = requests.Session()
# Set correct user agent
selenium_user_agent = driver.execute_script("return navigator.userAgent;")
ses.headers.update({"user-agent": selenium_user_agent})
cookies = driver.get_cookies()
#print(cookies)
for cookie in cookies:
    cookie_kwargs = {
        "domain": cookie['domain'],
        "path": cookie['path'],
        "secure": cookie['secure'],
        # requests keeps non-standard attributes in a dict under `rest`
        # (its own default is {"HttpOnly": None}); a bare bool is not a
        # valid value there.
        "rest": {"HttpOnly": cookie.get('httpOnly')},
    }
    if 'expiry' in cookie:
        cookie_kwargs["expires"] = cookie['expiry']
    ses.cookies.set(cookie['name'], cookie['value'], **cookie_kwargs)
driver.quit()
#sys.exit("test done.\n") #for test

# Get timeshift reserved items
url = 'https://live.nicovideo.jp/api/watchingreservation?mode=detaillist'
response = ses.get(url)
# Fail here with the HTTP error instead of with an XML parse error below.
response.raise_for_status()
root = ET.fromstring(response.text)

# Program ids to process; put ids here by hand to bypass the reservation list.
videos = []
#videos = ["330357885"]
titles = []

# State shared between the main loop and the WebSocket callbacks.
json_data = {}      # embedded page data of the current program
room_info = {}      # last "room" message received on the watch socket
statistics = {}
all_chats = []      # chats collected so far for the current program
chat_msgs = []      # chats of the batch currently being received
last_res = 0        # last_res reported by the latest "thread" message
min_res = 50000
vid_end = 0         # set to 1 once the chat numbered 1 was received
when = 0            # "when" value used for the next batch request
MESSAGE_COUNT = 1000  # chats requested per batch (sent as res_from=-N)
ws = 0              # watch WebSocketApp
ws2 = 0             # message-server WebSocketApp

# The locks are used as one-shot signals: all start out held, a callback
# releases one, and the main thread blocks on acquire() until that happens.
vid_thread_lock = thread.allocate_lock()
msg_thread_lock = thread.allocate_lock()
msg_receive_lock = thread.allocate_lock()
vid_thread_lock.acquire()
msg_thread_lock.acquire()
msg_receive_lock.acquire()

if not videos:
    # Take every reserved item whose status is WATCH.
    for reserved_item in root.findall('.//reserved_item'):
        if reserved_item.findtext('./status') == 'WATCH':
            videos.append(reserved_item.findtext('./vid'))
            titles.append(reserved_item.findtext('./title'))

# Debug aid:
# print('VIDEOS:')
# print(videos)
# print('TITLES:')
# print(titles)
#Download comments
for vid in videos:
    # Reset the per-program state shared with the WebSocket callbacks.
    vid_end = 0
    when = 0
    last_res = 0
    get_first = 0
    room_info.clear()
    all_chats.clear()
    chat_msgs.clear()
    i = 0
    liveid = 'lv' + vid
    url = 'https://live2.nicovideo.jp/watch/' + liveid
    print("URL: " + url)
    res_text = ses.get(url).text
    # The watch page carries its data as HTML-escaped JSON in the data-props
    # attribute of the "embedded-data" script tag; capture just the JSON.
    results = re.findall(
        r'<script id="embedded-data" data-props="(\{.*?\})"></script>', res_text)
    if not results:
        # Without this guard the data of the previous program would be reused.
        print_err("No embedded program data found for " + liveid + ", skipped.")
        continue
    # https://docs.python.org/3/library/html.html
    data = html.unescape(results[-1])
    json_data = json.loads(data)
    #j = json.dumps(j, ensure_ascii=False, allow_nan=True, indent=4)
    #print(j)
    #print("Title : "+json_data["socialGroup"]["name"])
    title = json_data["program"]["title"]
    print("Title : " + title)
    beginTime = json_data["program"]["beginTime"]
    communities_id = json_data["socialGroup"]["id"]
def on_message(ws, message):
    """Callback for messages arriving on the watch (program) WebSocket.

    ``room``: the message is stored in the global ``room_info`` and
    ``vid_thread_lock`` is released if it is held, which lets the thread
    blocked on that lock continue.
    ``ping``: answered with ``pong`` followed by ``keepSeat``.
    Every other message type is ignored.
    """
    global room_info
    # Examples of what arrives here:
    # {"type":"serverTime","data":{"currentMs":"2020-12-16T15:59:20.450+09:00"}}
    # {"type":"seat","data":{"keepIntervalSec":30}}
    # {"type":"stream","data":{"uri":"https://XXX/master.m3u8?...","quality":"high","protocol":"hls"}}
    # {"type":"room","data":{"name":"アリーナ","messageServer":{"uri":"wss://msgd.live2.nicovideo.jp/websocket","type":"niwavided"},"threadId":"M.XXXXX","isFirst":true,"waybackkey":"XXX"}}
    # {"type":"schedule","data":{"begin":"2020-12-16T11:00:00+09:00","end":"2020-12-17T04:00:00+09:00"}}
    # {"type":"statistics","data":{"viewers":7465,"comments":9668,"adPoints":6300,"giftPoints":1270}}
    # {"type":"ping"}
    if is_debug:
        print("on_msg : " + message)
    js = json.loads(message)
    #print(json.dumps(js))
    kind = js["type"]
    if kind == "room":
        room_info = js
        if vid_thread_lock.locked():
            vid_thread_lock.release()
        return
    if kind == "ping":
        for reply in ('{"type":"pong"}', '{"type":"keepSeat"}'):
            ws.send(reply)
def on_error(ws, error):
    """Report an error from the watch WebSocket on stdout.

    Args:
        ws: The WebSocketApp the error belongs to (unused).
        error: An exception instance or a message string. It is converted with
            ``str()``; concatenating an exception directly raised TypeError
            inside the callback, so the real error was never shown.
    """
    if isinstance(error, SystemExit):
        # thread.exit(), used by the close handlers, raises SystemExit.
        # NOTE(review): some websocket-client versions appear to forward it to
        # this callback; it is a normal shutdown, not an error.
        return
    print("on_err : " + str(error))
def on_close(ws, close_status_code=None, close_msg=None):
    """Callback run when the watch WebSocket is closed.

    Releases ``vid_thread_lock`` if it is held, so the main thread waiting on
    it can continue, and then ends the calling thread.

    Args:
        ws: The WebSocketApp that was closed (unused).
        close_status_code: Close code, if the library supplies one.
        close_msg: Close reason, if the library supplies one.

    The two optional parameters make the callback usable both with
    websocket-client versions that call ``on_close(ws)`` and with versions
    that call ``on_close(ws, close_status_code, close_msg)``.
    """
    if is_debug:
        print("### closed ###")
    if vid_thread_lock.locked():
        vid_thread_lock.release()
    thread.exit()
def on_open(ws):
    """Callback run once the watch WebSocket is connected.

    Sends the ``startWatching`` request (high quality HLS stream, comments
    over WebSocket, no reconnect).
    """
    request = {
        "type": "startWatching",
        "data": {
            "stream": {
                "quality": "high",
                "protocol": "hls",
                "latency": "high",
                "chasePlay": False,
            },
            "room": {"protocol": "webSocket", "commentable": True},
            "reconnect": False,
        },
    }
    # Compact separators reproduce the exact text that was sent before.
    ws.send(json.dumps(request, separators=(",", ":")))
def startWebSocket(*args):
    """Thread entry point: open the watch WebSocket and serve it until closed.

    The URL is the ``webSocketUrl`` of the embedded page data (``json_data``)
    with ``frontend_id`` appended. The created WebSocketApp is stored in the
    global ``ws`` so the main thread can close it.

    Args:
        *args: Ignored; present for ``thread.start_new_thread``.
    """
    global ws
    url = (json_data["site"]["relive"]["webSocketUrl"]
           + "&frontend_id=" + str(json_data["site"]["frontendId"]))
    if is_debug:
        print(url)
    # A list of header lines; the original one-element set literal only worked
    # because the library happens to iterate over whatever it is given.
    headers = ['User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:86.0) Gecko/20100101 Firefox/86.0']
    ws = websocket.WebSocketApp(
        url,
        header=headers,
        on_open=on_open,
        on_message=on_message,
        on_error=on_error,
        on_close=on_close,
    )
    ws.run_forever()
# Run the watch connection on its own thread and block until a callback
# releases vid_thread_lock (this replaced polling room_info once a second).
# NOTE(review): on_close releases the same lock, so room_info can still be
# empty at this point if the socket closed before a "room" message arrived.
thread.start_new_thread(startWebSocket, ())
vid_thread_lock.acquire()
if is_debug:
    print("RoomInfo was found!")
# Callback for messages arriving on the comment (message-server) WebSocket.
#
# Messages are told apart by their top-level key:
#   "thread" - header of a reply. A non-zero resultcode is printed and the
#              message is dropped; otherwise last_res is stored and displayed.
#   "chat"   - one comment. It is appended to the current batch (chat_msgs).
#              While get_first is unset, the chat's date + 1 becomes `when`,
#              the bound of the next request; a chat numbered 1 sets vid_end.
#   "ping"   - only content "rf:0" is acted on: the batch is put in front of
#              the chats gathered so far (all_chats), then either the next
#              batch is requested or completion is signalled by releasing
#              msg_receive_lock and calling on_close2 directly.
#
# NOTE(review): the indentation of this file was lost, so the nesting of the
# statements below cannot be read off the text. In particular it is unclear
# whether the `if last_res > MESSAGE_COUNT ...`/`else` pair sits inside
# `if(len(chat_msgs) != 0)` or beside it. If it is nested, an "rf:0" that
# follows an empty batch (or a non-zero resultcode) never releases
# msg_receive_lock and the main thread waits forever - confirm against the
# original gist before changing this logic.
def on_message2(ws, message):
global chat_msgs, all_chats, last_res, vid_end, MESSAGE_COUNT, when, is_debug, get_first, min_res
#global messages, starts_since
# {"chat":{"thread":"","no":2998,"vpos":190476,"date":12381493116,"date_usec":1274545,"mail":"184","user_id":"fUOYn-XXX","premium":1,"anonymity":1,"content":"てすと"}}
chat_json = json.loads(message)
if is_debug:
print("on_msg2 : "+json.dumps(chat_json, ensure_ascii=False, allow_nan=True))
if 'thread' in chat_json:
#print('THREAD => ')
# Reply header: stop on an error code, otherwise remember last_res.
if chat_json["thread"]["resultcode"] != 0:
print("ERROR! RESULT CODE: " + str(chat_json["thread"]["resultcode"]))
return
else:
last_res = chat_json["thread"]["last_res"]
if(is_debug):
print("last_res: " + str(last_res))
else:
# "\r" rewrites the same console line as a progress display.
print("\rlast_res: " + "\rlast_res: " + str(last_res), end='')
elif 'chat' in chat_json:
#print('CHAT => ')
chat_msgs.append(chat_json)
#print("XML: " + xml)
no = chat_json["chat"]["no"]
''' 2021/4/26 the no of returned chats can be not reach to (last_res - MESSAGE_COUNT + 1) (NG comments are omitted)
(eg. lv331451271 =>
SEND:
[{"ping":{"content":"rs:0"}}, {"ping":{"content":"ps:0"}}, {"thread":{"thread":"M.LrvAy76r5I9kiSjqU3oTyg", "version":"20061206","user_id":"guest", "waybackkey": "1618461706.x4bGLEcIdWXDIdHyDiqeF1wH8bo", "when":"1619363335", "res_from":-1000, "with_global":1,"scores":1,"nicoru":0}}, {"ping":{"content":"pf:0"}},{"ping":{"content":"rf:0"}}]
last_res: 1891
on_msg2 : {"chat": {"thread": "M.LrvAy76r5I9kiSjqU3oTyg", "no": 895, "vpos": 2624800, "date": 1619317044, "date_usec": 592610, "mail": "184", "user_id": "ih45CPpNjlql2xYuWDBNNUW5EWM", "anonymity": 1, "content": "フジ→計812人内プレ265人総14216米◆MX→計482人内プレ210人総22086米"}}
=> wait for 892 but never get it
ORIGINAL CODE:
if no == last_res - MESSAGE_COUNT + 1:
when = chat_json["chat"]["date"] + 1 # +1 for including comments with the same time
if(is_debug):
print('CHAT => when:' + str(when))
'''
# The first chat received after get_first was cleared supplies the time
# bound of the next request (see the note in the string above for why the
# chat number is not used for this).
if not get_first:
get_first =1
when = chat_json["chat"]["date"] + 1 # +1 for including comments with the same time
if(is_debug):
print('CHAT => when:' + str(when))
# Chat number 1 marks that the beginning of the thread was reached.
if no == 1:
vid_end = 1
elif 'ping' in chat_json:
# "rf:0" is the last ping of each request sent by this script, so it is
# taken as the end of a batch.
if chat_json["ping"]["content"] == "rf:0":
if(len(chat_msgs) != 0):
# Put the new batch in front of everything collected so far.
chat_msgs.extend(all_chats)
all_chats = chat_msgs.copy()
chat_msgs= []
if last_res > MESSAGE_COUNT and vid_end != 1:
min_res = last_res
get_first = 0
# Same request as the initial one, but bounded by `when` and carrying
# the waybackkey of the room.
msg = ('[{"ping":{"content":"rs:0"}}, {"ping":{"content":"ps:0"}}, '
'{"thread":{"thread":"'+room_info["data"]["threadId"]+'", "version":"20061206","user_id":"guest", '
'"waybackkey": "'+ room_info["data"]["waybackkey"]+'", '
'"when":"'+ str(when) + '", '
'"res_from":-' + str(MESSAGE_COUNT) + ', '
'"with_global":1,"scores":1,"nicoru":0}}, '
'{"ping":{"content":"pf:0"}},{"ping":{"content":"rf:0"}}]')
if(is_debug):
print("SEND:")
print(msg)
ws.send(msg)
#2022/03/08 lv335924572 stop when last_res=321 and the first no=16
#elif vid_end == 1 or min_res == last_res:
else:
# Nothing more to request: end the progress line and wake the main thread.
print("\n", end='')
if(is_debug):
print("msg_receive_lock.release()")
msg_receive_lock.release()
on_close2(ws) #workaround for on_close2 is not called
#print("on_msg2 : "+json.dumps(chat_json, ensure_ascii=False, allow_nan=True))
def on_error2(ws, error):
    """Report an error from the comment (message-server) WebSocket on stdout.

    Args:
        ws: The WebSocketApp the error belongs to (unused).
        error: An exception instance or a message string. It is converted with
            ``str()``; concatenating an exception directly raised TypeError
            inside the callback, so the real error was never shown.
    """
    if isinstance(error, SystemExit):
        # thread.exit(), used by the close handlers, raises SystemExit.
        # NOTE(review): some websocket-client versions appear to forward it to
        # this callback; it is a normal shutdown, not an error.
        return
    print("on_err2 : " + str(error))
def on_close2(ws, close_status_code=None, close_msg=None):
    """Callback run when the comment (message-server) WebSocket is closed.

    Releases ``msg_thread_lock`` if it is held, so the main thread waiting on
    it can continue, and then ends the calling thread. ``on_message2`` also
    calls this function directly with only ``ws``.

    Args:
        ws: The WebSocketApp that was closed (unused).
        close_status_code: Close code, if the library supplies one.
        close_msg: Close reason, if the library supplies one.

    The two optional parameters make the callback usable both with
    websocket-client versions that call ``on_close(ws)`` and with versions
    that call ``on_close(ws, close_status_code, close_msg)``.
    """
    if is_debug:
        print("### closed2 ###")
    if msg_thread_lock.locked():
        msg_thread_lock.release()
    thread.exit()
def on_open2(ws):
    """Callback run once the comment (message-server) WebSocket is connected.

    Sends the first thread request: the newest ``MESSAGE_COUNT`` comments of
    the room's thread, with ``when`` fixed at 1893427200 (2030/1/1).
    """
    if is_debug:
        print("Connected to Messaging Server!")
    # time.sleep(1)
    thread_id = room_info["data"]["threadId"]
    pieces = [
        '[{"ping":{"content":"rs:0"}}, {"ping":{"content":"ps:0"}}, ',
        '{"thread":{"thread":"', thread_id, '", "version":"20061206","user_id":"guest", ',
        '"when":"1893427200", ',  # 2030/1/1
        '"res_from":-', str(MESSAGE_COUNT), ', ',
        '"with_global":1,"scores":1,"nicoru":0}}, ',
        '{"ping":{"content":"pf:0"}},{"ping":{"content":"rf:0"}}]',
    ]
    ws.send("".join(pieces))
def startWebSocket2(*args):
    """Thread entry point: open the comment WebSocket and serve it until closed.

    Connects to the message-server URI found in ``room_info``. The created
    WebSocketApp is stored in the global ``ws2`` so the main thread can close
    it.

    Args:
        *args: Ignored; present for ``thread.start_new_thread``.
    """
    global ws2
    if is_debug:
        print("Connect to Messaging Server...")
    # A list of header lines; the original one-element set literal only worked
    # because the library happens to iterate over whatever it is given.
    headers2 = ['User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:86.0) Gecko/20100101 Firefox/86.0']
    ws2 = websocket.WebSocketApp(
        room_info["data"]["messageServer"]["uri"],
        header=headers2,
        on_open=on_open2,
        on_message=on_message2,
        on_error=on_error2,
        on_close=on_close2,
    )
    ws2.run_forever()
thread.start_new_thread(startWebSocket2, ())
# sys.exit("debug exit.\n")

# Wait until on_message2 signals that all comments were received.
msg_receive_lock.acquire()
ws2.close()
ws.close()
# Wait for the close handlers of both connections.
vid_thread_lock.acquire()
msg_thread_lock.acquire()

# Each collected entry such as
#   {'chat': {'thread': 'M.GAoZ2Y5O2t9ZdPQpwBMe3Q', 'no': 12621, 'vpos': 8047055,
#             'date': 1612027275, 'date_usec': 353764, 'mail': '184',
#             'user_id': '_iO-Bd_ECjfR-AEHgD-w0_2mVuk', 'anonymity': 1,
#             'content': 'レシピ本出すんだねw'}}
# becomes one element:
#   <chat anonymity="1" date="1611064751" mail="184" no="8" thread="M.uRri7oe_UGOd7MBAywE5qg"
#         premium="1" user_id="qVhqeFQXjePFw-6SXbQIGWTdEcc" vpos="6835000">どアプ待機</chat>
xml_parts = ['<?xml version="1.0" encoding="utf-8"?>\n<packet>\n']
last_chat_no = 0
chat_count = 0
for entry in all_chats:  # entry is a dict with a single "chat" key
    chat = entry["chat"]
    if chat["no"] <= last_chat_no:
        # Skip the same chats: batches overlap because each request is
        # bounded by the previous batch's first date + 1.
        continue
    last_chat_no = chat["no"]
    chat_count += 1
    # Attributes are written in this fixed order when present; values are
    # escaped so that an unexpected quote cannot break the document.
    attrs = "".join(
        f' {key}="{xmlesc(str(chat[key]))}"'
        for key in ("thread", "no", "vpos", "date", "date_usec",
                    "mail", "user_id", "anonymity", "premium")
        if key in chat
    )
    xml_parts.append("<chat" + attrs + ">" + xmlesc(chat["content"]) + "</chat>\n")
xml_parts.append('</packet>')
xml = "".join(xml_parts)
#print(xml)

#parse it to prevent XML syntax error
dom = parseString(xml)
#print(dom.toprettyxml())

# "/" would be taken as a directory separator in the file name; replace it by
# the full-width solidus, which looks alike (the old re.sub was a no-op).
title = title.replace("/", "\uff0f")
filename = (title + "-" +
            datetime.fromtimestamp(beginTime).strftime('%Y%m%d') + "-lv" + vid + ".xml")
pathname = "/mnt/c/Users/bchen/Documents/nico/" + communities_id + "/"
if os.path.exists(pathname):
    filename = pathname + filename
else:
    # The file is then written relative to the current directory.
    print_err(pathname + " is not existed.")
print(bcolors.WARNING + "writting " + str(chat_count) + " comments to [" + filename + "]..." + bcolors.ENDC)
# Write with the encoding that the XML declaration announces; the platform
# default is not necessarily UTF-8.
with open(filename, "w", encoding="utf-8") as xml_file:
    dom.writexml(xml_file, addindent="\t", encoding="utf-8")
    #dom.writexml(xml_file, addindent="\t", newl="\n")

# Remove the reservation only if the file was saved into its community folder.
if is_remove_after_download and os.path.isdir(pathname) and os.path.isfile(filename):
    #url = "https://live.nicovideo.jp/my?delete=timeshift&vid=" + vid + "&confirm=" + token #abandoned
    url = "https://live2.nicovideo.jp/api/v2/programs/lv" + vid + "/timeshift/reservation"
    response = ses.delete(url)
    #print(response.text)
    response.raise_for_status()
    print("Reserved item " + vid + " is removed.")
i += 1
# Final report: number of programs that were in the work list.
print_color(f"{len(videos)} file(s) processed.", bcolors.OKCYAN)
#url = "https://account.nicovideo.jp/logout"
#url = "https://secure.nicovideo.jp/secure/logout"
#response = ses.get(url)
#response.raise_for_status()
print('Done')
#print('RES:'+response.text);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment