Created
February 22, 2022 05:36
-
-
Save rinsuki/ea4bf57020037577c4c5c8120c37fc19 to your computer and use it in GitHub Desktop.
Twitterデータ(DLできるやつ)からニコニコのマイリストしたログを抽出
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import datetime | |
import sys | |
# Directory of the downloaded Twitter data, taken from the first
# command-line argument. jsloader() resolves data file names against it.
# NOTE: the name shadows the builtin dir(); it is kept because jsloader()
# reads this module-level global by that name.
if len(sys.argv) < 2:
    # Exit with a usage hint instead of an opaque IndexError traceback.
    sys.exit("usage: " + sys.argv[0] + " <twitter-data-directory>")
dir = sys.argv[1]
def jsloader(o):
    """Load one JavaScript-wrapped JSON data file from the data directory.

    The file must consist of the assignment prefix
    ``window.<globalName> = `` immediately followed by a JSON document.

    Args:
        o: Mapping with a ``"fileName"`` entry (path relative to the
            module-level ``dir``) and a ``"globalName"`` entry (the name
            assigned on ``window`` at the start of the file).

    Returns:
        The value decoded from the JSON text that follows the prefix.

    Raises:
        ValueError: If the file does not start with the expected prefix,
            or if the remaining text is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    path = dir + "/" + o["fileName"]
    # Decode as UTF-8 explicitly: the script matches Japanese text in this
    # data, so the result must not depend on the platform's locale encoding.
    with open(path, "r", encoding="utf-8") as f:
        r = f.read()
    prefix = "window." + o["globalName"] + " = "
    if not r.startswith(prefix):
        raise ValueError(
            "unexpected file header in " + path
            + ": expected it to start with " + repr(prefix)
        )
    # Everything after the assignment prefix is plain JSON.
    return json.loads(r[len(prefix):])
# Read the manifest, which lists the data files available per data type.
manifest = jsloader({"globalName": "__THAR_CONFIG", "fileName": "data/manifest.js"})
print(manifest)

# Collect one record per "added to mylist" tweet posted through the
# niconico client whose source string is matched below.
mylists = []
for file_entry in manifest["dataTypes"]["tweet"]["files"]:
    for wrapper in jsloader(file_entry):
        status = wrapper["tweet"]
        from_nicorepo = status["source"] == '<a href="http://www.nicovideo.jp/" rel="nofollow">niconico ニコレポ連携</a>'
        # Skip tweets from other clients and tweets without the mylist prefix.
        if not from_nicorepo or not status["full_text"].startswith("【マイリスト】"):
            continue
        # The text of the last hashtag is stored as the niconico id.
        niconico_id = status["entities"]["hashtags"][-1]["text"]
        tweet_id = status["id"]
        posted_at = datetime.datetime.strptime(
            status["created_at"], "%a %b %d %H:%M:%S %z %Y"
        )
        mylists.append({
            "niconico_id": niconico_id,
            "tweet_id": tweet_id,
            "created_at": posted_at.isoformat(),
        })

# Order the records by their ISO-8601 timestamp string.
mylists.sort(key=lambda entry: entry["created_at"])

# Write the result as tab-indented JSON into the current working directory.
with open("extracted_mylists.json", "w") as out:
    out.write(json.dumps(mylists, indent="\t"))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment