Last active
November 30, 2022 19:57
-
-
Save Sobsz/4cf838b02de12dc2431ed6f04ead60fd to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Bulk-add sentences to a Tatoeba sentence list.

Reads sentence ids from the first ``*.csv`` file in the current directory,
skips ids already present in the first ``*.tsv`` file (the downloaded copy of
the list, if there is one), and issues one "add sentence to list" request per
remaining id. Progress and errors are printed to stdout.
"""
import csv
import glob

import requests  # `pip install requests` in command line

# Configuration: fill in both values before running.
cookies = dict(User="PUT YOUR `User` COOKIE HERE (right click on tatoeba page → inspect element → storage")
list_id = "PUT YOUR LIST ID HERE (it's in the url)"

# Change to whatever the last number printed was to continue after a restart.
start = 0

# The downloaded list, to prevent re-adding sentences that are already there.
# A set keeps the membership test below O(1) per id.
try:
    with open(glob.glob("*.tsv")[0], encoding="utf-8-sig") as file:
        # csv.reader yields [] for blank lines; skip them instead of raising.
        done = {row[0] for row in csv.reader(file, delimiter="\t") if row}
except (IndexError, OSError, UnicodeError, csv.Error):
    # No .tsv present (IndexError from the empty glob) or it is unreadable:
    # treat the list as empty rather than aborting.
    done = set()

# The sentence ids to add. It doesn't have to be a tsv; it can just be a text
# file listing them (in which case change the pattern to "sentences.txt").
# Only the first tab-separated column of each row is used.
with open(glob.glob("*.csv")[0], encoding="utf-8-sig") as file:
    ids = [
        row[0]
        for row in csv.reader(file, delimiter="\t")
        if row and row[0] not in done
    ]
print(len(ids))

for i, sentence_id in enumerate(ids[start:], start=start):
    print(i)
    try:
        response = requests.get(
            f"https://tatoeba.org/eng/sentences_lists/add_sentence_to_list/{sentence_id}/{list_id}",
            cookies=cookies,
            headers={"X-Requested-With": "XMLHttpRequest"},
            timeout=10,
        )
        # Any response body other than the list id is reported as an error.
        if response.text != list_id:
            print(f"error at {i} {sentence_id}: {response.text}")
    except Exception as e:
        # Deliberate best-effort: report the failure and move on to the next
        # id so one bad request does not stop the whole run.
        print(f"error at {i} {sentence_id}: {e}")
print("done")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment