Skip to content

Instantly share code, notes, and snippets.

Last active November 30, 2022 19:57
Show Gist options
  • Save Sobsz/4cf838b02de12dc2431ed6f04ead60fd to your computer and use it in GitHub Desktop.
Save Sobsz/4cf838b02de12dc2431ed6f04ead60fd to your computer and use it in GitHub Desktop.
import csv
import glob
import requests # `pip install requests` in command line
# --- Configuration -----------------------------------------------------------
# Session cookie sent with every request, and the id of the target list.
cookies = dict(User="PUT YOUR `User` COOKIE HERE (right click on tatoeba page → inspect element → storage")
list_id = "PUT YOUR LIST ID HERE (it's in the url)"

# Index into `ids` to start from. Change the 0 to the last index printed in an
# error message to continue after a restart.
start = 0

# URL template taking (sentence id, list id).
# NOTE(review): the template was empty in the original script (it looks like it
# was lost when the script was copied), which made every request fail while
# formatting. This is presumably the Tatoeba "add sentence to list" endpoint —
# confirm before running.
add_url = "https://tatoeba.org/en/sentences_lists/add_sentence_to_list/%s/%s"

# --- Sentences already in the list -------------------------------------------
# The downloaded list (*.tsv) is optional: it only prevents re-adding sentences
# that are already there. Without it, nothing is skipped.
done = set()
done_files = glob.glob("*.tsv")
if done_files:
    with open(done_files[0], encoding="utf-8-sig") as file:
        # `if row` skips blank lines, which csv.reader yields as empty lists.
        done = {row[0] for row in csv.reader(file, delimiter="\t") if row}

# --- Sentences to add --------------------------------------------------------
# The sentence ids you want to add. Only the first tab-separated column of each
# line is read, so a plain text file listing one id per line works too (in
# which case change the pattern to "sentences.txt").
id_files = glob.glob("*.csv")
if not id_files:
    raise SystemExit("no *.csv file with sentence ids found in the current directory")
with open(id_files[0], encoding="utf-8-sig") as file:
    ids = [
        row[0]
        for row in csv.reader(file, delimiter="\t")
        if row and row[0] not in done
    ]

# --- Add each sentence to the list -------------------------------------------
for i, sentence_id in enumerate(ids[start:], start):
    try:
        a = requests.get(
            add_url % (sentence_id, list_id),
            cookies=cookies,
            headers={"X-Requested-With": "XMLHttpRequest"},
            timeout=10,
        )
    except requests.RequestException as e:
        # Best effort: report the failure and carry on with the next sentence.
        print(f"error at {i} {sentence_id}: {e}")
        continue
    # A response body equal to the list id is treated as success; anything
    # else is reported together with the index so the run can be resumed.
    if a.text != list_id:
        print(f"error at {i} {sentence_id}: {a.text}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment