Skip to content

Instantly share code, notes, and snippets.

@Heinrich-XIAO
Created March 29, 2024 16:07
Show Gist options
  • Save Heinrich-XIAO/cacf0f09a76fb1e03a0bb186bf2ce7bc to your computer and use it in GitHub Desktop.
Save Heinrich-XIAO/cacf0f09a76fb1e03a0bb186bf2ce7bc to your computer and use it in GitHub Desktop.
# Scrapes public Duolingo user profiles by requesting sequential numeric user IDs.
import requests
from datetime import datetime, timezone
def date(epoch_time: float) -> str:
    """Format a Unix timestamp as a ``dd/mm/yy HH:MM:SS`` string in UTC.

    Args:
        epoch_time: Seconds since the Unix epoch.

    Returns:
        The timestamp rendered in UTC, e.g. ``'01/01/70 00:00:00'`` for ``0``.
    """
    # Build a timezone-aware datetime so the result does not depend on the
    # local timezone of the machine running the script.
    moment = datetime.fromtimestamp(epoch_time, timezone.utc)
    return moment.strftime('%d/%m/%y %H:%M:%S')
# Request headers are identical for every user, so build them once up front.
headers = {'User-Agent': 'Mozilla/5.0 (X11; Windows x86_64; rv:120.0) Gecko/20100101 Firefox/120.0',}
# Profile fields requested from the endpoint; the braces are a literal part of
# the query string (sub-field selection for streakData).
fields = (
    'courses,creationDate,fromLanguage,gemsConfig,globalAmbassadorStatus,'
    'hasPlus,id,learningLanguage,lingots,name,picture,roles,streak,'
    'streakData{currentStreak,previousStreak},subscriberLevel,totalXp,username'
)

# Walk candidate user IDs in ascending order, dumping every profile found to
# output.txt and printing a short human-readable summary.
for i in range(1, 1000000000000000000):
    # NOTE(review): no timeout is passed, so a stalled connection blocks the
    # scan indefinitely; consider adding one together with retry handling.
    res = requests.get(
        f'https://www.duolingo.com/2017-06-30/users/{i}?fields={fields}',
        headers=headers,
    )
    # Responses whose body starts with "404" are treated as "no such user".
    if res.text.startswith('404'):
        continue

    try:
        user = res.json()
    except ValueError:
        # Body was not JSON (e.g. an HTML error page); skip this ID instead of
        # aborting the whole scan.
        continue
    if not user:
        continue

    # Append the raw record; the context manager closes the handle every
    # iteration, and UTF-8 keeps non-ASCII user names from failing on
    # platforms whose default encoding cannot represent them.
    with open("output.txt", "a", encoding="utf-8") as f:
        f.write(str(user))

    print(f'User Id: {user["id"]}')
    print(f'Username: {user["username"]}')
    print(f'Speaks: {user["fromLanguage"]}')
    print(f'Streak: {user["streak"]}')
    print(f'Account Creation Date: {date(user["creationDate"])}')
    print(f'Total XP: {user["totalXp"]}')
    if 'name' in user:
        print(f'Name: {user["name"]}')
    print('Learning:')
    # Print each course title once, keeping first-seen order.
    seen_titles = set()
    for course in user["courses"]:
        title = course['title']
        if title not in seen_titles:
            print(f' {title}')
            seen_titles.add(title)
    print()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment