Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ahmethakanbesel/fa6c8de3f90b8e0df30d79dd114eb9d7 to your computer and use it in GitHub Desktop.
Save ahmethakanbesel/fa6c8de3f90b8e0df30d79dd114eb9d7 to your computer and use it in GitHub Desktop.
Oxford Learner's Dictionaries CSV Export Tool
import csv
import os

import requests
from bs4 import BeautifulSoup
# Source page listing the Oxford 3000/5000 words, and the local files used.
WORDLIST_URL = 'https://www.oxfordlearnersdictionaries.com/wordlists/oxford3000-5000'
CACHE_FILE = 'page.html'
OUTPUT_FILE = 'words.csv'

CSV_HEADER = ['Word', 'CEFR Level', 'Type', 'Definition Path',
              'British MP3', 'British OGG', 'American MP3', 'American OGG']

# Full ``class`` attribute strings of the two audio buttons of an entry.
UK_AUDIO_CLASS = 'sound audio_play_button icon-audio pron-uk'
US_AUDIO_CLASS = 'sound audio_play_button icon-audio pron-us'


def load_page(url=WORDLIST_URL, cache_file=CACHE_FILE):
    """Return the word-list HTML, downloading it once into *cache_file*.

    The page is fetched only when *cache_file* does not exist yet; later
    runs read the cached copy.

    Raises:
        requests.RequestException: if the download fails, times out or
            returns an HTTP error status. Nothing is cached in that case.
    """
    if not os.path.isfile(cache_file):
        response = requests.get(url, timeout=30)
        # Fail before writing so an error page is never cached as the word list.
        response.raise_for_status()
        # Write with the same encoding the cache is read back with.
        with open(cache_file, 'w', encoding='utf-8') as f:
            f.write(response.text)
    with open(cache_file, 'r', encoding='utf-8') as f:
        return f.read()


def audio_sources(li, css_class):
    """Return ``(mp3, ogg)`` sources of the audio button *css_class* in *li*.

    Both values are empty strings when the entry has no such button.
    """
    button = li.find('div', {'class': css_class})
    if button is None:
        return '', ''
    return (button.attrs.get('data-src-mp3', ''),
            button.attrs.get('data-src-ogg', ''))


def extract_row(li):
    """Build one CSV row (a list of 8 strings) from a word-list ``<li>``.

    Lookups are scoped to *li* itself, so a missing element yields an empty
    field instead of borrowing data from a following entry.
    """
    attrs = li.attrs
    # ``data-ox5000`` takes precedence over ``data-ox3000`` when both exist.
    cefr = attrs.get('data-ox5000', attrs.get('data-ox3000', ''))

    type_span = li.find('span')
    word_type = type_span.text if type_span is not None else ''

    link = li.find('a')
    definition = link.get('href', '') if link is not None else ''

    british_mp3, british_ogg = audio_sources(li, UK_AUDIO_CLASS)
    american_mp3, american_ogg = audio_sources(li, US_AUDIO_CLASS)

    return [attrs['data-hw'], cefr, word_type, definition,
            british_mp3, british_ogg, american_mp3, american_ogg]


def parse_rows(html):
    """Return one row per ``<li>`` in *html* that carries ``data-hw``."""
    soup = BeautifulSoup(html, 'html.parser')
    # Only word entries have ``data-hw``; other list items are skipped.
    return [extract_row(li) for li in soup.find_all('li', attrs={'data-hw': True})]


def write_csv(rows, path=OUTPUT_FILE):
    """Write *rows* to *path* as UTF-8 CSV with proper quoting."""
    # newline='' lets the csv module control line endings itself.
    with open(path, 'w', encoding='utf-8', newline='') as f:
        csv.writer(f, lineterminator='\n').writerows(rows)


def main():
    """Export the Oxford 3000/5000 word list to ``words.csv``."""
    write_csv([CSV_HEADER] + parse_rows(load_page()))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment