Skip to content

Instantly share code, notes, and snippets.

View pscollins's full-sized avatar

Patrick Collins pscollins

View GitHub Profile
@pscollins
pscollins / gist:8469565
Created January 17, 2014 07:09
multithreaded
import requests
from bs4 import BeautifulSoup
from concurrent import futures
BASE_URL = "http://ordnet.dk/ddo/ordbog?query={}"
def get_ipa(word):
soup = BeautifulSoup(requests.get(BASE_URL.format(word)).text)
try:
return soup.select(".lydskrift")[0].contents[1]
@pscollins
pscollins / gist:8469193
Last active January 3, 2016 13:19
let's try again
import requests
from bs4 import BeautifulSoup
BASE_URL = "http://ordnet.dk/ddo/ordbog?query={}"
def main(path_to_file):
ans = []
words = [line.split(" ")[0] for line in open(path_to_file)]
for word in words:
@pscollins
pscollins / gist:8459832
Created January 16, 2014 17:49
ML scanning
import requests
from bs4 import BeautifulSoup
BASE_URL = "http://en.wiktionary.org/wiki/{}#Danish"
WORDS = ['foo', 'bar', 'baz']
def main():
ans = []
for word in WORDS:
soup = BeautifulSoup(requests.get(BASE_URL.format(word)).text)