# bllchmbrs/fun.py

## fun.py
from bs4 import BeautifulSoup
import re
import glob


def get_prod(soup):
    """Return the link texts of rows labelled as the production company.

    Every ``tr`` under *soup* is scanned. For each ``th`` in a row whose
    stripped text is exactly "Production", a newline, then "company", the
    text of every ``td a`` element in that same row is added to the result.
    A row with several matching ``th`` cells contributes its links once per
    matching cell.

    Returns a list of strings in document order (empty when nothing matches).
    """
    companies = []
    for table_row in soup.select("tr"):
        for header_cell in table_row.select("th"):
            # Skip header cells that are not the production-company label.
            if header_cell.text.strip() != "Production\ncompany":
                continue
            companies.extend(
                link.text for link in table_row.select("td a")
            )
    return companies


# Process every file under data/wiki/movies/ in sorted (stable) order:
# parse it, derive its "/wiki/..." id from the file name, and collect the
# production companies via get_prod().
#
# A tab-indented duplicate of this whole file had been pasted after the loop
# body; it mixed tabs with spaces and left `def`/`for` headers without bodies,
# so the module could not be parsed. It has been removed.
for name in sorted(glob.glob("data/wiki/movies/*")):
    with open(name) as f:
        # The parser needs the full document, so read it in one call.
        soup = BeautifulSoup(f.read(), 'lxml')
    # The id is "/wiki/" plus the run of allowed characters directly before
    # ".html" in the path.
    # NOTE(review): the character class has no "-", "." or "'", so a file name
    # containing one of those keeps only the part after it, and a path without
    # ".html" raises IndexError here -- confirm the data never contains either.
    movie_id = "/wiki/" + re.findall(r"([\d_\w\(\)%,]*)\.html", name)[0]
    # NOTE(review): movie_id and production_companies are rebound on every
    # iteration and not consumed in the visible code -- presumably a later
    # step uses them; verify.
    production_companies = get_prod(soup)