# allieus/DG 111 - THE VIOLIN.py

## DG 111 - THE VIOLIN.py
# -*- coding: utf-8 -*-

import requests
import re
from bs4 import BeautifulSoup

def fetch_page(url, timeout=10):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. It is
            passed straight to ``requests.get``; ``None`` waits
            indefinitely, which is what happened before this parameter
            existed.

    Returns:
        The decoded response body (``Response.text``). The HTTP status code
        is not inspected, so the body of an error page is returned as-is.

    Raises:
        requests.exceptions.Timeout: The server did not answer within
            *timeout* seconds.
    """
    # Without an explicit timeout, requests blocks forever on a server that
    # accepts the connection but never responds.
    response = requests.get(url, timeout=timeout)
    return response.text

def get_tags_from_page(url, max_items=42):
    """Print one tag line per track container found on the page at *url*.

    The page is downloaded with ``fetch_page`` and every ``<div>`` whose
    class matches ``(item|composer|work|track)+-container$`` is visited in
    document order. Item, composer and work containers update the current
    album, composer and work title; each track container prints a line of
    the form::

        <album> __!__ <composer> __!__ <work title> - <track title>

    Args:
        url: Address of the catalogue page to scrape.
        max_items: Maximum number of matching containers to visit, counted
            from the start of the page. The default of 42 reproduces the
            previously hard-coded cut-off; ``None`` visits all of them.
            Expected to be non-negative.

    Returns:
        None. The result is written to standard output.
    """
    html = fetch_page(url)
    # The "lxml" parser needs the lxml package to be installed.
    soup = BeautifulSoup(html, "lxml")
    containers = soup.find_all(
        'div',
        {'class': re.compile('(item|composer|work|track)+-container$')},
    )

    # Raw strings: the patterns contain backslash escapes (\(, \d, \:) that
    # are invalid escape sequences in an ordinary string literal.
    lifespan_re = re.compile(r" \(\d{4} - \d{4}\)")
    # Strips a "d:ddd:dd" / "d:dddd:dd" digit group from the track text --
    # presumably two time stamps run together once newlines are removed;
    # verify against the live page.
    timing_re = re.compile(r"\d{1}?\:\d{3,4}\:\d{2}")

    # Fields not seen yet print as empty strings.
    album = composer = title_head = title_body = ''

    # Slice instead of looking up each element's position with list.index():
    # index() compares tags by equality, so two identical containers made
    # the position-based cut-off unreliable (and the loop quadratic).
    for tag in containers[:max_items]:
        first_class = tag.attrs['class'][0]
        text = tag.get_text().replace('\n', '')
        if 'item-container' in first_class:
            album = text
        elif 'composer-container' in first_class:
            # Drop a trailing " (YYYY - YYYY)" year range from the name.
            composer = lifespan_re.sub('', text)
        elif 'work-container' in first_class:
            title_head = text
        elif 'track-container' in first_class:
            title_body = timing_re.sub('', text)
            print(
                album, '__!__', composer, '__!__', title_head, '-', title_body
            )  # for tag

# Page whose track listing is printed when this script is run.
url = "http://www.deutschegrammophon.com/kr/cat/4796220"
get_tags_from_page(url=url)
	# -*- coding: utf-8 -*-

import requests
import re
from bs4 import BeautifulSoup


def fetch_page(url, timeout=10):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. It is
            passed straight to ``requests.get``; ``None`` waits
            indefinitely.

    Returns:
        The decoded response body (``Response.text``). The HTTP status code
        is not inspected, so the body of an error page is returned as-is.

    Raises:
        requests.exceptions.Timeout: The server did not answer within
            *timeout* seconds.
    """
    # Without an explicit timeout, requests blocks forever on a server that
    # accepts the connection but never responds.
    response = requests.get(url, timeout=timeout)
    return response.text


def get_tags_from_page(url, max_items=42):
    """Print one tag line per track container found on the page at *url*.

    The page is downloaded with ``fetch_page`` and every ``<div>`` whose
    class matches ``(item|composer|work|track)+-container$`` is visited in
    document order. Item, composer and work containers update the current
    album, composer and work title; each track container prints a line of
    the form::

        <album> __!__ <composer> __!__ <work title> - <track title>

    Args:
        url: Address of the catalogue page to scrape.
        max_items: Maximum number of matching containers to visit, counted
            from the start of the page. ``None`` visits all of them.
            Expected to be non-negative.

    Returns:
        None. The result is written to standard output.
    """
    html = fetch_page(url)
    # The "lxml" parser needs the lxml package to be installed.
    soup = BeautifulSoup(html, "lxml")
    # The alternation bars must be plain "|": written as "\|" they match
    # literal pipe characters and no container class is ever found.
    containers = soup.find_all(
        'div',
        {'class': re.compile('(item|composer|work|track)+-container$')},
    )

    # Raw strings: the patterns contain backslash escapes (\(, \d, \:) that
    # are invalid escape sequences in an ordinary string literal.
    lifespan_re = re.compile(r" \(\d{4} - \d{4}\)")
    # Strips a "d:ddd:dd" / "d:dddd:dd" digit group from the track text --
    # presumably two time stamps run together once newlines are removed;
    # verify against the live page.
    timing_re = re.compile(r"\d{1}?\:\d{3,4}\:\d{2}")

    # Fields not seen yet print as empty strings.
    album = composer = title_head = title_body = ''

    # Slice rather than calling list.index() per element: index() compares
    # tags by equality, so identical containers would make a position-based
    # cut-off unreliable (and the loop quadratic).
    for tag in containers[:max_items]:
        first_class = tag.attrs['class'][0]
        text = tag.get_text().replace('\n', '')
        if 'item-container' in first_class:
            album = text
        elif 'composer-container' in first_class:
            # Drop a trailing " (YYYY - YYYY)" year range from the name.
            composer = lifespan_re.sub('', text)
        elif 'work-container' in first_class:
            title_head = text
        elif 'track-container' in first_class:
            title_body = timing_re.sub('', text)
            print(
                album, '__!__', composer, '__!__', title_head, '-', title_body
            )  # for tag


# Page whose track listing is printed when this script is run.
url = "http://www.deutschegrammophon.com/kr/cat/4796220"
get_tags_from_page(url)