briatte/frculture.py

## frculture.py
# !/usr/local/bin/python3
# coding: utf8

from bs4 import BeautifulSoup # pip install BeautifulSoup4
import os
import re
import sys
import urllib.request

# Number of bytes read per iteration while streaming the audio file to disk.
CHUNK_SIZE = 64 * 1024


def safe_name(text):
    """Return *text* with every ':' and '/' replaced by '-'.

    Both characters are path or volume separators on common filesystems, so
    they must not end up inside a single file name.
    """
    return re.sub(r'[:/]', '-', text)


def episode_date(source):
    """Return the first DD?MM?YYYY date found in *source* as 'YYYY-MM-DD'.

    The two separators are matched with '.', i.e. any single character,
    which is what the script has always accepted.

    Raises:
        ValueError: if *source* contains no such date.
    """
    found = re.search(r'(\d{2}).(\d{2}).(\d{4})', source)
    if found is None:
        # Without this check the whole URL would become the file name prefix.
        raise ValueError('no DD.MM.YYYY date found in ' + repr(source))
    day, month, year = found.groups()
    return year + '-' + month + '-' + day


def episode_filename(title, name, source):
    """Compose '<date>-FRCULTURE-<show name> - <show title>.mp3'.

    *title* is the button's 'title' attribute (the 'Réécouter ' label is
    removed), *name* is the show name and *source* is the audio URL the date
    is extracted from.

    Raises:
        ValueError: if *source* contains no date (see episode_date).
    """
    title = safe_name(title.strip().replace('Réécouter ', ''))
    return episode_date(source) + "-FRCULTURE-" + safe_name(name) + " - " + title + '.mp3'


def download(url, path):
    """Stream the resource at *url* into the binary file *path*."""
    # The URL is opened first so that a failed request leaves no empty file.
    with urllib.request.urlopen(url) as response, open(path, 'wb') as output:
        while True:
            chunk = response.read(CHUNK_SIZE)
            if not chunk:
                break
            output.write(chunk)


def main(argv):
    """Download the episode whose page URL is given as argv[1].

    Exits with an explanatory message when the argument is missing or the
    page does not expose the expected replay button and attributes.
    """
    if len(argv) < 2:
        sys.exit('usage: frculture.py URL')

    with urllib.request.urlopen(argv[1]) as page:
        p = BeautifulSoup(page, 'html.parser')

    # part of page to scrape
    b = p.find('button', attrs = {'class': 'replay-button'})
    if b is None:
        sys.exit("no 'replay-button' button found in " + argv[1])

    wanted = ('title', 'data-asset-surtitle', 'data-asset-source')
    missing = [a for a in wanted if not b.get(a)]
    if missing:
        sys.exit('replay button lacks attribute(s): ' + ', '.join(missing))

    # show file (.mp3)
    u = b.get('data-asset-source')

    try:
        f = episode_filename(b.get('title'), b.get('data-asset-surtitle'), u)
    except ValueError as err:
        sys.exit(str(err))

    print('[<<] ' + u)
    print('[>>] ' + f)
    print('[in] ' + os.getcwd())

    download(u, f)


if __name__ == '__main__':
    main(sys.argv)

# kthxbye
	# !/usr/local/bin/python3
	# coding: utf8

	from bs4 import BeautifulSoup # pip install BeautifulSoup4
	import os
	import re
	import sys
	import urllib.request

	# The page URL is the first command-line argument; fetch and parse it.
	u = sys.argv[1]
	p = BeautifulSoup(urllib.request.urlopen(u), 'html.parser')

	# compose filename
	# ----------------

	# part of page to scrape: the button whose attributes are read below
	b = p.find('button', attrs = {'class': 'replay-button'})

	# show title, without the 'Réécouter ' label and with ':' and '/'
	# replaced by '-' (the pipe is a regex alternation and must not be escaped)
	t = b.get('title').strip().replace('Réécouter ', '')
	t = re.sub(r':|/', '-', t)

	# show name
	n = b.get('data-asset-surtitle')

	# show date: the pattern spans the whole URL and captures a DD?MM?YYYY
	# group, so the substitution leaves only YYYY-MM-DD
	d = b.get('data-asset-source')
	d = re.sub(r'(.*?)(\d{2}).(\d{2}).(\d{4})(.*)', '\\4-\\3-\\2', d)

	# filename
	f = d + "-FRCULTURE-" + n + " - " + t + '.mp3'

	# get URL
	# -------

	# show file (.mp3)
	u = b.get('data-asset-source')

	print('[<<] ' + u)
	print('[>>] ' + f)
	print('[in] ' + os.getcwd())

	# download
	# --------

	# the response is read in full, then written to the file in binary mode
	h = urllib.request.urlopen(u)
	with open(f, 'wb') as output:
	    output.write(h.read())

	# kthxbye