Skip to content

Instantly share code, notes, and snippets.

@nilshamerlinck
Created April 27, 2013 12:21
Show Gist options
  • Save nilshamerlinck/5472929 to your computer and use it in GitHub Desktop.
Save nilshamerlinck/5472929 to your computer and use it in GitHub Desktop.
Petit script python codé à l'arrache pour récupérer les urls directes des podcasts France Inter et France Culture à partir de leur page descriptive.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import codecs, locale, os, sys
# Replace stdout with a UTF-8 stream writer so that unicode strings printed
# by this script are encoded as UTF-8 on output.
# NOTE(review): presumably needed so output does not fail when stdout has no
# usable encoding (e.g. when piped) -- confirm.
sys.stdout = codecs.getwriter('utf-8')(sys.stdout)
import httplib
import urllib2
from cookielib import CookieJar
import re
import urlparse
import json
"""
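Worked example, starting from the descriptive page of an episode:
http://www.franceinter.fr/emission-la-bas-si-jy-suis-julian-assange-cyberterroriste-2
1. In that page, retrieve the player link http://www.franceinter.fr/player/reecouter?play=590616
(http://www.franceinter.fr/player/reecouter?play=590616)
2. That player page contains an iframe pointing to http://www.franceinter.fr/player/export-reecouter?content=590616
3. The iframe document embeds the Flash player below; its 'movie' parameter carries urlAOD (the mp3 path) and infos (the episode metadata):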
<object id='flash_player' classid='clsid:D27CDB6E-AE6D-11cf-96B8-444553540000' codebase='http://download.macromedia.com/pub/shockwave/cabs/flash/swflash.cab#version=6,0,40,0' WIDTH='389' HEIGHT='33'>
<param name='movie' value='/sites/all/modules/rf/rf_player/swf/loader.swf?playerUrl=/sites/all/modules/rf/rf_player/swf/maxi_player.swf&type=aod&startTime=0&endTime=0&basePathConfig=/&nextId=&prevId=&idCurrent=590488&urlAOD=sites/default/files/sons/2013/03/s12/NET_FI_f36dfa39-b5d1-4fa5-a892-b97075c918b6.mp3&infos=[{"media_name":"julian-assange-cyberterroriste","media_genre":"Autres","dom_id":"299099206552","media_section3":"France Inter","media_section2":"aod","media_section1":"emission","media_section4":"la-bas-si-jy-suis","media_section5":"20130320","media_length":3325,"send_type":"","domaine":""}]' />
wget -O julian.mp3 "http://www.franceinter.fr/sites/default/files/sons/2013/03/s12/NET_FI_f36dfa39-b5d1-4fa5-a892-b97075c918b6.mp3"
"""
def _fetch_text(opener, url):
    """Open *url* with *opener* and return its body decoded as UTF-8.

    The response object is closed in all cases, including when reading or
    decoding raises.
    """
    resp = opener.open(url)
    try:
        return resp.read().decode('utf-8')
    finally:
        resp.close()


def download(url):
    """Print a ``wget`` command that fetches the podcast described at *url*.

    *url* is the descriptive page of an episode (the inline notes of this
    script mention www.franceinter.fr and www.franceculture.fr).  Steps:

    1. Fetch the page and extract the numeric code from the first
       ``href="/player/reecouter?play=<code>"`` link.
    2. Fetch ``http://<host>/player/export-reecouter?content=<code>`` on the
       same host and locate the ``<param name='movie' ...>`` tag.
    3. Parse the query string embedded in that tag to obtain ``urlAOD`` and
       ``infos`` (a JSON array whose first element holds the metadata).
    4. Print the resulting ``wget`` command.

    The visited URLs and the extracted code are printed as well.

    Returns:
        0 on success.

    Raises:
        ValueError: if a page does not contain the expected link or tag,
            or if the tag does not contain a ``?...]`` query section.
        KeyError: if ``urlAOD``, ``infos`` or one of the metadata fields is
            missing from the parsed data.
        Errors raised by the opener (network/HTTP failures) and by
        ``json.loads`` propagate unchanged.
    """
    print(url)

    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(CookieJar()))
    opener.addheaders = [
        ('Content-type', 'application/x-www-form-urlencoded'),
        ('User-Agent', 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'),
        ('Accept', 'text/plain, image/jpeg'),
    ]

    # Host of the episode page; the follow-up request and the final mp3 URL
    # are built on this same host.
    netloc = urlparse.urlsplit(url).netloc

    # Step 1: find the player code in the descriptive page.
    page = _fetch_text(opener, url)
    link = re.search(r'href="/player/reecouter\?play=(?P<code>\d+)',
                     page, re.UNICODE)
    if link is None:
        raise ValueError('no "/player/reecouter?play=..." link in %s' % url)
    code = int(link.group('code'))
    print(code)

    # Step 2: fetch the export player page and find the Flash movie param.
    export_url = 'http://%s/player/export-reecouter?content=%s' % (netloc, code)
    print(export_url)
    export_page = _fetch_text(opener, export_url)
    param = re.search(r"<param name='movie'[^>]+>", export_page, re.UNICODE)
    if param is None:
        raise ValueError("no <param name='movie'> tag in %s" % export_url)

    # Step 3: the query string runs from just after the first '?' up to and
    # including the first ']' (the end of the "infos" JSON array).
    tag = param.group(0)
    start = tag.find('?')
    end = tag.find(']')
    if start == -1 or end < start:
        raise ValueError('unexpected movie parameter format in %s' % export_url)
    params = dict(urlparse.parse_qsl(tag[start + 1:end + 1]))
    infos = json.loads(params['infos'])[0]

    # Step 4: emit the command; the output file is named after the show,
    # the date-like section and the episode name.
    print('wget -O %s-%s-%s.mp3 "http://%s/%s"' % (
        infos['media_section4'],
        infos['media_section5'],
        infos['media_name'],
        netloc,
        params['urlAOD']))
    return 0
if __name__ == '__main__':
    # Script entry point: the first command-line argument is the URL of the
    # episode's descriptive page.  Extra arguments are ignored.
    if len(sys.argv) < 2:
        sys.stderr.write('usage: %s URL\n' % sys.argv[0])
        sys.exit(2)
    # download() returns 0 on success; use it as the process exit status.
    sys.exit(download(sys.argv[1]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment