trey/Howto.md

## apod
#!/usr/bin/env bash
#
# apod: enter the project directory, activate the `apod` virtualenv and run
# scrape.py.
#
# Usage: apod [days_ago]
#   days_ago  Optional value handed to scrape.py unchanged. It is forwarded
#             only when exactly one argument is given; with any other
#             argument count scrape.py runs without arguments.
#
# bash rather than sh: `source` is not a POSIX sh builtin (the portable
# spelling is `.`), so the script should not depend on what `sh` resolves to.

# Stop if the project directory is missing instead of running ./scrape.py
# from whatever directory the caller happened to be in.
cd "$HOME/Code/apod" || exit 1

export WORKON_HOME="$HOME/.virtualenvs"
export VIRTUALENVWRAPPER_PYTHON=/usr/local/bin/python
# shellcheck source=/dev/null
source /usr/local/bin/virtualenvwrapper.sh
workon apod

if [ "$#" -eq 1 ]; then
    ./scrape.py "$1"
else
    ./scrape.py
fi
status=$?

deactivate
# Report the scraper's result rather than the exit status of `deactivate`.
exit "$status"

## Howto.md

      
    Raw
  

              Howto.md
            
          
    Set up a recurring task to run this script every morning at 3am:
crontab -e

0 3 * * *  ~/bin/common/apod

Then go into System Preferences > Energy Saver and set a schedule so the Mac is awake when the cron job fires:

Start up or wake Every Day at 2:59 AM
Sleep Every Day at 3:01 AM


## requirements.txt
PyTumblr==0.0.6
beautifulsoup4==4.5.0
html5lib==1.0b10
httplib2==0.9.2
httpretty==0.8.14
oauth2==1.9.0.post1
pep8==1.7.0
six==1.10.0
webencodings==0.5
wsgiref==0.1.2

## scrape.py
#!/usr/bin/env python
import re
import urllib2
import argparse
from datetime import date, timedelta
from bs4 import BeautifulSoup
import pytumblr

# Tumblr API client. The four placeholder strings are handed positionally, in
# this order, to pytumblr.TumblrRestClient; replace them with real
# credentials before running.
_tumblr_credentials = (
    '<consumer_key>',
    '<consumer_secret>',
    '<oauth_token>',
    '<oauth_secret>',
)
client = pytumblr.TumblrRestClient(*_tumblr_credentials)

# Command line: a single optional integer, the number of days to look back.
parser = argparse.ArgumentParser()
parser.add_argument(
    'date',
    type=int,
    nargs='?',
    help='Number of days in the past. Leave blank for today.'
)
args = parser.parse_args()

# A missing argument (None) and an explicit 0 both select today's entry.
apod_date = date.today() - timedelta(args.date or 0)

# The page for a given day is <home>ap<YYMMDD>.html.
apod_home_url = 'http://apod.nasa.gov/apod/'
apod_today_url = apod_home_url + 'ap' \
    + apod_date.strftime('%y%m%d') \
    + '.html'
# Same URL with the 'http://' prefix removed.
apod_clean_url = re.sub('http://', '', apod_today_url)

# Download the page and close the response explicitly once the body has been
# read, even if read() raises.
response = urllib2.urlopen(apod_today_url)
try:
    apod = response.read()
finally:
    response.close()
soup = BeautifulSoup(apod, 'html5lib')

# Rewrite every link on the page so that it is absolute.
# `href=True` limits the loop to anchors that actually have an href
# attribute; indexing a['href'] on an anchor without one raises KeyError.
for a in soup.find_all('a', href=True):
    # Get rid of line breaks that show up inside an `href`.
    href = re.sub('\n', '', a['href'])
    # Prepend `apod_home_url` unless the link already starts with a URL
    # scheme (http:, https:, mailto:, ...). Note that a pattern such as
    # '^[^http]' does not express "doesn't start with http": it is a
    # character class that only rejects a first character of 'h', 't' or
    # 'p', so a relative link like 'pages/x.html' would stay relative.
    if not re.match(r'[A-Za-z][A-Za-z0-9+.-]*:', href):
        href = apod_home_url + href
    a['href'] = href

# Trim leading and trailing whitespace from the text of <b>, <p> and <i>
# elements, one tag name at a time. Elements whose `.string` is None or
# empty are left untouched.
for tag_name in ('b', 'p', 'i'):
    for element in soup.find_all(tag_name):
        text = element.string
        if text:
            text.replace_with(text.strip())

def _flatten_markup(element, open_tag, close_tag):
    """Serialize `element`, drop the given tags and squeeze the whitespace.

    Every occurrence of `open_tag` and `close_tag` is removed from
    `str(element)` and the result is stripped; then each run of newlines and
    each run of spaces is replaced by a single space.
    """
    markup = str(element).replace(open_tag, '').replace(close_tag, '').strip()
    markup = re.sub('\n+', ' ', markup)
    return re.sub(' +', ' ', markup)


# Title: the first <b> child of a <center> that directly follows another
# <center>.
title_node = soup.select('center + center > b:nth-of-type(1)')[0]
title = title_node.get_text(strip=True)

# Markdown heading with the date of this entry.
date_node = soup.select('center:nth-of-type(1) p:nth-of-type(2)')[0]
dateheading = '# ' + date_node.get_text(strip=True)

# content1: the <center> that directly follows another <center>.
content1 = _flatten_markup(soup.select('center + center')[0],
                           '<center>', '</center>')
# content2: the <p> that directly follows a <center>.
content2 = _flatten_markup(soup.select('center + p')[0], '<p>', '</p>')
# content3: a link back to the page that was scraped.
content3 = '&infin; Source: <a href="' + apod_today_url + '">'\
    + apod_clean_url + '</a>'

# Heading plus the three content pieces (decoded from UTF-8), separated by
# blank lines.
caption = '\n\n'.join([
    dateheading,
    content1.decode('utf-8'),
    content2.decode('utf-8'),
    content3.decode('utf-8'),
])

# Publish the entry. The second <p> of the first <center> is searched for
# a link (photo post) and, failing that, for an iframe (video post).
media_selector = 'center:nth-of-type(1) p:nth-of-type(2)'
image_links = soup.select(media_selector + ' a')

if image_links:
    # There's an image here.
    image = image_links[0]['href']

    client.create_photo('apod', source=image.encode('utf-8'),
                        caption=caption.encode('utf-8'),
                        slug=title.encode('utf-8'),
                        format='markdown')

else:
    # No image for this one. It's hopefully a YouTube video.
    video_frames = soup.select(media_selector + ' iframe')
    if not video_frames:
        # Neither a link nor an iframe: stop with a readable message instead
        # of an IndexError from indexing an empty result list.
        raise SystemExit('No image link or iframe found on ' + apod_today_url)

    # Turn the '/embed/' form of the address into the '/watch?v=' form and
    # drop the '?rel=0' suffix.
    image = video_frames[0]['src']\
        .replace('/embed/', '/watch?v=')\
        .replace('?rel=0', '')

    client.create_video('apod', embed=image.encode('utf-8'),
                        caption=caption.encode('utf-8'),
                        slug=title.encode('utf-8'),
                        format='markdown')

# print image + '\n'
# print title + '\n'
# print apod_today_url + '\n'
# print dateheading + '\n'
# print content1 + '\n'
# print content2
# print caption

## thanks.md

      
    Raw
  

              thanks.md
            
          
nasa-apod-desktop/nasa_apod_desktop.py at master · randomdrake/nasa-apod-desktop · GitHub

(Googled for python command to download latest apod) :)


python - Beautiful Soup to parse url to get another urls data - Stack Overflow
Remove empty spaces inside <p> tags using BeautifulSoup - www.scriptscoop3.com
python - virtualenvwrapper functions unavailable in shell scripts - Stack Overflow
datetime - Formatting "yesterday's" date in python - Stack Overflow
15.4. argparse — Parser for command-line options, arguments and sub-commands — Python 2.7.12 documentation
tumblr/pytumblr: A Python Tumblr API v2 Client
	#!/usr/bin/env bash
	#
	# apod: enter the project directory, activate the `apod` virtualenv and run
	# scrape.py.
	#
	# Usage: apod [days_ago]
	#   days_ago  Optional value handed to scrape.py unchanged. It is forwarded
	#             only when exactly one argument is given; with any other
	#             argument count scrape.py runs without arguments.
	#
	# bash rather than sh: `source` is not a POSIX sh builtin (the portable
	# spelling is `.`), so the script should not depend on what `sh` resolves to.

	# Stop if the project directory is missing instead of running ./scrape.py
	# from whatever directory the caller happened to be in.
	cd "$HOME/Code/apod" || exit 1

	export WORKON_HOME="$HOME/.virtualenvs"
	export VIRTUALENVWRAPPER_PYTHON=/usr/local/bin/python
	# shellcheck source=/dev/null
	source /usr/local/bin/virtualenvwrapper.sh
	workon apod

	if [ "$#" -eq 1 ]; then
	    ./scrape.py "$1"
	else
	    ./scrape.py
	fi
	status=$?

	deactivate
	# Report the scraper's result rather than the exit status of `deactivate`.
	exit "$status"
	PyTumblr==0.0.6
	beautifulsoup4==4.5.0
	html5lib==1.0b10
	httplib2==0.9.2
	httpretty==0.8.14
	oauth2==1.9.0.post1
	pep8==1.7.0
	six==1.10.0
	webencodings==0.5
	wsgiref==0.1.2
	#!/usr/bin/env python
	import re
	import urllib2
	import argparse
	from datetime import date, timedelta
	from bs4 import BeautifulSoup
	import pytumblr

	# Tumblr API client. The four placeholder strings are handed positionally, in
	# this order, to pytumblr.TumblrRestClient; replace them with real
	# credentials before running.
	_tumblr_credentials = (
	    '<consumer_key>',
	    '<consumer_secret>',
	    '<oauth_token>',
	    '<oauth_secret>',
	)
	client = pytumblr.TumblrRestClient(*_tumblr_credentials)

	# Command line: a single optional integer, the number of days to look back.
	parser = argparse.ArgumentParser()
	parser.add_argument(
	    'date',
	    type=int,
	    nargs='?',
	    help='Number of days in the past. Leave blank for today.'
	)
	args = parser.parse_args()

	# A missing argument (None) and an explicit 0 both select today's entry.
	# Written as one expression so the block no longer relies on the nested
	# indentation of an if/else, which this copy of the script had lost.
	apod_date = date.today() - timedelta(args.date or 0)

	# The page for a given day is <home>ap<YYMMDD>.html.
	apod_home_url = 'http://apod.nasa.gov/apod/'
	apod_today_url = apod_home_url + 'ap' \
	    + apod_date.strftime('%y%m%d') \
	    + '.html'
	# Same URL with the 'http://' prefix removed.
	apod_clean_url = re.sub('http://', '', apod_today_url)

	# Download the page and close the response explicitly once the body has been
	# read, even if read() raises.
	response = urllib2.urlopen(apod_today_url)
	try:
	    apod = response.read()
	finally:
	    response.close()
	soup = BeautifulSoup(apod, 'html5lib')

	# Rewrite every link on the page so that it is absolute.
	# `href=True` limits the loop to anchors that actually have an href
	# attribute; indexing a['href'] on an anchor without one raises KeyError.
	for a in soup.find_all('a', href=True):
	    # Get rid of line breaks that show up inside an `href`.
	    href = re.sub('\n', '', a['href'])
	    # Prepend `apod_home_url` unless the link already starts with a URL
	    # scheme (http:, https:, mailto:, ...). Note that a pattern such as
	    # '^[^http]' does not express "doesn't start with http": it is a
	    # character class that only rejects a first character of 'h', 't' or
	    # 'p', so a relative link like 'pages/x.html' would stay relative.
	    if not re.match(r'[A-Za-z][A-Za-z0-9+.-]*:', href):
	        href = apod_home_url + href
	    a['href'] = href

	# Trim leading and trailing whitespace from the text of <b>, <p> and <i>
	# elements, one tag name at a time. Elements whose `.string` is None or
	# empty are left untouched. Loop and condition bodies are indented again;
	# without that indentation the block does not parse.
	for tag_name in ('b', 'p', 'i'):
	    for element in soup.find_all(tag_name):
	        text = element.string
	        if text:
	            text.replace_with(text.strip())

	def _flatten_markup(element, open_tag, close_tag):
	    """Serialize `element`, drop the given tags and squeeze the whitespace.

	    Every occurrence of `open_tag` and `close_tag` is removed from
	    `str(element)` and the result is stripped; then each run of newlines and
	    each run of spaces is replaced by a single space.
	    """
	    markup = str(element).replace(open_tag, '').replace(close_tag, '').strip()
	    markup = re.sub('\n+', ' ', markup)
	    return re.sub(' +', ' ', markup)


	# Title: the first <b> child of a <center> that directly follows another
	# <center>.
	title_node = soup.select('center + center > b:nth-of-type(1)')[0]
	title = title_node.get_text(strip=True)

	# Markdown heading with the date of this entry.
	date_node = soup.select('center:nth-of-type(1) p:nth-of-type(2)')[0]
	dateheading = '# ' + date_node.get_text(strip=True)

	# content1: the <center> that directly follows another <center>.
	content1 = _flatten_markup(soup.select('center + center')[0],
	                           '<center>', '</center>')
	# content2: the <p> that directly follows a <center>.
	content2 = _flatten_markup(soup.select('center + p')[0], '<p>', '</p>')
	# content3: a link back to the page that was scraped. The infinity sign is
	# written as the HTML entity so that this source file stays pure ASCII; a
	# literal non-ASCII character needs an encoding declaration in Python 2.
	content3 = '&infin; Source: <a href="' + apod_today_url + '">'\
	    + apod_clean_url + '</a>'

	# Heading plus the three content pieces (decoded from UTF-8), separated by
	# blank lines.
	caption = '\n\n'.join([
	    dateheading,
	    content1.decode('utf-8'),
	    content2.decode('utf-8'),
	    content3.decode('utf-8'),
	])

	# Publish the entry. The second <p> of the first <center> is searched for
	# a link (photo post) and, failing that, for an iframe (video post).
	media_selector = 'center:nth-of-type(1) p:nth-of-type(2)'
	image_links = soup.select(media_selector + ' a')

	if image_links:
	    # There's an image here.
	    image = image_links[0]['href']

	    client.create_photo('apod', source=image.encode('utf-8'),
	                        caption=caption.encode('utf-8'),
	                        slug=title.encode('utf-8'),
	                        format='markdown')

	else:
	    # No image for this one. It's hopefully a YouTube video.
	    video_frames = soup.select(media_selector + ' iframe')
	    if not video_frames:
	        # Neither a link nor an iframe: stop with a readable message instead
	        # of an IndexError from indexing an empty result list.
	        raise SystemExit('No image link or iframe found on ' + apod_today_url)

	    # Turn the '/embed/' form of the address into the '/watch?v=' form and
	    # drop the '?rel=0' suffix.
	    image = video_frames[0]['src']\
	        .replace('/embed/', '/watch?v=')\
	        .replace('?rel=0', '')

	    client.create_video('apod', embed=image.encode('utf-8'),
	                        caption=caption.encode('utf-8'),
	                        slug=title.encode('utf-8'),
	                        format='markdown')

	# print image + '\n'
	# print title + '\n'
	# print apod_today_url + '\n'
	# print dateheading + '\n'
	# print content1 + '\n'
	# print content2
	# print caption