Plum Lis tracker

A Python 2 script that polls Plum Lis's Google+ about page and blog front page and reports whether the avatar, cover image, or latest blog entry has changed since the previous run.

# Dependencies are beautifulsoup4, Pillow (Python 2 only: uses urllib.urlopen and xrange)
# For Pillow use "pip install --use-wheel Pillow"
import codecs
import os
import time
import urllib

from bs4 import BeautifulSoup
from PIL import Image

# constants
NAME = 'Plum Lis'
G_PLUS_ABOUT_URL = 'https://plus.google.com/+PlumLis233/about'

# make sure the output directories exist
if not os.path.exists('avatar'):
    os.mkdir('avatar')
if not os.path.exists('cover'):
    os.mkdir('cover')
if not os.path.exists('blog'):
    os.mkdir('blog')

NEW_AVATAR_FILE_PATH = os.path.join('avatar', 'new.jpg')
OLD_AVATAR_FILE_PATH = os.path.join('avatar', 'old.jpg')
NEW_COVER_FILE_PATH = os.path.join('cover', 'new.jpg')
OLD_COVER_FILE_PATH = os.path.join('cover', 'old.jpg')
NEW_BLOG_ENTRY_FILE_PATH = os.path.join('blog', 'new.txt')
OLD_BLOG_ENTRY_FILE_PATH = os.path.join('blog', 'old.txt')

# seconds to sleep between retries when a urllib call fails
URL_LIB_TIMEOUT_SLEEP = 0.5

# g plus about page (class names scraped from Google+'s obfuscated markup;
# they are likely to break whenever the page layout changes)
G_PLUS_ABOUT_PAGE_AVATAR_CLASS_NAME = 'fa-kz Zxa'
G_PLUS_ABOUT_PAGE_COVER_CLASS_NAME = 'aGb hXa z3Hx4b'

# fetch the about page, retrying until the read succeeds
about_content = ''
while True:
    try:
        print('Reading ' + G_PLUS_ABOUT_URL + ' ...')
        about_content = urllib.urlopen(G_PLUS_ABOUT_URL).read()
        print('Read ' + G_PLUS_ABOUT_URL)
        break
    except IOError:
        time.sleep(URL_LIB_TIMEOUT_SLEEP)
about_soup = BeautifulSoup(about_content)
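
# The same fetch-with-retry loop recurs for each URL below; a shared helper
# along these lines would remove the duplication (a sketch, not part of the
# original script):
#
#   def read_url_with_retry(url):
#       while True:
#           try:
#               print('Reading ' + url + ' ...')
#               content = urllib.urlopen(url).read()
#               print('Read ' + url)
#               return content
#           except IOError:
#               time.sleep(URL_LIB_TIMEOUT_SLEEP)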

def compare_image_exactly(one, another):
    """Return True if the two PIL images have identical size and pixel data."""
    if not one.size == another.size:
        return False
    one_data = list(one.getdata())
    another_data = list(another.getdata())
    for i in xrange(len(one_data)):
        if not one_data[i] == another_data[i]:
            return False
    return True
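
# A terser alternative sketch (not used above; assumes both images share the
# same mode): PIL's ImageChops.difference builds a per-pixel diff image whose
# getbbox() is None when every pixel matches.
#
#   from PIL import ImageChops
#
#   def compare_image_exactly(one, another):
#       if one.size != another.size or one.mode != another.mode:
#           return False
#       return ImageChops.difference(one, another).getbbox() is None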

# download avatar
avatar = about_soup.find(name='img', attrs={"class": G_PLUS_ABOUT_PAGE_AVATAR_CLASS_NAME})
# the avatar src is protocol-relative ('//...'), so prepend the scheme
avatar_url = 'https:' + avatar['src']
while True:
    try:
        print('Downloading ' + avatar_url + ' ...')
        urllib.urlretrieve(url=avatar_url, filename=NEW_AVATAR_FILE_PATH)
        print('Downloaded ' + avatar_url)
        break
    except IOError:
        time.sleep(URL_LIB_TIMEOUT_SLEEP)

# compare old and new avatars
if os.path.exists(OLD_AVATAR_FILE_PATH):
    old_avatar = Image.open(OLD_AVATAR_FILE_PATH)
    new_avatar = Image.open(NEW_AVATAR_FILE_PATH)
    if not compare_image_exactly(old_avatar, new_avatar):
        print(NAME + ' has changed avatar')
    else:
        print(NAME + ' has NOT changed avatar')
    os.remove(OLD_AVATAR_FILE_PATH)

# rename new avatar to old avatar
os.rename(NEW_AVATAR_FILE_PATH, OLD_AVATAR_FILE_PATH)

# download cover
cover = about_soup.find(name='img', attrs={"class": G_PLUS_ABOUT_PAGE_COVER_CLASS_NAME})
cover_url = cover['src']
while True:
    try:
        print('Downloading ' + cover_url + ' ...')
        urllib.urlretrieve(url=cover_url, filename=NEW_COVER_FILE_PATH)
        print('Downloaded ' + cover_url)
        break
    except IOError:
        time.sleep(URL_LIB_TIMEOUT_SLEEP)

# compare old and new covers
if os.path.exists(OLD_COVER_FILE_PATH):
    old_cover = Image.open(OLD_COVER_FILE_PATH)
    new_cover = Image.open(NEW_COVER_FILE_PATH)
    if not compare_image_exactly(old_cover, new_cover):
        print(NAME + ' has changed cover')
    else:
        print(NAME + ' has NOT changed cover')
    os.remove(OLD_COVER_FILE_PATH)

# rename new cover to old cover
os.rename(NEW_COVER_FILE_PATH, OLD_COVER_FILE_PATH)

# blog
BLOG_URL = 'http://plumz.me/'
blog_content = ''
while True:
    try:
        print('Reading ' + BLOG_URL + ' ...')
        blog_content = urllib.urlopen(BLOG_URL).read()
        print('Read ' + BLOG_URL)
        break
    except IOError:
        time.sleep(URL_LIB_TIMEOUT_SLEEP)
blog_soup = BeautifulSoup(blog_content)

# extract the title of the latest blog entry and save it to disk
latest_blog_entry = blog_soup.findAll(name='div', attrs={"class": "cont"})[0]
latest_blog_entry = latest_blog_entry.find(name='h2').text
latest_blog_entry_file = codecs.open(filename=NEW_BLOG_ENTRY_FILE_PATH, mode='w', encoding='utf-8')
latest_blog_entry_file.write(latest_blog_entry)
latest_blog_entry_file.close()

# compare old and new blog entries
if os.path.exists(OLD_BLOG_ENTRY_FILE_PATH):
    old_blog_entry = codecs.open(filename=OLD_BLOG_ENTRY_FILE_PATH, encoding='utf-8')
    new_blog_entry = codecs.open(filename=NEW_BLOG_ENTRY_FILE_PATH, encoding='utf-8')
    if not old_blog_entry.read() == new_blog_entry.read():
        print(NAME + ' has published a new blog entry')
    else:
        print(NAME + ' has NOT published a new blog entry')
    old_blog_entry.close()
    new_blog_entry.close()
    os.remove(OLD_BLOG_ENTRY_FILE_PATH)

# rename new blog entry to old blog entry
os.rename(NEW_BLOG_ENTRY_FILE_PATH, OLD_BLOG_ENTRY_FILE_PATH)
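
The script performs one check per run and keeps its state in the old.* files, so tracking changes over time means running it repeatedly. A minimal polling wrapper, assuming the script above is saved as tracker.py and a Python 2 interpreter is on PATH as python2 (both names are assumptions):

import subprocess
import time

# re-run the tracker every ten minutes, forever
while True:
    subprocess.call(['python2', 'tracker.py'])
    time.sleep(10 * 60)

A cron entry pointing at the script would do the same job without keeping a process alive.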