Last active
October 10, 2019 06:15
-
-
Save emreberber/275208425fa98e2c8c19d818d514ef61 to your computer and use it in GitHub Desktop.
blogsaver.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#
# Post Saver for Coder Ghost Theme
# written by Emre Berber
#
# emreberber.systems
# github.com/emreberber
#
import urllib2 | |
import re | |
import requests | |
from bs4 import BeautifulSoup | |
import os | |
def get_between(s, first, last):
    """Return the text of *s* that lies between two markers.

    The first occurrence of ``first`` is located, then the first occurrence
    of ``last`` that follows it. The text in between is returned with
    leading and trailing whitespace stripped.

    Args:
        s: String to search.
        first: Opening marker.
        last: Closing marker; only searched for after ``first``.

    Returns:
        The stripped substring, or the integer ``-1`` when either marker is
        not found. Callers must check for ``-1`` before treating the result
        as text.
    """
    try:
        start = s.index(first) + len(first)
        end = s.index(last, start)
    except ValueError:
        # ``index`` raises ValueError when a marker is absent.
        return -1
    return s[start:end].strip()
# Local imports keep this section self-contained; each works on Python 2 and 3.
import io

try:
    from urllib.parse import urljoin  # Python 3
except ImportError:
    from urlparse import urljoin  # Python 2

source_url = 'https://emreberber.systems/ubuntu-server-16-04-jira-kurulumu/'

# Download the page once; the same text feeds both the HTML extract and the
# image scan below.
response = requests.get(source_url, timeout=30)
response.raise_for_status()
page_source = response.text

source_html = get_between(page_source, '<section class="col-xs-12">', '</section>')
title = get_between(page_source, '<title>', '|')

# get_between() returns -1 when a marker is missing; stop with a clear message
# instead of failing later while building paths or writing "-1" to disk.
if source_html == -1 or title == -1 or not title:
    raise SystemExit('Could not find the post body or title in ' + source_url)

# The title is used as a directory and file name, so it must not contain
# path separators.
title = title.replace('/', '-')

post_dir = os.path.join(os.getcwd(), title)
img_dir = os.path.join(post_dir, 'img')
# Creating img_dir also creates post_dir; skip when a previous run made them.
if not os.path.isdir(img_dir):
    os.makedirs(img_dir)

# write source_html code into the .html file
# (explicit UTF-8 so non-ASCII post content is written reliably)
with io.open(os.path.join(post_dir, title + '.html'), 'w', encoding='utf-8') as text_file:
    text_file.write(source_html)

soup = BeautifulSoup(page_source, 'html.parser')
img_tags = soup.find_all('img')
# <img> tags without a src attribute have nothing to download.
urls = [img['src'] for img in img_tags if img.get('src')]

# Compiled once: captures the trailing "<name>.<ext>" of an image URL.
image_name = re.compile(r'/([\w_-]+[.](jpg|jpeg|gif|png))$')

# save images into img folder
for url in urls:
    filename = image_name.search(url)
    if filename is None:
        # Not a recognised image path (data: URI, query string, other type).
        continue
    if 'http' not in url:
        print(url)
    # Resolve relative and root-relative paths against the page URL.
    image_url = urljoin(source_url, url)
    response = requests.get(image_url, timeout=30)
    if not response.ok:
        print('Skipping {0} (HTTP {1})'.format(image_url, response.status_code))
        continue
    # Open the file only after a successful download so that a failed
    # request does not leave an empty file behind.
    with open(os.path.join(img_dir, filename.group(1)), 'wb') as f:
        f.write(response.content)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment