Skip to content

Instantly share code, notes, and snippets.

Created December 25, 2022 05:14
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
What would you like to do?
Script to download images from blogger and replace them in a markdown file
# Script to download images from blogger and replace them in a markdown file
# Works with markdown files generated by blog2md
import os
import time
import urllib
import urllib.request

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.firefox.options import Options
# Use selenium with a profile that is logged into google
ffOptions = Options()
driver = webdriver.Firefox(options=ffOptions)
# Open file as lines
inputfile = open('', 'r')
Lines = inputfile.readlines()
# Rename the file
os.rename('', 'index.mdold')
count = 0
for line in Lines:
count += 1
# images generated by blog2md have lines that begin with [!
if line[0:2] == "[!":
#print (line)
stext = line.split("(")
#print (stext)
#print (stext[1])
# Get the image url
imgurl = stext[1].split(")")[0]
#print (imgurl)
# Get the filename from the url
filename = imgurl.split("/")[-1]
#print (filename)
# Get the extension of the file
# This works 90% of the time
extension = filename.split(".")[1]
#print (extension)
# Create a new file name to avoid errors
newfilename = "image" + str(count-1) + "." + extension
mdname = "![" + newfilename + "](" + newfilename + ")"
#mdname = "![" + filename + "](" + filename + ")"
#print (mdname)
Lines[count-1] = mdname
# Use to get images for one post that was very broken
#archiveurl = "" + imgurl
# Get the image and save it
img = driver.find_element(By.XPATH, '/html/body/img')
src = img.get_attribute('src')
urllib.request.urlretrieve(src, filename)
os.rename( filename, newfilename)
#print (Lines[10])
# Write the lines back into a markdown file
newfile = open('', 'w')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment