# seltzered/downloadRoamFirebase.py

## downloadRoamFirebase.py
# Opens files in directory, outputs firebase URLs to a file, downloads them, and replaces the links with a link to the new files.
# To use, replace PATH in the variable vaultDir with your vault's root directory.
# This automatically puts filenames in ../assets (Logseq style) - change the newFilePath variable if you want to change this

import re
import glob
import os
import requests
import calendar
import time

# Set to True to only report what would be migrated: nothing is downloaded
# and no file is written or modified.
dryRun = False

#SET YOUR VAULT DIRECTORY HERE - whatever is a directory full of markdown files
vaultDir = '/path/to/my/logseq/pages'

# When generating image file with filename,
# remove special characters.
# (e.g. '[' and ']' for titles that also have [[blockrefs]])
filenamePrefixStripCharPattern = r'\[|\]'

# add a migration descriptor to the filename,
# just in case you need to ever do post-processing
# on only these migrated images.
filenameMigrationDescriptor = 'fromRoam'

#default assumed image extension if one isn't found
defaultAssumedExt = 'ASSUMEDPNG.png'

# Seconds to wait for the server before giving up on a download.
downloadTimeout = 60

# Downloaded assets are stored in an "assets" folder next to the vault
# directory. os.path.join supplies the separator, so this works whether or
# not vaultDir ends with a slash.
assetsDir = os.path.join(vaultDir, '..', 'assets')

# A link ends at whitespace or at the bracket that closes the surrounding
# markdown "(...)" / Roam "{{...}}" syntax, so those characters are never
# treated as part of the URL. This keeps a match from running on to a later
# ")" or "}" on the same line.
urlChars = r'[^\s(){}]'
assetLinkPattern = re.compile(
    r'https://firebasestorage' + urlChars + r'*?\?alt' + urlChars + r'*'
    + r'|https://' + urlChars + r'*?googleusercontent' + urlChars + r'*'
)

# A dot followed by one to four letters/digits at the very end of the path.
extensionPattern = re.compile(r'\.([A-Za-z0-9]{1,4})$')


def findAssetLinks(text):
    """Return every Firebase / googleusercontent URL in *text*, in order.

    Firebase links are only recognised when they carry the "?alt" query
    string. The surrounding markdown or "{{pdf: ...}}" delimiters are not
    part of the returned URLs.
    """
    return [found.group(0) for found in assetLinkPattern.finditer(text)]


def guessExtension(url):
    """Return the extension (with leading dot) to use for the asset at *url*.

    For Firebase links the extension is read from the end of the part before
    "?alt" (e.g. ".png", ".jpeg", ".pdf"). googleusercontent links, and
    Firebase links without a recognisable extension, get defaultAssumedExt.
    """
    if not url.startswith('https://firebasestorage'):
        return defaultAssumedExt
    storagePath = url.partition('?alt')[0]
    found = extensionPattern.search(storagePath)
    if found is None:
        return defaultAssumedExt
    return '.' + found.group(1)


def rewriteLinks(text, replacements):
    """Return *text* with each URL that is a key of *replacements* swapped for its value.

    Only whole links are substituted, so a longer URL that merely starts with
    a migrated one is left untouched.
    """
    return assetLinkPattern.sub(
        lambda found: replacements.get(found.group(0), found.group(0)), text
    )


def downloadAsset(url):
    """Fetch *url* and return the response body as bytes, or None on failure.

    A network error or a non-success HTTP status is reported on stdout
    instead of raising, so one bad link does not abort the whole migration.
    """
    try:
        response = requests.get(url, timeout=downloadTimeout)
    except requests.RequestException as error:
        print('download failed: ' + str(error))
        return None
    if not response.ok:
        print('download failed with HTTP status ' + str(response.status_code))
        return None
    return response.content


# Walk through all files in all directories within the specified vault directory
for subdir, dirs, files in os.walk(vaultDir):
    for file in files:
        print("filename: " + file)
        fileFullPath = os.path.join(subdir, file)
        # Read the whole page up front so the file is closed before it gets
        # replaced further down. Undecodable bytes are dropped on read.
        with open(fileFullPath, errors='ignore') as fhand:
            data = fhand.read()

        filenamePrefix = re.sub(filenamePrefixStripCharPattern, '', os.path.splitext(file)[0])
        seenUrls = set()
        replacements = {}  # remote URL -> local "../assets/..." link
        fileImageCount = 0
        for resolvedUrl in findAssetLinks(data):
            if resolvedUrl in seenUrls:
                # Same asset linked again: the first download covers it.
                continue
            seenUrls.add(resolvedUrl)
            print('\nfirebasestorage or googleusercontent asset link found')
            print('link: ' + resolvedUrl)

            ext = guessExtension(resolvedUrl)
            print('\nextension: ' + ext)

            assetName = filenamePrefix + '-' + filenameMigrationDescriptor + '-' + str(fileImageCount) + ext
            newFilePath = '../assets/' + assetName
            print("\ncreating image:" + newFilePath)
            print(resolvedUrl + '>>>' + newFilePath)
            fileImageCount = fileImageCount + 1
            if dryRun:
                continue

            content = downloadAsset(resolvedUrl)
            if content is None:
                # Keep the remote link in the page so the reference is not lost.
                continue
            # Create assets folder if it doesn't exist
            os.makedirs(assetsDir, exist_ok=True)
            with open(os.path.join(assetsDir, assetName), 'wb') as output_file:
                output_file.write(content)
            replacements[resolvedUrl] = newFilePath

        if replacements:
            # Write the updated page to a temp file first, then swap it in, so
            # an interrupted run never leaves a half-written page behind.
            fullTempFilePath = os.path.join(subdir, 'temp_' + file)
            with open(fullTempFilePath, 'wt') as temp_file:
                temp_file.write(rewriteLinks(data, replacements))
            os.replace(fullTempFilePath, fileFullPath)
	# Opens files in directory, outputs firebase URLs to a file, downloads them, and replaces the links with a link to the new files.
	# To use, replace PATH in the variable vaultDir with your vault's root directory.
	# This automatically puts filenames in ../assets (Logseq style) - change the newFilePath variable if you want to change this

import re
import glob
import os
import requests
import calendar
import time

# Set to True to only report what would be migrated: nothing is downloaded
# and no file is written or modified.
dryRun = False

#SET YOUR VAULT DIRECTORY HERE - whatever is a directory full of markdown files
vaultDir = '/path/to/my/logseq/pages'

# When generating image file with filename,
# remove special characters.
# (e.g. '[' and ']' for titles that also have [[blockrefs]])
filenamePrefixStripCharPattern = r'\[|\]'

# add a migration descriptor to the filename,
# just in case you need to ever do post-processing
# on only these migrated images.
filenameMigrationDescriptor = 'fromRoam'

#default assumed image extension if one isn't found
defaultAssumedExt = 'ASSUMEDPNG.png'

# Seconds to wait for the server before giving up on a download.
downloadTimeout = 60

# Downloaded assets are stored in an "assets" folder next to the vault
# directory. os.path.join supplies the separator, so this works whether or
# not vaultDir ends with a slash.
assetsDir = os.path.join(vaultDir, '..', 'assets')

# A link ends at whitespace or at the bracket that closes the surrounding
# markdown "(...)" / Roam "{{...}}" syntax, so those characters are never
# treated as part of the URL. This keeps a match from running on to a later
# ")" or "}" on the same line.
urlChars = r'[^\s(){}]'
assetLinkPattern = re.compile(
    r'https://firebasestorage' + urlChars + r'*?\?alt' + urlChars + r'*'
    + r'|https://' + urlChars + r'*?googleusercontent' + urlChars + r'*'
)

# A dot followed by one to four letters/digits at the very end of the path.
extensionPattern = re.compile(r'\.([A-Za-z0-9]{1,4})$')


def findAssetLinks(text):
    """Return every Firebase / googleusercontent URL in *text*, in order.

    Firebase links are only recognised when they carry the "?alt" query
    string. The surrounding markdown or "{{pdf: ...}}" delimiters are not
    part of the returned URLs.
    """
    return [found.group(0) for found in assetLinkPattern.finditer(text)]


def guessExtension(url):
    """Return the extension (with leading dot) to use for the asset at *url*.

    For Firebase links the extension is read from the end of the part before
    "?alt" (e.g. ".png", ".jpeg", ".pdf"). googleusercontent links, and
    Firebase links without a recognisable extension, get defaultAssumedExt.
    """
    if not url.startswith('https://firebasestorage'):
        return defaultAssumedExt
    storagePath = url.partition('?alt')[0]
    found = extensionPattern.search(storagePath)
    if found is None:
        return defaultAssumedExt
    return '.' + found.group(1)


def rewriteLinks(text, replacements):
    """Return *text* with each URL that is a key of *replacements* swapped for its value.

    Only whole links are substituted, so a longer URL that merely starts with
    a migrated one is left untouched.
    """
    return assetLinkPattern.sub(
        lambda found: replacements.get(found.group(0), found.group(0)), text
    )


def downloadAsset(url):
    """Fetch *url* and return the response body as bytes, or None on failure.

    A network error or a non-success HTTP status is reported on stdout
    instead of raising, so one bad link does not abort the whole migration.
    """
    try:
        response = requests.get(url, timeout=downloadTimeout)
    except requests.RequestException as error:
        print('download failed: ' + str(error))
        return None
    if not response.ok:
        print('download failed with HTTP status ' + str(response.status_code))
        return None
    return response.content


# Walk through all files in all directories within the specified vault directory
for subdir, dirs, files in os.walk(vaultDir):
    for file in files:
        print("filename: " + file)
        fileFullPath = os.path.join(subdir, file)
        # Read the whole page up front so the file is closed before it gets
        # replaced further down. Undecodable bytes are dropped on read.
        with open(fileFullPath, errors='ignore') as fhand:
            data = fhand.read()

        filenamePrefix = re.sub(filenamePrefixStripCharPattern, '', os.path.splitext(file)[0])
        seenUrls = set()
        replacements = {}  # remote URL -> local "../assets/..." link
        fileImageCount = 0
        for resolvedUrl in findAssetLinks(data):
            if resolvedUrl in seenUrls:
                # Same asset linked again: the first download covers it.
                continue
            seenUrls.add(resolvedUrl)
            print('\nfirebasestorage or googleusercontent asset link found')
            print('link: ' + resolvedUrl)

            ext = guessExtension(resolvedUrl)
            print('\nextension: ' + ext)

            assetName = filenamePrefix + '-' + filenameMigrationDescriptor + '-' + str(fileImageCount) + ext
            newFilePath = '../assets/' + assetName
            print("\ncreating image:" + newFilePath)
            print(resolvedUrl + '>>>' + newFilePath)
            fileImageCount = fileImageCount + 1
            if dryRun:
                continue

            content = downloadAsset(resolvedUrl)
            if content is None:
                # Keep the remote link in the page so the reference is not lost.
                continue
            # Create assets folder if it doesn't exist
            os.makedirs(assetsDir, exist_ok=True)
            with open(os.path.join(assetsDir, assetName), 'wb') as output_file:
                output_file.write(content)
            replacements[resolvedUrl] = newFilePath

        if replacements:
            # Write the updated page to a temp file first, then swap it in, so
            # an interrupted run never leaves a half-written page behind.
            fullTempFilePath = os.path.join(subdir, 'temp_' + file)
            with open(fullTempFilePath, 'wt') as temp_file:
                temp_file.write(rewriteLinks(data, replacements))
            os.replace(fullTempFilePath, fileFullPath)