Skip to content

Instantly share code, notes, and snippets.

@khaledosman
Created March 7, 2021 15:23
Show Gist options
  • Save khaledosman/6ea6db4799ee3e2694e2ebff12d8f40d to your computer and use it in GitHub Desktop.
Save khaledosman/6ea6db4799ee3e2694e2ebff12d8f40d to your computer and use it in GitHub Desktop.
replace all links of all md files in a folder recursively with a new downloaded file link
const fs = require('fs')
const path = require('path')
const http = require('https')
const { promisify } = require('util')
const promisifiedReadFile = promisify(fs.readFile)
const promisifiedWriteFile = promisify(fs.writeFile)
// loops over all files in a directory recursively and returns filepaths to .md files
/**
 * Recursively collects the paths of all `.md` files below a directory.
 *
 * @param {string} startPath - directory to start the search from
 * @returns {string[]} path of every `.md` file found, each built by joining
 *   the entry names onto `startPath`
 * @throws {Error} if `startPath` does not exist
 */
function getMdFilePaths (startPath) {
  if (!fs.existsSync(startPath)) {
    throw new Error(`directory ${startPath} does not exist`)
  }
  return fs.readdirSync(startPath).flatMap(entryName => {
    const entryPath = path.join(startPath, entryName)
    // lstat (rather than stat) reports on a symlink itself, so links are not followed
    if (fs.lstatSync(entryPath).isDirectory()) {
      return getMdFilePaths(entryPath) // recurse
    }
    // an empty array makes flatMap drop the entry, so no post-filtering is needed
    return path.extname(entryPath) === '.md' ? [entryPath] : []
  })
}
/**
 * Starts processMdFile for every given markdown file and waits for all of them.
 *
 * All files are started before any is awaited, so they are processed
 * concurrently; the returned promise rejects as soon as one of them fails.
 * The complete list is forwarded to processMdFile as a second argument.
 *
 * @param {string[]} mdFiles - paths of the markdown files to process
 * @returns {Promise<Array>} the per-file results, in the same order as `mdFiles`
 */
function processMdFiles (mdFiles) {
  async function handleOne (mdFilePath) {
    return processMdFile(mdFilePath, mdFiles)
  }
  const pendingResults = mdFiles.map((mdFilePath) => handleOne(mdFilePath))
  return Promise.all(pendingResults)
}
/**
 * Reads one markdown file, downloads the first URL found in it and rewrites
 * the file so that this URL is replaced by the path of the downloaded copy.
 *
 * Only the first match of the URL pattern is handled. When the file contains
 * no match it is left untouched.
 *
 * @param {string} filePath - path of the markdown file to process
 * @returns {Promise<string>} the rewritten content, or the unchanged content
 *   when no URL was found
 */
async function processMdFile (filePath) {
  // copied from https://www.regextester.com/96504
  const URL_REGEX = /(?:(?:https?|ftp):\/\/|\b(?:[a-z\d]+\.))(?:(?:[^\s()<>]+|\((?:[^\s()<>]+|(?:\([^\s()<>]+\)))?\))+(?:\((?:[^\s()<>]+|(?:\(?:[^\s()<>]+\)))?\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’]))?/
  const fileContent = await promisifiedReadFile(filePath, { encoding: 'utf8' })
  console.log('processing ' + filePath)
  const match = fileContent.match(URL_REGEX)
  console.log({ match })
  if (!match) {
    return fileContent
  }
  const [matchedUrl] = match
  // the download is stored next to the markdown file that references it
  const directoryOfFile = path.dirname(filePath)
  const newFilePath = await downloadFile(matchedUrl, directoryOfFile)
  console.log('file downloaded', newFilePath)
  // Use a replacer function: a replacement *string* would expand `$&`, `$$`, ...
  // and corrupt the output if the downloaded file name contains a `$`.
  const newFileContent = fileContent.replace(URL_REGEX, () => newFilePath)
  console.log('new content', newFileContent)
  console.log(`replacing ${filePath} with ${newFileContent}`)
  // overwrites the original markdown file with the rewritten content
  await promisifiedWriteFile(filePath, newFileContent)
  return newFileContent
}
// Extracts a bare, directory-free file name from a Content-Disposition header
// value; returns null when the header is absent or carries no usable name.
function parseContentDispositionFileName (headerValue) {
  if (!headerValue) {
    return null
  }
  let candidate = null
  // extended form: filename*=UTF-8''percent%20encoded.txt
  const extendedMatch = /filename\*\s*=\s*[^';]*'[^';]*'([^;]+)/i.exec(headerValue)
  if (extendedMatch) {
    candidate = extendedMatch[1].trim()
    try {
      candidate = decodeURIComponent(candidate)
    } catch (err) {
      // malformed percent-encoding: keep the raw value rather than failing the download
    }
  } else {
    // plain form: filename="quoted name.txt" or filename=token.txt
    const plainMatch = /filename\s*=\s*(?:"([^"]*)"|([^;]+))/i.exec(headerValue)
    if (plainMatch) {
      candidate = plainMatch[1] !== undefined ? plainMatch[1] : plainMatch[2]
    }
  }
  if (candidate === null) {
    return null
  }
  // The header is server-controlled: strip any directory part (with either
  // kind of slash) so the file cannot be written outside the output directory.
  const baseName = path.basename(candidate.trim().replace(/\\/g, '/'))
  return baseName === '' || baseName === '.' || baseName === '..' ? null : baseName
}

/**
 * Downloads `url` into the directory `outputPath`.
 *
 * The file name is taken from the response's Content-Disposition header
 * (see https://stackoverflow.com/questions/20132064/node-js-download-file-using-content-disposition-as-filename);
 * when the header is missing or unusable a random `document <n>.html` name is
 * used instead.
 *
 * @param {string} url - address to fetch; it is passed as-is to the `get`
 *   function of the module bound to `http` in this file
 * @param {string} outputPath - existing directory to write the file into
 * @returns {Promise<string>} resolves with the path of the written file once
 *   it has been flushed and closed; rejects on request, response or write errors
 */
function downloadFile (url, outputPath) {
  return new Promise((resolve, reject) => {
    const request = http.get(url, (response) => {
      const filename = parseContentDispositionFileName(response.headers['content-disposition']) ||
        `document ${Math.random()}.html` // response.headers.date
      const newFilePath = path.join(outputPath, filename)
      const writeStream = fs.createWriteStream(newFilePath)
      const fail = (err) => {
        response.destroy()
        writeStream.destroy()
        // best-effort removal of the partial file; the original error is what gets reported
        fs.unlink(newFilePath, () => reject(err))
      }
      response.on('error', fail)
      writeStream.on('error', fail)
      writeStream.on('finish', () => {
        // resolve the promise with the filePath once the file is created
        writeStream.close(() => {
          resolve(newFilePath)
        })
      })
      response.pipe(writeStream)
    })
    // without this listener a connection failure is an unhandled 'error' event
    request.on('error', reject)
  })
}
// run the program
// Entry point: collects the markdown files below the start directory and
// processes them all. The leading semicolon guards this parenthesised
// expression against being parsed as a call of whatever precedes it.
;(async function main () {
  // directory to scan: first command-line argument, or 'root' when none is given
  const START_PATH = process.argv[2] || 'root'
  const mdFiles = getMdFilePaths(START_PATH)
  console.log({ mdFiles })
  await processMdFiles(mdFiles)
})().catch((err) => {
  // report the failure and signal it through the exit code instead of
  // leaving an unhandled promise rejection
  console.error(err)
  process.exitCode = 1
})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment