Skip to content

Instantly share code, notes, and snippets.

@icai
Created September 14, 2023 12:38
Show Gist options
  • Save icai/047f59f53affaf1275e4df5daf87be8e to your computer and use it in GitHub Desktop.
Save icai/047f59f53affaf1275e4df5daf87be8e to your computer and use it in GitHub Desktop.
Fetch remote file contents and save them locally
const fs = require('fs')
const path = require('path')
const https = require('https')
const url = require('url')
/**
 * Downloads a file over HTTPS and writes it to `localFilePath`, creating the
 * parent directories first.
 *
 * @param {string|URL} url - HTTPS address of the remote file.
 * @param {string} localFilePath - Destination path on the local disk.
 * @returns {Promise<void>} Resolves once the file has been completely written.
 *   Rejects when the request fails, the server answers with a status other
 *   than 200, or the local file cannot be written.
 */
async function fetchAndSaveFile(url, localFilePath) {
  // `recursive: true` behaves like `mkdir -p`: it is a no-op when the
  // directory already exists, so no separate existence check is needed.
  fs.mkdirSync(path.dirname(localFilePath), { recursive: true })

  return new Promise((resolve, reject) => {
    https
      .get(url, (response) => {
        if (response.statusCode !== 200) {
          // Drain the body so the underlying socket is released.
          response.resume()
          reject(new Error(`Failed to fetch remote file. Status code: ${response.statusCode}`))
          return
        }

        const fileStream = fs.createWriteStream(localFilePath)
        const fail = (error) => {
          fileStream.destroy()
          reject(error)
        }

        response.on('error', fail)
        fileStream.on('error', fail)
        // 'finish' fires after all data has been flushed to the file.
        fileStream.on('finish', () => resolve())
        response.pipe(fileStream)
      })
      // Connection-level failures (DNS, refused connection, TLS, ...).
      .on('error', reject)
  })
}
/**
 * Recursively scans `directory` and, in every text file found, replaces each
 * match of `contentPattern` with `/images/agreement` + the matched URL's path.
 * Files containing at least one match are rewritten in place.
 *
 * `node_modules` and every file or directory whose name starts with a dot
 * (`.git`, `.env`, ...) are skipped, as are common image files.
 *
 * @param {string} directory - Directory to scan (absolute or relative).
 * @param {RegExp} contentPattern - Pattern matching the URLs to rewrite; use
 *   the `g` flag to rewrite every occurrence in a file.
 * @returns {string[]} Every matched URL, in encounter order (duplicates kept).
 */
function scanDirectory(directory, contentPattern) {
  // Judge entries by their own name, not by the whole path: a path-wide regex
  // would skip names such as "my.github.io" and would never catch dot-prefixed
  // names below an absolute root.
  const isIgnoredName = (name) => name === 'node_modules' || name.startsWith('.')

  // Resolve first so that relative roots like "." or "./src" are judged by
  // their real directory name.
  if (isIgnoredName(path.basename(path.resolve(directory)))) {
    return []
  }

  const matches = []
  for (const entry of fs.readdirSync(directory)) {
    if (isIgnoredName(entry)) {
      continue
    }

    const filePath = path.join(directory, entry)
    const stats = fs.statSync(filePath)

    if (stats.isDirectory()) {
      matches.push(...scanDirectory(filePath, contentPattern))
      continue
    }

    // Skip non-regular files and image files, which are not text.
    if (!stats.isFile() || /\.(png|jpg|jpeg|gif|svg)$/i.test(entry)) {
      continue
    }

    const fileContent = fs.readFileSync(filePath, 'utf-8')
    const found = []
    // Single pass: collect each match while computing its replacement.
    const replacedContent = fileContent.replace(contentPattern, (match) => {
      found.push(match)
      // The replacement is a URL path, so always join with forward slashes.
      return path.posix.join('/images/agreement', url.parse(match).pathname)
    })

    if (found.length > 0) {
      fs.writeFileSync(filePath, replacedContent)
      matches.push(...found)
    }
  }
  return matches
}
// Absolute links to .html/.pdf documents hosted on either remote domain.
const agreementUrlPattern = /https:\/\/(zhongshi\.info|fe\.gdzskj\.tech)\/[\w\-\/]+\.(html|pdf)/g

// Rewrite every such link found under this script's directory and keep the
// original URLs that were replaced.
const matched = scanDirectory(__dirname, agreementUrlPattern)

// Disabled follow-up step: de-duplicate the matched URLs and download each
// one to images/agreement/<url pathname> next to this script.
// const uniq = [...new Set(matched)]
// uniq.forEach(async (item) => {
// const urlString = item
// // remote domain
// const parsedUrl = url.parse(urlString)
// await fetchAndSaveFile(urlString, path.join(__dirname, 'images/agreement', parsedUrl.pathname))
// })
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment