Created
September 14, 2023 12:38
-
-
Save icai/047f59f53affaf1275e4df5daf87be8e to your computer and use it in GitHub Desktop.
Fetch remote file content and save it locally
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const fs = require('fs') | |
const path = require('path') | |
const https = require('https') | |
const url = require('url') | |
/**
 * Download a remote file over HTTPS and write it to a local path.
 *
 * Parent directories of `localFilePath` are created when they are missing.
 * Only a `200` response is accepted; any other status code (redirects
 * included) rejects the returned promise.
 *
 * @param {string|URL} url - Remote HTTPS address, passed as-is to `https.get`.
 * @param {string} localFilePath - Destination path of the downloaded file.
 * @returns {Promise<void>} Resolves once the local file has been fully written;
 *   rejects on a request error, a non-200 status code, or a stream error.
 */
async function fetchAndSaveFile(url, localFilePath) {
  // Equivalent of `mkdir -p`: a no-op when the directory already exists.
  fs.mkdirSync(path.dirname(localFilePath), { recursive: true })

  return new Promise((resolve, reject) => {
    https
      .get(url, (response) => {
        if (response.statusCode !== 200) {
          // Drain the unused body so the underlying socket is released.
          response.resume()
          reject(new Error(`Failed to fetch remote file. Status code: ${response.statusCode}`))
          return
        }

        // Writable stream that receives the contents of the remote file.
        const fileStream = fs.createWriteStream(localFilePath)

        // 'finish' fires after the last chunk has been flushed to the file.
        fileStream.on('finish', () => resolve())
        fileStream.on('error', reject)

        // A response that breaks mid-download must not leave the caller waiting.
        response.on('error', (error) => {
          fileStream.destroy()
          reject(error)
        })

        response.pipe(fileStream)
      })
      .on('error', reject)
  })
}
/**
 * Recursively scan a directory, rewrite every match of `contentPattern` found in
 * its files to a site-relative `/images/agreement/<url pathname>` path (files are
 * modified in place), and collect the original matched strings.
 *
 * Skipped entries: `node_modules`, anything whose own name starts with a dot
 * (`.git`, `.vscode`, `.env`, ...), and files ending in
 * `.png`, `.jpg`, `.jpeg`, `.gif` or `.svg`.
 *
 * @param {string} directory - Directory to scan.
 * @param {RegExp} contentPattern - Pattern matching the URLs to rewrite; expected
 *   to carry the `g` flag so that all occurrences are matched and replaced.
 * @returns {string[]} Every matched string, duplicates included, in traversal order.
 */
function scanDirectory(directory, contentPattern) {
  // Decide on the directory's own name only. Testing the whole path would skip
  // everything whenever an ancestor folder merely contains ".git" or
  // "node_modules" (e.g. "my.github.io"), and a "^\." test never fires for
  // absolute paths.
  const directoryName = path.basename(path.resolve(directory))
  if (directoryName === 'node_modules' || directoryName.startsWith('.')) {
    return []
  }

  const matches = []
  for (const entryName of fs.readdirSync(directory)) {
    const entryPath = path.join(directory, entryName)
    const stats = fs.statSync(entryPath)

    if (stats.isDirectory()) {
      matches.push(...scanDirectory(entryPath, contentPattern))
      continue
    }
    if (!stats.isFile()) {
      continue
    }
    // Ignore dot files and image files.
    if (entryName.startsWith('.') || /\.(png|jpg|jpeg|gif|svg)$/.test(entryName)) {
      continue
    }

    // Read the entire file content and look for the pattern once.
    const fileContent = fs.readFileSync(entryPath, 'utf-8')
    const found = fileContent.match(contentPattern)
    if (!found) {
      continue
    }
    matches.push(...found)

    // Drop the scheme and domain and re-root the URL path under /images/agreement.
    // path.posix keeps forward slashes on every platform, since this is a URL path
    // and not a file-system path.
    const replacedContent = fileContent.replace(contentPattern, (match) => {
      const parsedUrl = url.parse(match)
      return path.posix.join('/images/agreement', parsedUrl.pathname)
    })
    fs.writeFileSync(entryPath, replacedContent)
  }
  return matches
}
// Absolute https URLs of .html / .pdf documents hosted on the two remote domains.
const agreementUrlPattern = /https:\/\/(zhongshi\.info|fe\.gdzskj\.tech)\/[\w\-\/]+\.(html|pdf)/g

// Scan this script's directory, rewriting matching URLs in place and collecting
// the original URL strings that were found.
const matched = scanDirectory(__dirname, agreementUrlPattern)

// Disabled follow-up step, kept for reference: de-duplicate the collected URLs and
// download each one to images/agreement/<url pathname> next to this script.
// const uniq = [...new Set(matched)]
// uniq.forEach(async (item) => {
// const urlString = item
// // remote domain
// const parsedUrl = url.parse(urlString)
// await fetchAndSaveFile(urlString, path.join(__dirname, 'images/agreement', parsedUrl.pathname))
// })
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment