Skip to content

Instantly share code, notes, and snippets.

@conancat
Last active July 30, 2019 09:27
Show Gist options
  • Save conancat/5dfd0252d0f3136fe920 to your computer and use it in GitHub Desktop.
Save conancat/5dfd0252d0f3136fe920 to your computer and use it in GitHub Desktop.
如何快速用nodejs的request和xml2js把xml下載下來,然後將xml轉換成js object或json檔案
# 如果要跑coffee的話,記得先安裝coffee-script
# npm install coffee-script -g
# npm install request xml2js
# coffee requestXmlToJs.coffee
# 先需要拿到一些package
fs = require "fs"
path = require "path"
request = require "request"
xml2js = require "xml2js"
# Download the XML document at `fileUrl` into `<__dirname>/tmp/`, then parse
# the saved file with xml2js.
#
# The download is skipped when a file with the same basename already exists,
# its size equals the response's Content-Length, and the response's
# Last-Modified is not newer than the file's mtime.
#
# fileUrl  - URL of the XML document; its basename is used as the local file name.
# callback - Node-style `(err, result)`; `result` is whatever
#            `xml2js.parseString` produces. It is invoked at most once.
#
# Returns the request object created by `request fileUrl`.
requestXmlToJs = (fileUrl, callback) ->
  # Make sure the folder that caches downloaded files exists.
  tmpFolder = __dirname + "/tmp/"
  if not fs.existsSync tmpFolder then fs.mkdirSync tmpFolder

  # Derive the local file name from the URL and decide where to store it.
  filename = path.basename fileUrl
  filepath = tmpFolder + filename

  # Several event sources (request, write stream, readFile, parser) can report
  # a failure; make sure the caller only ever hears back once.
  finished = false
  finish = (err, result) ->
    return if finished
    finished = true
    callback err, result

  # Read the cached file and hand its text to xml2js.
  parseCachedFile = ->
    console.log "Begin parsing XML"
    fs.readFile filepath, (err, data) ->
      if err then return finish err
      xml2js.parseString data.toString(), finish

  request fileUrl
    .on "error", finish
    .on "response", (response) ->
      # Hold the body back until we know whether we need it at all.
      this.pause()

      # Do not cache or parse an error page as if it were the document.
      if response.statusCode < 200 or response.statusCode >= 300
        this.abort()
        return finish new Error("Request for #{fileUrl} failed with HTTP status #{response.statusCode}")

      # Assume a download is needed until the cached copy proves otherwise.
      download = true

      # Content-Length and Last-Modified are compared against the cached file.
      # A missing header yields NaN / Invalid Date, which fails the comparison
      # below and therefore forces a download.
      headers = response.headers
      totalSize = parseInt headers["content-length"], 10
      lastModified = new Date(headers["last-modified"])

      if fs.existsSync filepath
        currentFileStats = fs.statSync filepath
        # Same size and not modified since we saved it: the cache is current.
        if currentFileStats.size is totalSize and lastModified <= currentFileStats.mtime
          download = false

      if download
        console.log "Downloading file #{fileUrl} to #{filepath}"
        output = fs.createWriteStream filepath
        output.on "error", finish
        # Parse only once the write stream has flushed everything to disk; the
        # request's own "end" can fire while data is still buffered.
        output.on "finish", parseCachedFile
        this.pipe output
        this.resume()
      else
        console.log "Existing file found, no changes detected, skipping download"
        # Drop the unread body instead of leaving the paused response open.
        this.abort()
        parseCachedFile()
# Try the function out against a sample feed.
url = "http://opendata.cwb.gov.tw/opendata/MMC/F-A0021-001.xml"
requestXmlToJs url, (err, result) ->
  # On failure there is no result to log or serialise, so report and stop.
  # (JSON.stringify(undefined) is undefined, which writeFileSync rejects.)
  if err
    console.error err
    return
  # Show the parsed JS object.
  console.log result
  # Also dump it as a tab-indented JSON file next to the cached XML.
  console.log "Writing to JSON file"
  fs.writeFileSync __dirname + "/tmp/F-A0021-001.json", JSON.stringify(result, null, "\t")
  console.log "Done!"
var fs, path, request, requestXmlToJs, url, xml2js;
fs = require("fs");
path = require("path");
request = require("request");
xml2js = require("xml2js");
/**
 * Download the XML document at `fileUrl` into `<__dirname>/tmp/`, then parse
 * the saved file with xml2js.
 *
 * The download is skipped when a file with the same basename already exists,
 * its size equals the response's Content-Length, and the response's
 * Last-Modified is not newer than the file's mtime.
 *
 * @param {string} fileUrl - URL of the XML document; its basename is used as
 *   the local file name.
 * @param {function(Error, Object=)} callback - Node-style callback receiving
 *   either an error or whatever `xml2js.parseString` produces. It is invoked
 *   at most once.
 * @returns {Object} The request object created by `request(fileUrl)`.
 */
requestXmlToJs = function(fileUrl, callback) {
  var filename, filepath, finish, finished, parseCachedFile, tmpFolder;

  // Make sure the folder that caches downloaded files exists.
  tmpFolder = __dirname + "/tmp/";
  if (!fs.existsSync(tmpFolder)) {
    fs.mkdirSync(tmpFolder);
  }
  filename = path.basename(fileUrl);
  filepath = tmpFolder + filename;

  // Several event sources (request, write stream, readFile, parser) can
  // report a failure; make sure the caller only ever hears back once.
  finished = false;
  finish = function(err, result) {
    if (finished) {
      return;
    }
    finished = true;
    return callback(err, result);
  };

  // Read the cached file and hand its text to xml2js.
  parseCachedFile = function() {
    console.log("Begin parsing XML");
    return fs.readFile(filepath, function(err, data) {
      if (err) {
        return finish(err);
      }
      return xml2js.parseString(data.toString(), finish);
    });
  };

  return request(fileUrl).on("error", finish).on("response", function(response) {
    var currentFileStats, download, headers, lastModified, output, totalSize;

    // Hold the body back until we know whether we need it at all.
    this.pause();

    // Do not cache or parse an error page as if it were the document.
    if (response.statusCode < 200 || response.statusCode >= 300) {
      this.abort();
      return finish(new Error("Request for " + fileUrl + " failed with HTTP status " + response.statusCode));
    }

    // A missing header yields NaN / Invalid Date, which fails the comparison
    // below and therefore forces a download.
    download = true;
    headers = response.headers;
    totalSize = parseInt(headers["content-length"], 10);
    lastModified = new Date(headers["last-modified"]);
    if (fs.existsSync(filepath)) {
      currentFileStats = fs.statSync(filepath);
      // Same size and not modified since we saved it: the cache is current.
      if (currentFileStats.size === totalSize && lastModified <= currentFileStats.mtime) {
        download = false;
      }
    }

    if (download) {
      console.log("Downloading file " + fileUrl + " to " + filepath);
      output = fs.createWriteStream(filepath);
      output.on("error", finish);
      // Parse only once the write stream has flushed everything to disk; the
      // request's own "end" can fire while data is still buffered.
      output.on("finish", parseCachedFile);
      this.pipe(output);
      return this.resume();
    } else {
      console.log("Existing file found, no changes detected, skipping download");
      // Drop the unread body instead of leaving the paused response open.
      this.abort();
      return parseCachedFile();
    }
  });
};
// Try the function out against a sample feed.
url = "http://opendata.cwb.gov.tw/opendata/MMC/F-A0021-001.xml";
requestXmlToJs(url, function(err, result) {
  // On failure there is no result to log or serialise, so report and stop.
  // (JSON.stringify(undefined) is undefined, which writeFileSync rejects.)
  if (err) {
    return console.error(err);
  }
  // Show the parsed JS object.
  console.log(result);
  // Also dump it as a tab-indented JSON file next to the cached XML.
  console.log("Writing to JSON file");
  fs.writeFileSync(__dirname + "/tmp/F-A0021-001.json", JSON.stringify(result, null, "\t"));
  return console.log("Done!");
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment