Created
September 19, 2014 02:19
-
-
Save DinisCruz/5ee1f7a458e864b4d2bc to your computer and use it in GitHub Desktop.
CoffeeScript script(s) to unzip, convert (XML to JSON), load (XML, JSON) and filter (JSON) TM Library files (the Uno library has 28,468,558 bytes (38.2 MB on disk) for 4,998 items)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
fs = require 'fs'
sax = require 'sax'
file = require 'file'
path = require 'path'
AdmZip = require('adm-zip')
xml2js = require('xml2js')
rimraf = require 'rimraf'
unzip = require 'unzip'
expect = require('chai').expect

# Mocha spec that exercises (and records timings for) the steps needed to work
# with TeamMentor library files kept under ./data:
#   unzip -> read XML -> convert XML to JSON -> load JSON -> filter JSON.
# Steps declared with `xit` are pending; only 'filter JSON' is active, and it
# reads the ./data/json/<library> folder written by 'read xml -> save JSON'.
describe 'open-tm-files', ->

  # Walks `folder` with file.walkSync and calls `callback(dirpath, fileName)`
  # for every file whose extension equals `extension` (e.g. '.xml').
  eachFileWithExtension = (folder, extension, callback) ->
    file.walkSync folder, (dirpath, dirs, files) ->
      files.forEach (fileName) ->
        callback(dirpath, fileName) if path.extname(fileName) is extension

  xit 'check that zip files exist', ->
    expect(fs.existsSync('./data')).to.be.true
    expect(fs.existsSync('./data/zip_Files')).to.be.true
    expect(fs.existsSync('./data/zip_Files/Lib_HTML5-master.zip')).to.be.true

  # Extracts ./data/zip_Files/<zipFile>.zip into ./data/unziped.
  #   zipFile - archive name without the '.zip' extension
  #   done    - Mocha callback; called with no arguments once the extractor
  #             closes, or with the error if reading/extracting fails
  unzipFile = (zipFile, done) ->
    sourceZip    = "./data/zip_Files/#{zipFile}.zip"
    targetFolder = "./data/unziped"
    # Use the zipFile argument here: `file` is the required 'file' module,
    # not a folder name.
    helpLib      = path.join(targetFolder, zipFile)

    # Remove output from previous runs (including the copy that the
    # command-line unzip variant below leaves in the working directory).
    rimraf.sync(helpLib)
    rimraf.sync("./" + zipFile)
    expect(fs.existsSync(helpLib)).to.be.false

    # Alternatives that were benchmarked (kept for reference):
    #
    # 1598ms using OSX unzip
    #   process = require('child_process').spawn('unzip',['-q', sourceZip])
    #   process.stdout.on 'data', (data) -> console.log ''+data
    #   process.on 'exit', ->
    #     console.log('done...')
    #     done()
    #   return
    #
    # 6749ms using AdmZip
    #   zip = new AdmZip(sourceZip)
    #   zipEntries = zip.getEntries()
    #   zip.extractAllTo(targetFolder)
    #   console.log zipEntries.length
    #   done()
    #   return
    #
    # 7992ms using unzip.Extract (the variant in use)

    # Make sure `done` is invoked at most once even if both an error and a
    # close event are emitted.
    finished = false
    finish = (err) ->
      return if finished
      finished = true
      done(err)

    unzipExtractor = unzip.Extract({ path: targetFolder })
    # Without error handlers a missing or corrupt zip would leave the test
    # hanging until the Mocha timeout.
    unzipExtractor.on 'error', finish
    unzipExtractor.on 'close', ->
      console.log 'zip finished'
      expect(fs.existsSync(helpLib)).to.be.true
      finish()
    fs.createReadStream(sourceZip).on('error', finish).pipe(unzipExtractor)

  xit 'unzips Lib_HTML5', (done) ->
    unzipFile('Lib_HTML5-master', done)   # ✓ unzips Lib_HTML5 (348ms)

  xit 'unzips Lib_Uno', (done) ->
    @timeout(40000)
    unzipFile('Lib_UNO-master', done)     # ✓ unzips Lib_Uno (6882ms)

  xit 'read xml', ->
    @timeout(10000)
    zipFile = 'Lib_HTML5-master'   # files: 164   ✓ read xml (315ms)
    #zipFile = 'Lib_UNO-master'    # files: 4996  ✓ read xml (6711ms)
    unzipedFolder = "./data/unziped/#{zipFile}"
    #console.log unzipedFolder
    filesProcessed = 0
    parseErrors    = []
    eachFileWithExtension unzipedFolder, '.xml', (dirpath, fileName) ->
      data   = fs.readFileSync path.join(dirpath, fileName)
      parser = new xml2js.Parser()
      # NOTE(review): the counter is read right after the walk, so this relies
      # on xml2js invoking the callback synchronously (its documented default,
      # `async: false`) - confirm if parser options are ever changed.
      parser.parseString data, (err, result) ->
        # Record parse failures instead of dereferencing an undefined result.
        return parseErrors.push("#{fileName}: #{err.message}") if err
        if result?["TeamMentor_Article"]
          #console.log result["TeamMentor_Article"].Metadata[0].Title[0]
          filesProcessed++
      # Alternative using sax (kept for reference):
      #parser = sax.parser()
      #file_buf = fs.readFileSync(firstFile)
      #parser.write(file_buf.toString('utf8')).close()
      #console.log(parser)
    console.log "files processed: #{filesProcessed}"
    expect(parseErrors).to.eql []

  xit 'read xml -> save JSON', ->
    @timeout(15000)
    #zipFile = 'Lib_HTML5-master'  # files: 165   ✓ xml -> JSON (386ms)
    zipFile = 'Lib_UNO-master'     # files: 4997  ✓ xml -> JSON(8619ms)
    unzipedFolder = "./data/unziped/#{zipFile}"
    jsonRoot      = "./data/json"
    jsonFolder    = "#{jsonRoot}/#{zipFile}"
    # fs.mkdirSync does not create missing parents here, so create
    # ./data/json before the per-library folder.
    [jsonRoot, jsonFolder].forEach (folder) ->
      fs.mkdirSync(folder) unless fs.existsSync(folder)

    filesProcessed = 0
    parseErrors    = []
    eachFileWithExtension unzipedFolder, '.xml', (dirpath, fileName) ->
      jsonFile = path.join(jsonFolder, fileName + ".json")
      data     = fs.readFileSync path.join(dirpath, fileName)
      parser   = new xml2js.Parser()
      parser.parseString data, (err, result) ->
        # Skip (and report) files that fail to parse rather than trying to
        # serialise an undefined result.
        return parseErrors.push("#{fileName}: #{err.message}") if err
        fs.writeFileSync(jsonFile, JSON.stringify(result, null, ' '))
        #console.log(jsonFile)
        filesProcessed++
    console.log "JSON files saved to #{jsonFolder}"
    console.log "files processed: #{filesProcessed}"
    expect(parseErrors).to.eql []

  xit 'read JSON', ->
    #this.timeout(5000)
    #zipFile = 'Lib_HTML5-master'  # files: 164   ✓ JSON load (21ms)
    zipFile = 'Lib_UNO-master'     # files: 4997  ✓ xml -> JSON(561ms)
    jsonFolder = "./data/json/#{zipFile}"
    filesProcessed = 0
    # Only parse *.json entries so stray files in the folder do not make
    # JSON.parse throw.
    eachFileWithExtension jsonFolder, '.json', (dirpath, fileName) ->
      result = JSON.parse fs.readFileSync(path.join(dirpath, fileName))
      if result?["TeamMentor_Article"]
        #console.log result["TeamMentor_Article"].Metadata[0].Title[0]
        filesProcessed++
    console.log "files processed: #{filesProcessed}"

  it 'filter JSON', ->
    zipFile = 'Lib_UNO-master'     # data load and filer in 580ms
    jsonFolder = "./data/json/#{zipFile}"

    # Load every article found directly inside jsonFolder (non-recursive).
    articles = []
    fs.readdirSync(jsonFolder).forEach (jsonFileName) ->
      return unless path.extname(jsonFileName) is '.json'
      article = JSON.parse fs.readFileSync(path.join(jsonFolder, jsonFileName))
      articles.push(article["TeamMentor_Article"]) if article?["TeamMentor_Article"]
    console.log "there are #{articles.length} articles loaded"

    # Keep the articles whose title contains the search term.
    search  = "XSS" #Logging"
    matches = []
    articles.forEach (article) ->
      # Guard the lookup: an article without Metadata/Title is skipped instead
      # of aborting the whole filter with a TypeError.
      title = article.Metadata?[0]?.Title?[0]
      if typeof title is 'string' and title.indexOf(search) > -1
        matches.push({title: title, article: article})
    console.log "there are #{matches.length} matches for #{search}"
    console.log(matches)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment