Skip to content

Instantly share code, notes, and snippets.

@DinisCruz
Created September 19, 2014 02:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save DinisCruz/5ee1f7a458e864b4d2bc to your computer and use it in GitHub Desktop.
Save DinisCruz/5ee1f7a458e864b4d2bc to your computer and use it in GitHub Desktop.
CoffeeScript script(s) to unzip, convert (XML to JSON), load (XML, JSON) and filter (JSON) TM Library files (the UNO library is 28,468,558 bytes (38.2 MB on disk) for 4,998 items)
fs = require 'fs'
sax = require 'sax'
file = require 'file'
path = require 'path'
AdmZip = require('adm-zip')
xml2js = require('xml2js')
rimraf = require 'rimraf'
unzip = require 'unzip'
expect = require('chai').expect
# Mocha suite for working with TM library files: unzipping the archives,
# parsing the XML articles, saving/loading them as JSON and filtering the
# loaded articles by title.
describe 'open-tm-files', ->
# Pending spec (xit): checks that the data folder, the zip folder and the
# HTML5 library archive exist on disk.
xit 'check that zip files exist', ->
  requiredPaths = [
    './data'
    './data/zip_Files'
    './data/zip_Files/Lib_HTML5-master.zip'
  ]
  # Each path is asserted once, with a message naming the missing path so a
  # failure is self-explanatory.
  for requiredPath in requiredPaths
    expect(fs.existsSync(requiredPath), "missing #{requiredPath}").to.be.true
  return
# Extracts ./data/zip_Files/<zipFile>.zip into ./data/unziped and calls
# done() once the extractor closes, or done(err) when a stream or the final
# existence check fails.
#
# zipFile - archive name without the ".zip" extension (e.g. 'Lib_HTML5-master')
# done    - mocha completion callback
#
# Timings noted for the extraction strategies that were tried:
#   1598ms - OSX unzip : require('child_process').spawn('unzip', ['-q', sourceZip])
#   6749ms - AdmZip    : new AdmZip(sourceZip).extractAllTo(targetFolder)
#   7992ms - unzip.Extract (the implementation used below)
unzipFile = (zipFile, done) ->
  sourceZip    = "./data/zip_Files/#{zipFile}.zip"
  targetFolder = "./data/unziped"
  # Use the zipFile argument here: `file` is the required 'file' module, not
  # the archive name, so using it targets the wrong path.
  helpLib      = path.join(targetFolder, zipFile)

  # Remove any previous extraction, plus a folder of the same name in the
  # working directory, so the existence check below is meaningful.
  rimraf.sync(helpLib)
  rimraf.sync("./" + zipFile)
  expect(fs.existsSync(helpLib)).to.be.false

  # Guard so that done is invoked only once even if several events fire.
  finished = false
  finish = (err) ->
    return if finished
    finished = true
    done(err)

  unzipExtractor = unzip.Extract({ path: targetFolder })
  unzipExtractor.on 'error', finish
  unzipExtractor.on 'close', ->
    console.log 'zip finished'
    try
      expect(fs.existsSync(helpLib)).to.be.true
    catch assertionError
      return finish(assertionError)
    finish()

  zipStream = fs.createReadStream(sourceZip)
  zipStream.on 'error', finish      # e.g. the archive does not exist
  zipStream.pipe(unzipExtractor)
  return
# Pending spec (xit): extracts the HTML5 library archive through unzipFile.
# Recorded run: ✓ unzips Lib_HTML5 (348ms)
xit 'unzips Lib_HTML5', (done) ->
  unzipFile 'Lib_HTML5-master', done
# Pending spec (xit): extracts the UNO library archive through unzipFile,
# with the mocha timeout raised to 40 seconds.
# Recorded run: ✓ unzips Lib_Uno (6882ms)
xit 'unzips Lib_Uno', (done) ->
  @timeout 40000
  unzipFile 'Lib_UNO-master', done
# Pending spec (xit): walks the unzipped library folder, parses every .xml
# file with xml2js and counts the documents that have a TeamMentor_Article
# root. Files that fail to parse are collected and reported at the end.
xit 'read xml', ->
  @timeout(10000)
  zipFile = 'Lib_HTML5-master'    # files:  164  ✓ read xml (315ms)
  #zipFile = 'Lib_UNO-master'     # files: 4996  ✓ read xml (6711ms)
  unzipedFolder = "./data/unziped/#{zipFile}"
  filesProcessed = 0
  parseErrors = []

  file.walkSync unzipedFolder, (dirpath, dirs, files) ->
    files.forEach (_file) ->
      return unless path.extname(_file) is '.xml'
      xmlPath = path.join(dirpath, _file)
      data = fs.readFileSync xmlPath
      parser = new xml2js.Parser()
      parser.parseString data, (err, result) ->
        # Record the failure instead of dereferencing an undefined result.
        if err
          parseErrors.push "#{xmlPath}: #{err.message}"
          return
        # To inspect a title: result["TeamMentor_Article"].Metadata[0].Title[0]
        filesProcessed++ if result?["TeamMentor_Article"]
        return
      return
    return

  console.log "files processed: #{filesProcessed}"
  expect(parseErrors, 'XML files that failed to parse').to.be.empty
  return
# Pending spec (xit): converts every .xml file found under the unzipped
# library folder into a pretty-printed JSON file named <xml file name>.json
# inside ./data/json/<library>.
xit 'read xml -> save JSON', ->
  @timeout(15000)
  #zipFile = 'Lib_HTML5-master'   # files:  165  ✓ xml -> JSON (386ms)
  zipFile = 'Lib_UNO-master'      # files: 4997  ✓ xml -> JSON(8619ms)
  unzipedFolder = "./data/unziped/#{zipFile}"
  jsonFolder = "./data/json/#{zipFile}"

  # Create the parent folder first: mkdirSync on jsonFolder alone fails when
  # ./data/json does not exist yet.
  for folder in ['./data/json', jsonFolder]
    fs.mkdirSync(folder) unless fs.existsSync(folder)
  console.log "JSON files saved to #{jsonFolder}"

  filesProcessed = 0
  parseErrors = []
  file.walkSync unzipedFolder, (dirpath, dirs, files) ->
    files.forEach (_file) ->
      return unless path.extname(_file) is '.xml'
      xmlPath  = path.join(dirpath, _file)
      jsonFile = path.join(jsonFolder, _file + ".json")
      data = fs.readFileSync xmlPath
      parser = new xml2js.Parser()
      parser.parseString data, (err, result) ->
        # Do not write a file for XML that could not be parsed.
        if err
          parseErrors.push "#{xmlPath}: #{err.message}"
          return
        fs.writeFileSync(jsonFile, JSON.stringify(result, null, ' '))
        filesProcessed++
        return
      return
    return

  console.log "files processed: #{filesProcessed}"
  expect(parseErrors, 'XML files that failed to parse').to.be.empty
  return
# Pending spec (xit): loads every .json file under ./data/json/<library> and
# counts the documents that have a TeamMentor_Article root.
xit 'read JSON', ->
  #@timeout(5000)
  #zipFile = 'Lib_HTML5-master'   # files:  164  ✓ JSON load (21ms)
  zipFile = 'Lib_UNO-master'      # files: 4997  ✓ xml -> JSON(561ms)
  jsonFolder = "./data/json/#{zipFile}"
  filesProcessed = 0

  file.walkSync jsonFolder, (dirpath, dirs, files) ->
    # Only parse .json files; anything else in the folder would make
    # JSON.parse throw.
    for jsonFile in files when path.extname(jsonFile) is '.json'
      result = JSON.parse fs.readFileSync(path.join(dirpath, jsonFile))
      # To inspect a title: result["TeamMentor_Article"].Metadata[0].Title[0]
      filesProcessed++ if result?["TeamMentor_Article"]
    return

  console.log "files processed: #{filesProcessed}"
  return
# Loads every .json article from ./data/json/<library> into memory and logs
# the articles whose title contains the search term (case-sensitive).
# Recorded run: data load and filter in 580ms.
it 'filter JSON', ->
  zipFile = 'Lib_UNO-master'
  jsonFolder = "./data/json/#{zipFile}"
  search = "XSS"                  # alternative search term: "Logging"

  # The loop variable is jsonFile rather than `file`, which would shadow the
  # required 'file' module. Non-.json entries are skipped so JSON.parse is
  # never fed a stray file.
  articles = []
  for jsonFile in fs.readdirSync(jsonFolder) when path.extname(jsonFile) is '.json'
    parsed = JSON.parse fs.readFileSync(path.join(jsonFolder, jsonFile))
    articles.push(parsed["TeamMentor_Article"]) if parsed?["TeamMentor_Article"]
  console.log "there are #{articles.length} articles loaded"

  matches = []
  for article in articles
    # Articles without a plain-string title are skipped instead of throwing.
    title = article.Metadata?[0]?.Title?[0]
    continue unless typeof title is 'string'
    matches.push({title: title, article: article}) if title.indexOf(search) > -1
  console.log "there are #{matches.length} matches for #{search}"
  console.log(matches)
  return
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment