Skip to content

Instantly share code, notes, and snippets.

@temberature
Created February 15, 2019 15:52
Show Gist options
  • Save temberature/02fcb0f68ae5ab8abafa63ec57a418ac to your computer and use it in GitHub Desktop.
Save temberature/02fcb0f68ae5ab8abafa63ec57a418ac to your computer and use it in GitHub Desktop.
var fs = require('fs');
var path = require('path');
var OpenCC = require('opencc');
// Load OpenCC's 't2s.json' config, i.e. Traditional Chinese -> Simplified Chinese.
const opencc = new OpenCC('t2s.json');

// Quick check of the synchronous API: prints the converted form of "漢字".
const converted = opencc.convertSync("漢字");
console.log(converted);

// Directory that is scanned for OCR result files (*.json); each converted
// .txt file is written next to its source .json file.
const dir = './8167-[国民经济学原理].(奥)卡尔·门格尔.扫描版.pdf/';

// Drops the newline that follows a run of 20 or more characters, so the next
// line continues on the same line; newlines after shorter lines are kept.
// NOTE(review): presumably this re-joins lines hard-wrapped by the OCR layout
// while keeping short lines (headings, paragraph ends) — confirm the threshold.
const WRAPPED_LINE_BREAK = /(.{20,})\n/g;

fs.readdirSync(dir).forEach((base) => {
  const p = path.parse(base);
  if (p.ext !== '.json') {
    return;
  }

  // Reading and parsing happen inside the try block as well, so a single
  // unreadable or malformed file is reported and skipped instead of aborting
  // the whole batch.
  try {
    const ocr = JSON.parse(fs.readFileSync(path.join(dir, base), 'utf8'));
    console.log(ocr);

    // Throws a TypeError (caught below) when the file has no
    // responses[0].fullTextAnnotation.text, e.g. no text was recognized.
    const content = ocr['responses'][0]['fullTextAnnotation']['text'].replace(WRAPPED_LINE_BREAK, '$1');

    fs.writeFileSync(path.join(dir, p.name + '.txt'), opencc.convertSync(content));
  } catch (e) {
    // Name the offending file so failures can be traced back to their input.
    console.error(`Failed to convert ${base}:`, e);
  }
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment