Skip to content

Instantly share code, notes, and snippets.

@awesomephant
Created April 22, 2018 16:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save awesomephant/e709a9c9ffeea5901c8589888c785661 to your computer and use it in GitHub Desktop.
Save awesomephant/e709a9c9ffeea5901c8589888c785661 to your computer and use it in GitHub Desktop.
Switchboard Parser
const fs = require('fs');
const path = require('path');
const parse = require('csv-parse/lib/sync');
const mkdirp = require('mkdirp');
// Input layout: one sub-directory per topic, each holding one CSV per dialogue.
const dataDir = './data/switchboard_conversations/';
// Output layout mirrors the input: <outputDir>/<topic>/<csv filename>.txt
const outputDir = './data/clean/';

/**
 * Render the parsed rows of one dialogue CSV as a plain-text transcript.
 *
 * The header is built from the first row (prompt, swda_filename,
 * conversation_no); every row then contributes one line of the form
 * "<caller>.<utterance_index>: <clean_text>".
 *
 * @param {Object[]} records - Non-empty array of row objects keyed by CSV column name.
 * @returns {string} The transcript text, ending with a newline.
 */
function formatDialogue(records) {
  const first = records[0];
  // NOTE(review): the labels 'SWDA_Filename' and 'Conversation No' are joined
  // to their values with no separator. Kept byte-identical because downstream
  // consumers may depend on the current format -- confirm before changing.
  const header = first.prompt
    + '\nSWDA_Filename' + first.swda_filename
    + '\nConversation No' + first.conversation_no
    + '\n\n';
  const lines = records.map(
    (row) => row.caller + '.' + row.utterance_index + ': ' + row.clean_text + '\n'
  );
  return header + lines.join('');
}

const topics = fs.readdirSync(dataDir);
for (const topic of topics) {
  const topicDir = path.join(dataDir, topic);
  // Stray non-directory entries (e.g. OS metadata files) would make the
  // readdirSync below throw ENOTDIR, so skip them.
  if (!fs.statSync(topicDir).isDirectory()) {
    continue;
  }

  const files = fs.readdirSync(topicDir);
  console.log('Topic: ' + topic);
  console.log(files.length + ' dialogues found.\n');
  if (files.length === 0) {
    // Nothing to write; do not create an empty output directory.
    continue;
  }

  // Create the output directory once per topic, synchronously. The previous
  // callback form ignored its error argument and deferred every write into an
  // asynchronous callback.
  const outputPath = path.join(outputDir, topic);
  mkdirp.sync(outputPath);

  for (const filename of files) {
    const file = fs.readFileSync(path.join(topicDir, filename), 'utf-8');
    const records = parse(file, { columns: true });
    if (records.length === 0) {
      // The header is built from the first row, so a CSV without data rows
      // cannot be rendered.
      console.warn('Skipping ' + path.join(topicDir, filename) + ': no records.');
      continue;
    }
    // Let's make a clean text file
    fs.writeFileSync(path.join(outputPath, filename + '.txt'), formatDialogue(records), 'utf-8');
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment