Skip to content

Instantly share code, notes, and snippets.

@KnowledgeGarden
Last active February 7, 2017 09:23
Show Gist options
  • Save KnowledgeGarden/90cecd04d0de14809253 to your computer and use it in GitHub Desktop.
Save KnowledgeGarden/90cecd04d0de14809253 to your computer and use it in GitHub Desktop.
BZ2 Importer for YodaQA
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import javax.swing.JFileChooser;
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
/**
* @author park
* @license Apache 2
*/
public class Main {
private PrintWriter out;
/**
*
*/
public Main() {
//Get the Directory to load
JFileChooser chooser = new JFileChooser(new File("."));
chooser.setDialogTitle("EnWiki-Text");
chooser.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);
int retVal = chooser.showOpenDialog(null);
if(retVal == JFileChooser.APPROVE_OPTION) {
File dir = chooser.getSelectedFile();
//Create output file
System.out.println(dir.getAbsolutePath());
chooser.setDialogTitle("SaveAs");
retVal = chooser.showSaveDialog(null);
if(retVal == JFileChooser.APPROVE_OPTION) {
File outFile = chooser.getSelectedFile();
try {
FileOutputStream os = new FileOutputStream(outFile);
out = new PrintWriter(os);
out.print("<wikitext>\n");
File [] directories = dir.listFiles();
int len = directories.length;
for (int i=0;i<len;i++) {
System.out.println("DIR: "+directories[i].getName());
processDirectory(directories[i]);
}
out.print("</wikitext>\n");
out.flush();
out.close();
} catch (Exception e) {
e.printStackTrace();
}
}
}
}
private void processDirectory(File d) throws Exception {
File [] files = d.listFiles();
int len = files.length;
for (int i=0;i<len;i++) {
System.out.println("FILE: "+files[i].getName());
processFile(files[i]);
}
}
/**
* Write text to out
* Code borrowed from: http://stackoverflow.com/questions/2322944/uncompress-bzip2-archive
* @param f a .bz2 file
*/
private void processFile(File f) throws Exception {
FileInputStream in = new FileInputStream(f);
BZip2CompressorInputStream bzIn = new BZip2CompressorInputStream(in);
int n = 0;
while (-1 != (n = bzIn.read())) {
out.print((char)n);
}
bzIn.close();
}
/**
* @param args
*/
public static void main(String[] args) {
new Main();
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment