Last active
February 7, 2017 09:23
-
-
Save KnowledgeGarden/90cecd04d0de14809253 to your computer and use it in GitHub Desktop.
BZ2 Importer for YodaQA
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.*;
import java.nio.charset.StandardCharsets;
import javax.swing.JFileChooser;
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
/** | |
* @author park | |
* @license Apache 2 | |
*/ | |
public class Main { | |
private PrintWriter out; | |
/** | |
* | |
*/ | |
public Main() { | |
//Get the Directory to load | |
JFileChooser chooser = new JFileChooser(new File(".")); | |
chooser.setDialogTitle("EnWiki-Text"); | |
chooser.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY); | |
int retVal = chooser.showOpenDialog(null); | |
if(retVal == JFileChooser.APPROVE_OPTION) { | |
File dir = chooser.getSelectedFile(); | |
//Create output file | |
System.out.println(dir.getAbsolutePath()); | |
chooser.setDialogTitle("SaveAs"); | |
retVal = chooser.showSaveDialog(null); | |
if(retVal == JFileChooser.APPROVE_OPTION) { | |
File outFile = chooser.getSelectedFile(); | |
try { | |
FileOutputStream os = new FileOutputStream(outFile); | |
out = new PrintWriter(os); | |
out.print("<wikitext>\n"); | |
File [] directories = dir.listFiles(); | |
int len = directories.length; | |
for (int i=0;i<len;i++) { | |
System.out.println("DIR: "+directories[i].getName()); | |
processDirectory(directories[i]); | |
} | |
out.print("</wikitext>\n"); | |
out.flush(); | |
out.close(); | |
} catch (Exception e) { | |
e.printStackTrace(); | |
} | |
} | |
} | |
} | |
private void processDirectory(File d) throws Exception { | |
File [] files = d.listFiles(); | |
int len = files.length; | |
for (int i=0;i<len;i++) { | |
System.out.println("FILE: "+files[i].getName()); | |
processFile(files[i]); | |
} | |
} | |
/** | |
* Write text to out | |
* Code borrowed from: http://stackoverflow.com/questions/2322944/uncompress-bzip2-archive | |
* @param f a .bz2 file | |
*/ | |
private void processFile(File f) throws Exception { | |
FileInputStream in = new FileInputStream(f); | |
BZip2CompressorInputStream bzIn = new BZip2CompressorInputStream(in); | |
int n = 0; | |
while (-1 != (n = bzIn.read())) { | |
out.print((char)n); | |
} | |
bzIn.close(); | |
} | |
/** | |
* @param args | |
*/ | |
public static void main(String[] args) { | |
new Main(); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment