Skip to content

Instantly share code, notes, and snippets.

@SubOptimal
Created October 31, 2014 08:49
Show Gist options
  • Save SubOptimal/acd3059a0fde04157b6b to your computer and use it in GitHub Desktop.
Save SubOptimal/acd3059a0fde04157b6b to your computer and use it in GitHub Desktop.
extract a tar file with multibyte and reserved characters in filename on Windows
// answer for: http://stackoverflow.com/questions/26656427/how-to-extract-tar-gz-including-multi-byte-characters-and-prohibited-characters
package tar;
import java.io.*;
import java.nio.file.*;
import java.util.HashMap;
import java.util.Map;
import java.util.zip.GZIPInputStream;
import org.apache.tools.tar.*;
/**
 * Extracts a {@code .tar.gz} archive into the current working directory, replacing every
 * character that Windows reserves in file names with an underscore.
 *
 * <p>Both path separators are among the replaced characters, so the archive's directory
 * structure is flattened: an entry named {@code dir/file} is written as {@code dir_file}.
 */
public class Targz {

    /**
     * Regex character class matching the characters Windows reserves in file names:
     * {@code < > : " / \ | ? *}
     * (see http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247%28v=vs.85%29.aspx).
     */
    private static final String RESERVED_CHAR = "[<>:\\\"/\\\\|?\\*]";

    /**
     * Extracts the example archive {@code foo.tar.gz} from the current working directory.
     *
     * @param args ignored
     * @throws IOException if the archive cannot be read or an entry cannot be written
     */
    public static void main(String... args) throws IOException {
        // the example file "foo.tar.gz" contains the files
        // "foo¥_\.file"
        // "foo¥_?.file"
        // both sanitize to "foo¥__.file", so the second one is written as "foo¥__(1).file"
        extract(Paths.get("foo.tar.gz"));
    }

    /**
     * Extracts every entry of the given gzip-compressed tar archive into the current working
     * directory, printing each original entry name to standard output.
     *
     * <p>Reserved characters in entry names are replaced with {@code _}. When two different
     * entry names map to the same sanitized name, the later one gets a {@code (n)} counter
     * inserted before its extension instead of silently overwriting the earlier one. An entry
     * whose original name repeats within the archive keeps overwriting its own output file.
     *
     * @param path the archive to extract; its string form must end with {@code .tar.gz}
     * @throws IOException if the archive cannot be read, an entry cannot be written, or a
     *         directory entry cannot be created
     * @throws Error if {@code path} does not end with {@code .tar.gz}
     */
    public static void extract(Path path) throws IOException {
        if (!path.toString().endsWith(".tar.gz")) {
            // Error (rather than IllegalArgumentException) is kept so existing callers see
            // the same exception type as before.
            throw new Error("extension must be tar.gz.");
        }
        // output name -> original entry name that owns it during this extraction
        Map<String, String> owners = new HashMap<String, String>();
        try (TarInputStream tin = new TarInputStream(
                new GZIPInputStream(new BufferedInputStream(new FileInputStream(path.toFile()))))) {
            for (TarEntry tarEnt = tin.getNextEntry(); tarEnt != null; tarEnt = tin.getNextEntry()) {
                String entryName = tarEnt.getName();
                System.out.println("tarEnt = " + entryName);
                File target = new File(claimOutputName(entryName, owners));
                if (tarEnt.isDirectory()) {
                    // mkdir() also returns false when the directory already exists, so only
                    // treat it as a failure when no directory is there afterwards
                    if (!target.isDirectory() && !target.mkdir()) {
                        throw new IOException("could not create directory: " + target);
                    }
                } else {
                    try (FileOutputStream fos = new FileOutputStream(target)) {
                        tin.copyEntryContents(fos);
                    }
                }
            }
        }
    }

    /**
     * Sanitizes {@code entryName} and reserves an output name for it that no other entry name
     * has claimed, recording the claim in {@code owners}.
     */
    private static String claimOutputName(String entryName, Map<String, String> owners) {
        String base = entryName.replaceAll(RESERVED_CHAR, "_");
        String candidate = base;
        for (int counter = 1; ; counter++) {
            String owner = owners.get(candidate);
            if (owner == null) {
                owners.put(candidate, entryName);
                return candidate;
            }
            if (owner.equals(entryName)) {
                // same archive entry seen again: reuse its name so it overwrites itself
                return candidate;
            }
            candidate = withCounter(base, counter);
        }
    }

    /** Inserts {@code (counter)} before the last extension of {@code name}, or appends it. */
    private static String withCounter(String name, int counter) {
        int dot = name.lastIndexOf('.');
        if (dot <= 0) {
            return name + "(" + counter + ")";
        }
        return name.substring(0, dot) + "(" + counter + ")" + name.substring(dot);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment