Created
October 31, 2014 08:49
-
-
Save SubOptimal/acd3059a0fde04157b6b to your computer and use it in GitHub Desktop.
extract a tar file with multibyte and reserved characters in filename on Windows
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// answer for: http://stackoverflow.com/questions/26656427/how-to-extract-tar-gz-including-multi-byte-characters-and-prohibited-characters | |
package tar; | |
import org.apache.tools.tar.*; | |
import java.io.*; | |
import java.nio.file.*; | |
import java.util.zip.GZIPInputStream; | |
public class Targz { | |
private static final String RESERVED_CHAR = "[<>:\\\"/\\\\|?\\*]"; | |
public static void main(String... args) throws IOException { | |
// the example file "foo.tar.gz" contains the files | |
// "foo¥_\.file" | |
// "foo¥_?.file" | |
extract(Paths.get("foo.tar.gz")); | |
} | |
public static void extract(Path path) throws IOException { | |
if (!path.toString().endsWith(".tar.gz")) { | |
throw new Error("extension must be tar.gz."); | |
} | |
try (TarInputStream tin = new TarInputStream(new GZIPInputStream(new FileInputStream(path.toFile())))) { | |
for (TarEntry tarEnt = tin.getNextEntry(); tarEnt != null; tarEnt = tin.getNextEntry()) { | |
String entryName = tarEnt.getName(); | |
System.out.println("tarEnt = " + entryName); | |
/* | |
replace all reserved characters | |
http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247%28v=vs.85%29.aspx | |
< (less than) | |
> (greater than) | |
: (colon) | |
" (double quote) | |
/ (forward slash) | |
\ (backslash) | |
| (vertical bar or pipe) | |
? (question mark) | |
* (asterisk) | |
*/ | |
String sanitized = entryName.replaceAll(RESERVED_CHAR, "_"); | |
// check is needed to check if directory/file already exist | |
if (tarEnt.isDirectory()) { | |
new File(sanitized).mkdir(); | |
} else { | |
try (FileOutputStream fos = new FileOutputStream(new File(sanitized))) { | |
tin.copyEntryContents(fos); | |
} | |
} | |
} | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment