Created
November 14, 2021 05:11
-
-
Save Konfekt/fe8728ac15bdc294f4e20994f1f627ac to your computer and use it in GitHub Desktop.
Run Apache Tika (a document text viewer/extractor) from cmd with the console switched to UTF-8 encoding
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@echo off
rem Run Apache Tika from cmd with the console temporarily switched to UTF-8.
rem All command-line arguments are forwarded to tika unchanged.
rem Exit code: the exit code of java, or 1 when the tika jar is missing.
setlocal

rem From https://tika.apache.org/download.html
rem The quotes wrap the whole assignment so they do not become part of the value.
set "tika_jar=%USERPROFILE%\bin\tika.jar"
rem The stderr of java is discarded further down, so report a missing jar here.
if not exist "%tika_jar%" goto :missing_jar

rem From https://superuser.com/questions/1118106/can-the-utf-8-code-page-identifier-65001-be-different-on-other-computers/1121076#1121076
rem capture output of chcp after colon (the value keeps its leading space)
for /F "tokens=2 delims=:" %%G in ('chcp') do set "_chcp=%%G"
rem strip trailing dots (some localized chcp messages end with a period)
IF "%_chcp:~-1%"=="." set "_chcp=%_chcp:~0,-1%"

rem change active code page to UTF-8 and back
rem The chain is kept on ONE line so cmd parses all of it before the code page
rem changes. NOTE(review): presumably why the original was a one-liner - confirm.
rem "call set" with doubled percent signs reads ERRORLEVEL at run time, right
rem after java finishes, instead of when the line is parsed.
cmd /U /D /c CHCP 65001>NUL & java -Dfile.encoding=UTF-8 -jar "%tika_jar%" %* 2>NUL & call set "_rc=%%ERRORLEVEL%%" & CHCP %_chcp%>NUL
rem Return the exit code of java rather than that of the final CHCP.
exit /b %_rc%

:missing_jar
>&2 echo tika: jar not found: "%tika_jar%"
exit /b 1
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment