Skip to content

Instantly share code, notes, and snippets.

@VanTanev
Last active May 21, 2019 10:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save VanTanev/ebc7a65994a70d375c8d8b922f8192bb to your computer and use it in GitHub Desktop.
Save VanTanev/ebc7a65994a70d375c8d8b922f8192bb to your computer and use it in GitHub Desktop.
A script to convert filenames with Japanese characters to romanized versions
#!/usr/bin/env bash
#
# Copies files from the current directory whose names contain non-ASCII
# characters into a target directory under a romanized name.
#
# Environment:
#   REMOVE_ORIGINALS  set to 1 to delete each original after it was copied
#                     (default: 0)
#   ROMANIZED_DIR     directory receiving the romanized copies
#                     (default: romanized)

# Strict mode: stop on errors, on unset variables and on failing pipelines.
set -euo pipefail
# set -x   # uncomment to trace every command

# Keep values supplied by the environment; otherwise fall back to defaults.
: "${REMOVE_ORIGINALS=0}"
: "${ROMANIZED_DIR=romanized}"

# The destination must exist before any file is copied into it.
mkdir -p "$ROMANIZED_DIR"
#######################################
# Romanize every file reported by list_bad_files, showing progress.
# The list is read once, so the total and the files processed always agree.
# Arguments: none
# Outputs:   progress lines and a final "Done" via overwrite_line
# Returns:   non-zero as soon as process_file fails for a file
#######################################
main() {
  local -a files=()
  local file
  local count=0
  local index=1

  # Process substitution is deliberate: the grep-based filters behind
  # list_bad_files exit with status 1 when nothing is selected, which must
  # mean "no files to convert" rather than abort the script under
  # `set -e -o pipefail`.
  while IFS= read -r file; do
    files+=("$file")
    count=$((count + 1))
  done < <(list_bad_files)

  # Index-based loop: expanding an empty array trips `set -u` on older bash.
  while (( index <= count )); do
    process_file "${files[index - 1]}" "$count" "$index" || return
    index=$((index + 1))
  done

  overwrite_line "Done"
}
#######################################
# Copy one file into ROMANIZED_DIR under its romanized name, then
# optionally remove the original (see cond_remove_file).
# Globals:   ROMANIZED_DIR (read)
# Arguments: $1 - path of the file to process
#            $2 - total number of files (progress display)
#            $3 - 1-based position of this file (progress display)
# Outputs:   one progress line via overwrite_line; errors on stderr
# Returns:   non-zero if the name cannot be romanized or the copy fails;
#            in both cases the original file is left untouched
#######################################
process_file() {
  local file=$1
  local total index romanized_file

  # Counters are zero-padded to four digits. Whitespace is stripped first
  # in case the caller passes a padded number (e.g. raw `wc -l` output).
  printf -v total '%04d' "${2//[[:space:]]/}" || return
  printf -v index '%04d' "${3//[[:space:]]/}" || return

  # Declaration and assignment are separate so that a failing conversion
  # is not hidden behind the exit status of `local`.
  if ! romanized_file=$(generate_romanized_name "$file"); then
    printf 'Failed to romanize file name: %s\n' "$file" >&2
    return 1
  fi
  if [[ -z "$romanized_file" ]]; then
    printf 'Romanized file name is empty for: %s\n' "$file" >&2
    return 1
  fi

  overwrite_line "($index/$total) Processing: $romanized_file"

  # Never reach the removal step unless the copy succeeded.
  copy_file "$file" "$ROMANIZED_DIR/$romanized_file" || return
  cond_remove_file "$file"
}
#######################################
# Copy a file, preferring rsync and falling back to cp.
# Arguments: $1 - source path
#            $2 - destination path
# Returns:   0 if rsync succeeded, otherwise the exit status of cp
#######################################
copy_file() {
  local src=$1
  local dest=$2

  if ! rsync -a -- "$src" "$dest"; then
    # rsync failed or is unavailable; cp -n refuses to overwrite an
    # existing destination.
    cp -n -- "$src" "$dest"
  fi
}
#######################################
# Delete a file, but only when REMOVE_ORIGINALS equals 1.
# Globals:   REMOVE_ORIGINALS (read)
# Arguments: $1 - path of the file to delete
# Returns:   0 when removal is disabled, otherwise the exit status of rm
#######################################
cond_remove_file() {
  if [ "$REMOVE_ORIGINALS" -eq 1 ]; then
    # `--` ends option parsing so a name starting with '-' is treated as a
    # file name rather than as options to rm.
    rm -- "$1"
  fi
}
#######################################
# Replace the previously printed terminal line with a new message.
# Arguments: $1 - text to display, printed verbatim
# Outputs:   cursor-control sequences followed by the text and a newline
#######################################
overwrite_line() {
  # Move the cursor up one line (ANSI "cursor up" sequence).
  printf '\033[1A'
  # Clear to the end of the line. If tput cannot do it (for example when no
  # usable TERM is set), emit the ANSI "erase in line" sequence directly
  # instead of letting the failure abort a script running under `set -e`.
  tput el 2>/dev/null || printf '\033[K'
  # '%s' prints the message as-is, so backslashes that happen to occur in a
  # file name are not interpreted as escape sequences.
  printf '%s\n' "$1"
}
#######################################
# Produce an ASCII (romanized) version of a file name.
# Arguments: $1 - file name, expected to be UTF-8
# Outputs:   the romanized name on stdout
# Returns:   exit status of the pipeline (the last stage only, unless the
#            caller has enabled `pipefail`)
#######################################
generate_romanized_name() {
  # printf instead of echo: echo would swallow names such as "-n" or "-e".
  # Stages:
  #   1. re-encode UTF-8 to EUC-JP, the encoding kakasi is told to read
  #   2. first kakasi pass (-w; word splitting per kakasi's documentation)
  #   3. second kakasi pass converting the Japanese character classes to
  #      ASCII (-Ha -Ka -Ja -Ea -ka)
  #   4. transliterate anything that is still non-ASCII
  # NOTE(review): stage 4 declares its input as utf8 although kakasi was
  # fed EUC-JP; presumably fine because the text is ASCII by then - confirm.
  printf '%s\n' "$1" \
    | iconv -f utf8 -t eucjp \
    | kakasi -i euc -w \
    | kakasi -i euc -Ha -Ka -Ja -Ea -ka \
    | iconv -f utf8 -t ascii//translit
}
#######################################
# List the files that need a romanized copy, one path per line: every
# candidate from all_files whose name passes find_non_ascii_chars and is
# not dropped by remove_incomplete_downloads.
# Outputs: the selected paths on stdout
#######################################
list_bad_files() {
  all_files | find_non_ascii_chars | remove_incomplete_downloads
}
#######################################
# List the regular files located directly in the current directory
# (no recursion into subdirectories), one "./name" path per line.
# Outputs: the paths on stdout
#######################################
all_files() {
  find . -maxdepth 1 -type f -print
}
find_non_ascii_chars() { perl -ne 'print if ! /^[[:ascii:]]+$/'; }
#######################################
# Filter stdin, dropping paths of unfinished downloads, i.e. names ending
# in the literal suffixes ".!qB" or ".part".
# Outputs: the remaining lines on stdout
# Returns: 0 even when nothing is left; grep's status on a real error
#######################################
remove_incomplete_downloads() {
  local status=0

  # The dots are escaped so only a literal ".part" / ".!qB" suffix matches;
  # an unescaped dot would also exclude a file such as "depart".
  grep -v -e '\.!qB$' -e '\.part$' || status=$?

  # grep exits with 1 when it selects no lines. An empty result is valid
  # here and must not fail callers running with `set -e -o pipefail`.
  if (( status > 1 )); then
    return "$status"
  fi
  return 0
}
# Script entry point: run the conversion.
main
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment