Created
April 26, 2015 05:51
-
-
Save gmcclins/9d848118e8ea5c1befd9 to your computer and use it in GitHub Desktop.
Cleans up text files by removing non-ASCII characters.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# Cleans up a text file by removing non-ASCII bytes.
# Author: Geoffrey McClinsey
#
# Interactive usage -- answers are read from stdin in this order:
#   1. path of the file that contains non-ASCII text
#      (include the path if it is not in the current directory)
#   2. path of the cleaned copy to create
#      (a bare name is created in the current directory)
#   3. whether to keep the original file [Y/n]
#
# Only tab, line feed, carriage return and printable ASCII (octal 40-176)
# are kept; every other byte is dropped.
#
# Safety rules enforced before anything is written:
#   * the input must be an existing, readable regular file;
#   * the output must not be the same file as the input, because the
#     output redirection would truncate it before tr could read it;
#   * an existing output file is never overwritten.
set -euo pipefail

# Print an error message to stderr and abort with status 1.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Prompt for the file names, write the cleaned copy, then optionally
# delete the original.
main() {
  local dirty='' clean='' response=''

  # Prompts are quoted so the trailing '?' is never treated as a glob.
  printf '%s\n' 'what file has the non-ascii text?'
  # -r keeps backslashes in the path literal; '|| true' lets the explicit
  # checks below report missing input instead of a silent set -e exit.
  read -r dirty || true
  printf '\v\n'

  printf '%s\n' 'what would you like to name the clean file'
  read -r clean || true
  printf '\v\n'

  [[ -n "$dirty" ]] || die 'no input file name given'
  [[ -n "$clean" ]] || die 'no output file name given'
  [[ -f "$dirty" && -r "$dirty" ]] \
    || die "cannot read input file: $dirty"

  # -ef is true when both names resolve to the same file (including via
  # hard links or symlinks).
  [[ ! "$dirty" -ef "$clean" ]] \
    || die 'input and output are the same file; choose a different output name'
  [[ ! -e "$clean" ]] \
    || die "output file already exists, refusing to overwrite: $clean"

  # LC_ALL=C makes tr operate on single bytes regardless of the user's locale.
  LC_ALL=C tr -cd '\11\12\15\40-\176' < "$dirty" > "$clean"

  # An empty answer (or EOF) selects the capitalised default: keep.
  read -r -p "Do you want to keep the original? [Y/n] " response || true
  case "${response,,}" in
    n | no)
      rm -- "$dirty"
      ;;
    '' | y | yes)
      ;;
    *)
      # Anything that is not a clear "no" must never delete data.
      printf 'unrecognised answer "%s"; keeping the original\n' \
        "$response" >&2
      ;;
  esac
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment