Created
August 27, 2018 03:11
-
-
Save srdg/796a06e8518cece12d64ca1b2a2d4505 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import shutil | |
import cv2 | |
from PIL import Image | |
def rename_files():
    """Rename the entries of every sub-folder of the current directory.

    For each directory found directly under ``os.getcwd()`` (names ending in
    ``.py`` are skipped), its entries are taken in sorted order and renamed to
    ``<folder>_<n><ext>``, where ``n`` starts at 1 in every folder and
    ``ext`` is the entry's own extension (empty when it has none).
    Top-level entries that are not directories are left untouched.

    Raises:
        OSError: propagated unchanged from ``os.listdir`` / ``os.rename``.
    """
    root = os.getcwd()
    for folder in sorted(os.listdir(root)):
        folder_path = os.path.join(root, folder)
        # Only directories can be listed; stray top-level files would make
        # os.listdir() fail.
        if folder.endswith(".py") or not os.path.isdir(folder_path):
            continue
        print("Renaming files in", folder)

        # Phase 1: park every entry under a temporary name.  Renaming
        # straight to the final name could overwrite a sibling that has not
        # been processed yet (e.g. '0.png' -> 'b_1.png' while 'b_1.png'
        # still exists).
        staged = []
        for idx, name in enumerate(sorted(os.listdir(folder_path)), start=1):
            # splitext keeps extensions of any length ('.jpeg', '.py', '').
            ext = os.path.splitext(name)[1]
            temp_path = os.path.join(
                folder_path, ".renaming_{}{}".format(idx, ext))
            final_path = os.path.join(
                folder_path, "{}_{}{}".format(folder, idx, ext))
            os.rename(os.path.join(folder_path, name), temp_path)
            staged.append((temp_path, final_path))

        # Phase 2: every original name is now free, so the final names
        # cannot collide with anything.
        for temp_path, final_path in staged:
            os.rename(temp_path, final_path)
def accumulate(lang=None):
    """Move the contents of every sub-folder of the current directory into ``lang``.

    Each entry of each directory found directly under ``os.getcwd()`` (names
    ending in ``.py`` are skipped) is moved to ``<cwd>/<lang>/``, with spaces
    in its name replaced by underscores.  The destination folder is created
    when it does not exist yet.  Top-level entries that are not directories
    are left untouched.

    Args:
        lang: name of the destination folder, relative to the current
            directory.

    Raises:
        ValueError: if ``lang`` is None.
        OSError / shutil.Error: propagated unchanged from the file operations.
    """
    if lang is None:
        raise ValueError("lang must name the destination folder")

    root = os.getcwd()
    target = os.path.join(root, lang)
    # Without the destination folder the very first move would fail.
    os.makedirs(target, exist_ok=True)

    for folder in sorted(os.listdir(root)):
        folder_path = os.path.join(root, folder)
        # Stray top-level files cannot be listed; skip them instead of crashing.
        if folder.endswith(".py") or not os.path.isdir(folder_path):
            continue
        for name in sorted(os.listdir(folder_path)):
            shutil.move(os.path.join(folder_path, name),
                        os.path.join(target, name.replace(' ', '_')))
def preprocess(home=None):
    """Write a binarized copy of every image found in ``<cwd>/<home>``.

    Each file is read as a single-channel image, thresholded at 127
    (``cv2.THRESH_BINARY``, maximum value 255) and saved in the same folder
    as ``Binarized_<original name>``.

    Skipped entries:
      * names ending in ``.db`` (presumably thumbnail caches such as
        Thumbs.db -- TODO confirm),
      * names already starting with ``Binarized_``, so a second run does not
        binarize its own output again,
      * anything ``cv2.imread`` cannot decode.

    Args:
        home: folder name, relative to the current directory.

    Raises:
        ValueError: if ``home`` is None.
    """
    if home is None:
        raise ValueError("home must name the image folder")

    folder = os.path.join(os.getcwd(), home)
    for name in sorted(os.listdir(folder)):
        if name.endswith('.db') or name.startswith('Binarized_'):
            continue
        # Flag 0 loads the image as grayscale.
        img = cv2.imread(os.path.join(folder, name), 0)
        if img is None:
            # imread signals an unreadable file by returning None; passing
            # that on to cv2.threshold would raise.
            print("Skipping unreadable file", name)
            continue
        _, binary = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)
        cv2.imwrite(os.path.join(folder, 'Binarized_' + name), binary)
def main():
    """Drive the pipeline: rename, then collect, then binarize.

    The files are renamed first; only afterwards is the user asked for the
    folder that the collecting and binarizing steps both operate on.
    """
    rename_files()
    destination = input("Enter path ").strip()
    # Both remaining steps take the same folder name, in this order.
    for step in (accumulate, preprocess):
        step(destination)


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from googletrans import Translator | |
import time | |
import os | |
import shutil | |
def file_names(path=None):
    """Return the names of the entries in ``path``, sorted ascending.

    Args:
        path: directory to list.

    Raises:
        ValueError: if ``path`` is None.
        OSError: propagated unchanged from ``os.listdir``.
    """
    if path is None:
        raise ValueError("path must not be None")
    return sorted(os.listdir(path))
def translate_files(path=None, delay=1):
    """Translate every text file in ``path`` into a sibling results folder.

    The lines of each input file are joined with single spaces and sent to
    the translator in one request.  The translated text is written to
    ``<parent of path>/translated-results/Translated_<file>``.  When the
    request fails, an empty ``ERR-Translated_<file>`` is written instead so
    the failure stays visible.

    Files that already have a ``Translated_`` or ``ERR-Translated_`` output
    are skipped, which lets an interrupted run be resumed.

    Args:
        path: folder holding the files to translate.
        delay: seconds to pause after each file.  The pause is there so the
            Google API does not block the current IP address.

    Raises:
        ValueError: if ``path`` is None.
        OSError: propagated unchanged from the file operations.
    """
    if path is None:
        raise ValueError("path must name the folder holding the files to translate")

    # Results go next to the input folder, whatever that folder is called;
    # normpath drops a trailing separator so dirname yields the real parent.
    out_dir = os.path.join(os.path.dirname(os.path.normpath(path)),
                           'translated-results')
    os.makedirs(out_dir, exist_ok=True)
    finished = set(os.listdir(out_dir))

    translator = Translator()
    for file in file_names(path):
        out_name = 'Translated_' + file
        # Resume by name, not by count: unrelated files in the results
        # folder must not shift which inputs are skipped.
        if out_name in finished or 'ERR-' + out_name in finished:
            continue

        with open(os.path.join(path, file), 'r', encoding='utf-8') as f_in:
            # rstrip('\n') keeps the last character of a final line that has
            # no trailing newline.
            text = ''.join(line.rstrip('\n') + ' ' for line in f_in)

        translated = ''
        try:
            translated = translator.translate(text).text
        except Exception as err:
            # Best effort: record the failure and move on to the next file.
            # KeyboardInterrupt / SystemExit are deliberately not caught.
            print("JSON Decode error occurred!", err)
            out_name = 'ERR-' + out_name

        # The output is only created once the request has finished, so an
        # interrupted run never leaves a half-written file that would be
        # mistaken for a finished one.
        with open(os.path.join(out_dir, out_name), 'w', encoding='utf-8') as f_out:
            f_out.write(translated)

        print('Translating '+file+' ... done.')
        time.sleep(delay)
def main():
    """Ask for the dataset folder and translate its ``results`` sub-folder.

    The folder name is taken relative to the current directory.  A
    ``translated-results`` folder is created inside it when missing, then
    ``<folder>/results`` is handed to ``translate_files``.
    """
    # checklist : path to dataset
    path = '/' + input("Enter path ").strip() + '/'
    base = os.getcwd() + path
    # exist_ok tolerates an existing folder but still reports real failures
    # (permissions, a plain file in the way) instead of hiding them.
    os.makedirs(base + 'translated-results', exist_ok=True)
    translate_files(base + 'results')


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment