Skip to content

Instantly share code, notes, and snippets.

@srdg
Created August 27, 2018 03:11
Show Gist options
  • Save srdg/796a06e8518cece12d64ca1b2a2d4505 to your computer and use it in GitHub Desktop.
Save srdg/796a06e8518cece12d64ca1b2a2d4505 to your computer and use it in GitHub Desktop.
import os
import shutil
import cv2
from PIL import Image
def rename_files():
    """Rename the entries of every sub-folder of the current directory.

    Each entry directly inside a sub-folder ``<folder>`` is renamed to
    ``<folder>_<n><ext>``, where ``n`` counts from 1 in sorted-name order and
    ``<ext>`` is the entry's own extension (empty when it has none). Only one
    level is processed; nested folders are renamed but not descended into.

    Raises:
        OSError: if a folder cannot be listed or an entry cannot be renamed.
    """
    root = os.getcwd()
    folders = sorted(os.listdir(root))
    print(folders)
    for folder in folders:
        print("in folders")
        folder_path = os.path.join(root, folder)
        # Scripts and any other stray files sitting next to the data folders
        # are not folders to process; listing them would raise
        # NotADirectoryError.
        if folder.endswith(".py") or not os.path.isdir(folder_path):
            continue

        names = sorted(os.listdir(folder_path))

        # Pick a staging prefix that no existing entry already uses.
        staging_prefix = "__renaming__"
        while any(name.startswith(staging_prefix) for name in names):
            staging_prefix += "_"

        # Phase 1: park every entry under a unique staging name. Renaming
        # straight to the final name could overwrite an entry that has not
        # been processed yet (e.g. "a_10" -> "a_2" while "a_2" still exists).
        staged = []
        for file_idx, name in enumerate(names, start=1):
            print("in files", name)
            # splitext copes with extensions of any length and with none.
            ext = os.path.splitext(name)[1]
            print(ext, end=' ')
            staged_path = os.path.join(
                folder_path, staging_prefix + str(file_idx) + ext)
            final_path = os.path.join(
                folder_path, folder + '_' + str(file_idx) + ext)
            os.rename(os.path.join(folder_path, name), staged_path)
            staged.append((staged_path, final_path))

        # Phase 2: every final name is free now, so nothing can be clobbered.
        for staged_path, final_path in staged:
            os.rename(staged_path, final_path)
def accumulate(lang=None):
    """Collect the files of every sub-folder of the CWD into one folder.

    Every entry found directly inside a sub-folder of the current directory
    is moved to ``<cwd>/<lang>/``, with spaces in its name replaced by
    underscores. The destination folder is created when it does not exist.

    Args:
        lang: name of the destination folder, relative to the current
            directory.

    Raises:
        ValueError: if ``lang`` is None.
        OSError: if a folder cannot be listed or an entry cannot be moved.
    """
    if lang is None:
        raise ValueError("lang must name the destination folder")

    root = os.getcwd()
    target_dir = root + '/' + lang
    # Without the destination directory the very first move would fail.
    os.makedirs(target_dir, exist_ok=True)

    for folder in sorted(os.listdir(root)):
        folder_path = os.path.join(root, folder)
        # Scripts and other stray files in the CWD are not source folders;
        # listing them would raise NotADirectoryError.
        if folder.endswith(".py") or not os.path.isdir(folder_path):
            continue
        # The destination folder itself is visited as well, which only
        # replaces spaces in the names of entries it already holds.
        for name in sorted(os.listdir(folder_path)):
            # NOTE(review): an entry of the same name already present in the
            # destination may be overwritten (platform dependent) -- confirm
            # names are unique before running.
            shutil.move(os.path.join(folder_path, name),
                        os.path.join(target_dir, name.replace(' ', '_')))
def preprocess(home=None):
    """Write a binarized copy of every image found in ``<cwd>/<home>``.

    Each entry whose name does not end in ``.db`` is loaded with
    ``cv2.imread(..., 0)``, thresholded at 127 with ``cv2.THRESH_BINARY``
    (maximum value 255) and saved next to the original as
    ``Binarized_<name>``. Entries OpenCV cannot load are reported and skipped.

    Args:
        home: name of the image folder, relative to the current directory.

    Raises:
        ValueError: if ``home`` is None.
        OSError: if the folder cannot be listed.
    """
    if home is None:
        raise ValueError("home must name the folder that holds the images")

    home_dir = os.getcwd() + '/' + home
    for name in sorted(os.listdir(home_dir)):
        print(name[-3:])
        # Skip database files (presumably thumbnail caches such as Thumbs.db).
        if name.endswith('.db'):
            continue
        img = cv2.imread(os.path.join(home_dir, name), 0)
        # imread signals failure by returning None instead of raising;
        # passing that on to threshold() would abort the whole run.
        if img is None:
            print("Skipping unreadable image: " + name)
            continue
        _, img = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)
        cv2.imwrite(os.path.join(home_dir, 'Binarized_' + name), img)
def main():
    """Run the image pipeline: rename, gather, then binarize.

    The folder name typed at the prompt (surrounding whitespace removed) is
    used both as the gathering destination and as the folder to preprocess.
    """
    rename_files()
    target = input("Enter path ").strip()
    accumulate(target)
    preprocess(target)


if __name__ == '__main__':
    main()
from googletrans import Translator
import time
import os
import shutil
def file_names(path=None):
    """Return the names of the entries in *path*, sorted.

    Args:
        path: directory to list.

    Raises:
        ValueError: if ``path`` is None.
        OSError: if ``path`` cannot be listed.
    """
    # os.listdir(None) would silently list the current directory, so a
    # missing path is rejected explicitly.
    if path is None:
        raise ValueError("path must name the directory to list")
    return sorted(os.listdir(path))
def translate_files(path=None):
    """Translate every file in *path* and store the results beside it.

    The lines of each file are joined with spaces and sent to the translator
    in a single request. The translated text is written to
    ``<parent of path>/translated-results/Translated_<name>``. When the
    request fails, an empty ``ERR-Translated_<name>`` file is written
    instead. Files that already have either output are skipped, so an
    interrupted run can be resumed.

    Args:
        path: directory holding the files to translate.

    Raises:
        ValueError: if ``path`` is None (raised by ``file_names``).
        OSError: if a file cannot be read or written.
    """
    files = file_names(path)
    translator = Translator()

    # Derive the output folder from the parent directory rather than by
    # slicing a fixed number of characters off the end of ``path``.
    results_dir = os.path.join(
        os.path.dirname(os.path.normpath(path)), 'translated-results')
    os.makedirs(results_dir, exist_ok=True)
    finished = set(os.listdir(results_dir))

    for name in files:
        ok_name = 'Translated_' + name
        err_name = 'ERR-Translated_' + name
        # Resume by output name, not by counting outputs, so unrelated
        # entries in the results folder cannot shift the starting point.
        if ok_name in finished or err_name in finished:
            continue

        with open(os.path.join(path, name), "r") as f_in:
            # Strip only the newline; slicing off the last character would
            # eat real text from a final line that lacks one.
            text = ''.join(line.rstrip('\n') + ' ' for line in f_in)

        try:
            translated = translator.translate(text).text
            out_name = ok_name
        except Exception:
            # Best effort: note the failure and move on to the next file.
            print("JSON Decode error occurred!")
            translated = ''
            out_name = err_name

        with open(os.path.join(results_dir, out_name), "w") as f_out:
            f_out.write(translated)

        # needed so that Google API does not block the current IP address
        time.sleep(1)
        print('Translating '+name+' ... done.')
def main():
    """Prompt for a dataset folder and translate its ``results`` sub-folder.

    The folder name is taken relative to the current directory. A
    ``translated-results`` folder is created inside it if needed, then every
    file under ``<dataset>/results`` is handed to ``translate_files``.

    Raises:
        OSError: if the output folder cannot be created.
    """
    # checklist : path to dataset
    dataset_dir = os.getcwd() + '/' + input("Enter path ").strip()
    # exist_ok tolerates an existing folder while still surfacing real
    # failures (e.g. permission errors) that a bare ``except`` would hide.
    os.makedirs(os.path.join(dataset_dir, 'translated-results'), exist_ok=True)
    translate_files(os.path.join(dataset_dir, 'results'))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment