Skip to content

Instantly share code, notes, and snippets.

@yonglai
Last active June 3, 2017 14:02
Show Gist options
  • Save yonglai/aff9439609a3f1c308e36ec0962f4223 to your computer and use it in GitHub Desktop.
Save yonglai/aff9439609a3f1c308e36ec0962f4223 to your computer and use it in GitHub Desktop.
Example Python regex usage for e-book management
import os
from sets import Set
import shutil
import re
# Move every file found under `folder` into
# /vagrant/books/organized/<extension without the dot>/.
# A file whose name already exists in the target folder is left where it is
# and its path is recorded in /vagrant/books/duplicates.txt.
duplicates = []
folder = '/vagrant/books/unorganized'
totalNumOfFiles = 0
numOfDupFiles = 0
numOfNonDupFiles = 0
# Files without an extension are neither moved nor duplicates; they need
# their own counter, otherwise the sanity check at the end can never balance.
numOfSkippedFiles = 0

for folderName, subfolders, filenames in os.walk(folder):
    for f in filenames:
        totalNumOfFiles += 1
        filename, file_extension = os.path.splitext(f)
        if file_extension == '':
            numOfSkippedFiles += 1
            continue

        # file_extension includes the leading dot; drop it for the folder name.
        fileTypeFolder = '/vagrant/books/organized/' + file_extension[1:]
        if not os.path.exists(fileTypeFolder):
            os.makedirs(fileTypeFolder)

        oldFilePath = folderName + '/' + f
        newFilePath = fileTypeFolder + '/' + f
        if not os.path.exists(newFilePath):
            print('move ' + oldFilePath + ' to ' + newFilePath)
            numOfNonDupFiles += 1
            shutil.move(oldFilePath, newFilePath)
        else:
            duplicates.append(oldFilePath)
            numOfDupFiles += 1

if duplicates:
    # `with` guarantees the report is flushed and closed even if a write fails.
    with open('/vagrant/books/duplicates.txt', 'w') as duplicatesFile:
        for duplicatePath in duplicates:
            duplicatesFile.write(duplicatePath + '\n')

print("total num of files={0}, num of dup files={1}, num of non dup files={2}".format(totalNumOfFiles, numOfDupFiles, numOfNonDupFiles))
if numOfSkippedFiles:
    print("num of skipped files (no extension)={0}".format(numOfSkippedFiles))

# Every visited file must fall into exactly one of the three categories.
# print(...) with a single argument behaves the same on Python 2 and 3.
if totalNumOfFiles == numOfDupFiles + numOfNonDupFiles + numOfSkippedFiles:
    print('correct counting')
else:
    print('incorrect counting')
import os
import shutil
import re
import csv
from checkBookNames import *
# Apply the renames proposed by getUpdatedNames(): move each file to its
# cleaned-up name, or, when a different file already occupies that name,
# delete the source and record it in duplicates_organized.txt.
duplicates = []

# Earlier, disabled approach kept for reference. It is a bare string
# expression, so it has no runtime effect; the raw prefix avoids
# invalid-escape warnings for the regex inside it.
r'''
folder = '/vagrant/books/organized'
for folderName, subfolders, filenames in os.walk(folder):
    for filename in filenames:
        fileNameRegex = re.compile(r'^([^\w]+[\w.]+[^\w]+)')
        found = fileNameRegex.search(filename)
        if found != None:
            #result.append((filename, )
            oldFilePath = folderName + '/' + filename
            newFilePath = folderName + '/' + fileNameRegex.sub('', filename)
            if not os.path.exists(newFilePath):
                print('Rename: ' + oldFilePath + '--->' + newFilePath)
                shutil.move(oldFilePath, newFilePath)
            else:
                print('Duplicates found: ' + oldFilePath)
                duplicates.append(oldFilePath)
                #os.unlink(oldFilePath)
'''

for (oldFilePath, newFilePath) in getUpdatedNames():
    if oldFilePath == newFilePath:
        # A no-op rename: the target "exists" only because it IS the source.
        # Treating it as a duplicate would unlink the only copy of the file.
        continue
    if not os.path.exists(newFilePath):
        #print('Rename: ' + oldFilePath + '--->' + newFilePath)
        shutil.move(oldFilePath, newFilePath)
    else:
        #print('Duplicates found: ' + oldFilePath)
        duplicates.append(oldFilePath)
        os.unlink(oldFilePath)

if duplicates:
    # `with` guarantees the report is flushed and closed even if a write fails.
    with open('/vagrant/books/duplicates_organized.txt', 'w') as duplicatesFile:
        for duplicatePath in duplicates:
            duplicatesFile.write(duplicatePath + '\n')
import os
import shutil
import re
import csv
def getUpdatedNames(folder='/vagrant/books/organized'):
    """Propose cleaned-up names for files that carry ``[...]`` tags.

    Walks ``folder`` recursively. Only file names containing a bracketed tag
    are considered. Normally every ``[...]`` tag is removed together with the
    run of characters after it that are not ASCII letters, ``(`` or ``[``.
    For names containing one of the special series markers only a leading
    ``[...]`` tag is removed and the rest of the name is kept untouched.

    If the cleaned name no longer contains a ``.``, the original name is
    kept, so the returned pair may have identical old and new paths.

    Args:
        folder: Root directory to scan. Defaults to the original hard-coded
            location.

    Returns:
        A list of ``(old_path, new_path)`` tuples, one per tagged file.
    """
    # Hoisted out of the loops: these never change between files.
    tag_regex = re.compile(r'\[.*?\][^a-zA-Z(\[]*')
    leading_tag_regex = re.compile(r'^\[.*?\]')
    special_files = ('Artifacts', 'Witchblade', 'X-Men', 'le.Dragon', 'Fables', 'TW', 'X-Force')

    result = []
    for folderName, subfolders, filenames in os.walk(folder):
        for filename in filenames:
            if tag_regex.search(filename) is None:
                continue

            if any(marker in filename for marker in special_files):
                new_name = leading_tag_regex.sub('', filename)
            else:
                new_name = tag_regex.sub('', filename)

            # No '.' left in the cleaned name: fall back to the original.
            if '.' not in new_name:
                new_name = filename

            result.append((os.path.join(folderName, filename),
                           os.path.join(folderName, new_name)))
    return result
if __name__ == "__main__":
    # When run directly, dump every proposed rename to a two-column CSV
    # (header row first) without touching any of the files.
    columns = ['old_name', 'new_name']
    with open('/vagrant/books/name_changes.csv', 'w') as report:
        sheet = csv.DictWriter(report, fieldnames=columns)
        sheet.writeheader()
        sheet.writerows(dict(zip(columns, pair)) for pair in getUpdatedNames())
import os
import shutil
import re
# Empty the unorganized folder: delete each top-level entry, files via
# os.unlink and directories (recursively) via shutil.rmtree. A failure on one
# entry is printed and the loop carries on with the next.
duplicates = []
folder = '/vagrant/books/unorganized'
for entry_name in os.listdir(folder):
    entry_path = os.path.join(folder, entry_name)
    print(entry_path)
    try:
        if os.path.isfile(entry_path):
            label, remover = 'remove file ', os.unlink
        elif os.path.isdir(entry_path):
            label, remover = 'remove folder ', shutil.rmtree
        else:
            # Neither a regular file nor a directory: leave it alone.
            continue
        print(label + entry_path)
        remover(entry_path)
    except Exception as err:
        print(err)
import os
from sets import Set
import shutil
import re
# Delete every file whose name starts with '._' anywhere under the
# organized tree, printing each path before it is removed.
duplicates = []
folder = '/vagrant/books/organized'
for current_dir, _child_dirs, names in os.walk(folder):
    for name in names:
        if not name.startswith('._'):
            continue
        target = '/'.join((current_dir, name))
        print('remove: ' + target)
        os.unlink(target)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment