Created
December 28, 2011 08:02
-
-
Save matthieubosquet/1527044 to your computer and use it in GitHub Desktop.
Python search command line script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
#
# Python search script 0.1
# 14th of June 2011 (started v0.0 on 1st of June 2011)
#
# Copyright 2011 Matthieu Bosquet
# Licensed under the GPL, Version 3.0
# http://www.gnu.org/licenses/gpl-3.0.txt
import argparse | |
from datetime import datetime | |
import fileinput | |
import os | |
import re | |
import shutil | |
import sys | |
def list_files(args):
    """Return the absolute paths of the files to search under ``args.directory``.

    The tree is walked with ``os.walk``.  Files ending in ``.swp``, ``~`` or
    ``.swf`` are always skipped.  The parsed options further control:

    * ``args.spooky``    -- when false, directories whose name matches
      ``\\.\\w`` (a dot followed by a word character) are not descended into;
    * ``args.archives``  -- when false, common archive suffixes are skipped;
    * ``args.images``    -- when false, common image suffixes are skipped;
    * ``args.extension`` -- when set, only files ending with it are kept.

    Paths are returned in the order ``os.walk`` yields them.
    """
    # Suffixes that disqualify a file; extended according to the options.
    excluded = ('.swp', '~', '.swf')
    if not args.archives:
        excluded += ('.tar', '.gz', '.zip', '.7z', '.ace', '.jar', '.deb')
    if not args.images:
        excluded += ('.jpeg', '.jpg', '.gif', '.png', '.ico', '.bmp', '.tiff')
    hidden_dir = re.compile(r'\.\w')

    collected = []
    for root, dirs, files in os.walk(args.directory):
        if not args.spooky:
            # Prune in place so os.walk never enters the hidden directories.
            dirs[:] = [name for name in dirs if hidden_dir.match(name) is None]
        for name in files:
            if name.endswith(excluded):
                continue
            if args.extension and not name.endswith(args.extension):
                continue
            collected.append(os.path.abspath(os.path.join(root, name)))
    return collected
def _emit(text, log_file):
    """Write *text* verbatim to stdout and, when one is given, to the log file."""
    sys.stdout.write(text)
    if log_file:
        log_file.write(text)


def _match_context(lines, index, shown_text):
    """Return the three ``(label, text)`` rows displayed for one match.

    ``index`` is the 0-based position of the matching line in ``lines`` and
    ``shown_text`` is the text displayed for it (the original line, or the
    line after substitution).  Labels are 1-based line numbers; ``'#'``
    marks the synthetic rows used at either end of the file.
    """
    if index == 0:
        before = ('#', 'BEGINING OF FILE\n')
    else:
        before = (index, lines[index - 1])
    if index + 1 < len(lines):
        after = (index + 2, lines[index + 1])
    else:
        after = ('#', 'END OF FILE\n')
    return [before, (index + 1, shown_text), after]


def pattern_search_files(files_abspaths, args, log_file):
    """Search every file for ``args.pattern`` and optionally replace matches.

    For each path in ``files_abspaths`` the number of matching lines is
    reported, followed by each match with one line of context on either
    side.  Everything reported goes to stdout and, when ``log_file`` is
    truthy, to ``log_file`` as well.

    When ``args.replace`` is truthy each matching line is shown with the
    pattern substituted, the user is asked to confirm it through
    ``yes_no_exit`` (``args.force`` answers yes), and the confirmed lines
    are handed to ``file_overwrite`` together with the still-open file.

    Context lines are taken from the list of lines read from the file
    rather than from byte offsets, so files with ``\\r\\n`` line endings show
    the right neighbours.  Each file is closed even if the search is
    interrupted by an error or by ``sys.exit()`` from a prompt.
    """
    start_time = datetime.now()
    pattern = re.compile(args.pattern)
    # 'U' requests universal newlines on Python 2; Python 3 text mode already
    # translates newlines and rejects the flag in recent versions.
    read_mode = 'rU' if sys.version_info[0] < 3 else 'r'
    total_matches = 0
    total_replaced = 0

    _emit('\n\nPattern search ' + args.pattern + ' :', log_file)

    for file_abspath in files_abspaths:
        with open(file_abspath, read_mode) as f:
            lines = f.readlines()
            matching = [i for i, line in enumerate(lines) if pattern.search(line)]
            total_matches += len(matching)
            _emit('\n' + file_abspath + ' : ' + str(len(matching)) + ' found\n',
                  log_file)

            # 0-based line index -> replacement text the user accepted.
            replacements = {}
            for match_number, index in enumerate(matching):
                original_line = lines[index]
                if args.replace:
                    shown = pattern.sub(args.replace, original_line)
                    header = ('Match ' + str(match_number) + ' : line '
                              + str(index + 1) + ' was : ' + original_line)
                else:
                    shown = original_line
                    header = 'Match ' + str(match_number) + ' :\n'
                rows = _match_context(lines, index, shown)
                _emit(header + ''.join(str(label) + ' : ' + text
                                       for label, text in rows),
                      log_file)
                if args.replace and yes_no_exit('Replace ?', args.force):
                    replacements[index] = shown

            if replacements:
                # file_overwrite re-reads the file through f, so call it
                # while the handle is still open.
                total_replaced += file_overwrite(file_abspath, replacements, f, args)

    end_time = datetime.now()
    log_replaced = ''
    if args.replace:
        log_replaced = ' ' + str(total_replaced) + ' replaced'
    _emit('\nPattern ' + args.pattern + ', ' + str(total_matches) + ' found'
          + log_replaced + ' in ' + str(end_time - start_time) + '\n',
          log_file)
def file_overwrite(file_abspath, lines_replacement_strings, f, args):
    """Rewrite ``file_abspath`` with the accepted replacement lines.

    The user is asked for confirmation through ``yes_no_exit``
    (``args.force`` answers yes).  On confirmation the content of the open
    file object ``f`` is re-read from the start and copied line by line to
    ``<file_abspath>.tmpreplace``, substituting
    ``lines_replacement_strings[i]`` for every 0-based line index ``i``
    present in that mapping; the temporary file then replaces the original.

    Returns the number of replaced lines, or 0 when the user declines.
    """
    if not yes_no_exit('Overwrite ' + file_abspath + ' ?', args.force):
        return 0

    tmp_abspath = file_abspath + '.tmpreplace'
    f.seek(0)
    # Close (and therefore flush) the temporary file before moving it over
    # the original, so the destination is complete and no handle is left
    # open if writing fails.
    with open(tmp_abspath, 'w') as tmp_file:
        for i, line in enumerate(f.readlines()):
            tmp_file.write(lines_replacement_strings.get(i, line))
    print('banzai !')
    shutil.move(tmp_abspath, file_abspath)
    return len(lines_replacement_strings)
def yes_no_exit(question, force):
    """Ask *question* on the console and return the answer as a bool.

    Returns True immediately when ``force`` is truthy.  Otherwise the
    answer is read case-insensitively: ``yes``, ``ye``, ``y`` or an empty
    line mean True, ``no`` or ``n`` mean False, and ``exit`` terminates the
    program through ``sys.exit()``.  Any other answer prints a reminder and
    asks again; the answer finally given is the one returned (previously a
    retry always yielded ``None``).
    """
    if force:
        return True
    try:
        read_answer = raw_input  # Python 2
    except NameError:
        read_answer = input  # Python 3 renamed raw_input to input
    yes = set(['yes', 'y', 'ye', ''])
    no = set(['no', 'n'])
    while True:
        choice = read_answer(question).lower()
        if choice in yes:
            return True
        if choice in no:
            return False
        if choice == 'exit':
            sys.exit()
        print('Please respond yes, no or exit if you want to terminate the program')
def main():
    """Command-line entry point: list the candidate files, then search them.

    Parses the options, prints a start banner, collects the files with
    ``list_files`` and prints them, runs ``pattern_search_files`` when a
    pattern was given, and prints the total run time.  Everything printed
    is also appended to the file named by ``--log`` when that option is
    used; the log file is closed even if the run is cut short by an error
    or by ``sys.exit()``.
    """
    parser = argparse.ArgumentParser(description='Search for patterns in files', epilog="e.g. ./search.py -e '.txt' -p 'are not.*should'")
    parser.add_argument('-a', '--archives', help='Include archives in your search', action='store_true')
    parser.add_argument('-d', '--directory', help='Specify directory to lookup', default='.')
    parser.add_argument('-e', '--extension', help='Specify file ending to look for')
    parser.add_argument('-f', '--force', help='Force script to execute and ask no questions', action='store_true')
    parser.add_argument('-i', '--images', help='Include images in your search', action='store_true')
    parser.add_argument('-l', '--log', help='Specify a file name to write log of your operation')
    parser.add_argument('-p', '--pattern', help='Specify a pattern to look for')
    parser.add_argument('-r', '--replace', help='Specify a replacement string for the pattern')
    parser.add_argument('-s', '--spooky', help='Include directories starting with a . in lookup (hidden directories)', action='store_true')
    args = parser.parse_args()

    start_time = datetime.now()
    banner = '\n\nScript ' + os.path.abspath(__file__) + '\nStarted : ' + str(start_time)
    # sys.stdout.write emits exactly the text that goes to the log.
    sys.stdout.write(banner)

    # False (not None) is the "no log" value the search function receives.
    log_file = open(args.log, 'a') if args.log else False
    try:
        if log_file:
            log_file.write(banner)

        files_abspaths = list_files(args)
        end_time = datetime.now()
        listing = ''.join('\n' + path for path in files_abspaths)
        summary = ('\n\n\n' + str(len(files_abspaths)) + ' files found in '
                   + str(end_time - start_time) + ' :' + listing + '\n')
        sys.stdout.write(summary)
        if log_file:
            log_file.write(summary)

        if args.pattern:
            pattern_search_files(files_abspaths, args, log_file)

        end_time = datetime.now()
        footer = ('\n\nScript run time : ' + str(end_time - start_time)
                  + '\nFinished : ' + str(end_time) + '\n\n\n')
        sys.stdout.write(footer)
        if log_file:
            log_file.write(footer)
    finally:
        if log_file:
            log_file.close()
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment