Skip to content

Instantly share code, notes, and snippets.

@matthieubosquet
Created December 28, 2011 08:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save matthieubosquet/1527044 to your computer and use it in GitHub Desktop.
Save matthieubosquet/1527044 to your computer and use it in GitHub Desktop.
Python search command line script
#!/usr/bin/python
#
# Python search script 0.1
# 14th of June 2011 (started v0.0 on 1st of June 2011)
#
# Copyright 2011 Matthieu Bosquet
# Licensed under the GPL, Version 3.0
# http://www.gnu.org/licenses/gpl-3.0.txt
import argparse
from datetime import datetime
import fileinput
import os
import re
import shutil
import sys
def list_files(args):
    """Return the absolute paths of the files found under ``args.directory``.

    The tree is walked with ``os.walk`` and filtered by the parsed options:

    * ``args.spooky`` false: directories whose name starts with a dot are
      pruned, so nothing below them is visited.
    * ``args.archives`` false: files with a known archive suffix are skipped.
    * ``args.images`` false: files with a known image suffix are skipped.
    * ``args.extension`` set: only names ending with it (case-sensitive, as
      typed by the user) are kept.

    Editor leftovers (``.swp``, ``~``, ``.swf``) are always skipped.
    NOTE(review): the source lost its indentation, so it is not certain
    whether this filter originally applied only without ``--spooky``; the
    option's help text mentions directories only, hence unconditional here.

    The built-in suffix lists are compared against the lower-cased file name
    so that ``PHOTO.JPG`` or ``BACKUP.ZIP`` are filtered like their lower-case
    counterparts.

    Args:
        args: namespace providing ``directory``, ``spooky``, ``archives``,
            ``images`` and ``extension``.

    Returns:
        A list of absolute file paths, in ``os.walk`` order.
    """
    leftover_suffixes = ('.swp', '~', '.swf')
    archive_suffixes = ('.tar', '.gz', '.zip', '.7z', '.ace', '.jar', '.deb')
    image_suffixes = ('.jpeg', '.jpg', '.gif', '.png', '.ico', '.bmp', '.tiff')

    # Build the tuple of suffixes to reject once, instead of re-filtering the
    # file list several times for every directory.
    excluded_suffixes = leftover_suffixes
    if not args.archives:
        excluded_suffixes += archive_suffixes
    if not args.images:
        excluded_suffixes += image_suffixes

    files_abspaths = []
    for root, dirs, files in os.walk(args.directory):
        if not args.spooky:
            # In-place slice assignment is what tells os.walk not to descend
            # into the removed directories.
            dirs[:] = [d for d in dirs if not d.startswith('.')]
        for name in files:
            if name.lower().endswith(excluded_suffixes):
                continue
            if args.extension and not name.endswith(args.extension):
                continue
            files_abspaths.append(os.path.abspath(os.path.join(root, name)))
    return files_abspaths
def pattern_search_files(files_abspaths, args, log_file):
    """Search each file for ``args.pattern``, report matches, optionally replace.

    For every file a ``<path> : <n> found`` line is reported, followed by one
    ``Match <i>`` entry per matching line showing the line before, the line
    itself (already substituted when replacing) and the line after, each
    prefixed with its 1-based line number. ``#`` markers stand in for the
    missing neighbour at the beginning or end of the file.

    When ``args.replace`` is given (an empty string is a valid replacement),
    each match is confirmed through ``yes_no_exit('Replace ?', args.force)``
    and the accepted substitutions are handed to ``file_overwrite``.

    Everything reported goes to standard output and, when ``log_file`` is
    truthy, to ``log_file`` as well.

    Args:
        files_abspaths: iterable of file paths to scan.
        args: namespace providing ``pattern``, ``replace`` and ``force``.
        log_file: writable file-like object, or a falsy value for no logging.
    """
    def emit(text):
        # Mirror each report fragment to the console and the optional log.
        sys.stdout.write(text)
        if log_file:
            log_file.write(text)

    start_time = datetime.now()
    regex = re.compile(args.pattern)
    # Compare with None: "-r ''" (delete the matched text) must count as a
    # replacement request even though the empty string is falsy.
    replacing = args.replace is not None
    total_matches = 0
    total_replaced = 0

    emit('\n\nPattern search ' + args.pattern + ' :')

    for file_abspath in files_abspaths:
        # Plain 'r' instead of the deprecated 'U' flag. The handle stays open
        # for the whole iteration because file_overwrite re-reads it.
        with open(file_abspath, 'r') as f:
            lines = f.readlines()
            hits = [i for i, text in enumerate(lines) if regex.search(text)]
            total_matches += len(hits)
            emit('\n' + file_abspath + ' : ' + str(len(hits)) + ' found\n')

            # Maps 0-based line index -> replacement text accepted by the user.
            lines_replacement_strings = {}
            for match_number, index in enumerate(hits):
                original_line = lines[index]
                if replacing:
                    shown_line = regex.sub(args.replace, original_line)
                else:
                    shown_line = original_line

                # Context is taken from the in-memory list rather than by
                # seeking to computed byte offsets, which went wrong whenever
                # newline translation changed the line lengths.
                if index == 0:
                    before = ('#', 'BEGINING OF FILE\n')
                else:
                    before = (index, lines[index - 1])
                if index + 1 < len(lines):
                    after = (index + 2, lines[index + 1])
                else:
                    after = ('#', 'END OF FILE\n')
                context = (before, (index + 1, shown_line), after)
                listing = ''.join(
                    str(number) + ' : ' + text for number, text in context)

                if replacing:
                    header = ('Match ' + str(match_number) + ' : line '
                              + str(index + 1) + ' was : ' + original_line)
                else:
                    header = 'Match ' + str(match_number) + ' :\n'
                emit(header + listing)

                if replacing and yes_no_exit('Replace ?', args.force):
                    lines_replacement_strings[index] = shown_line

            if lines_replacement_strings:
                total_replaced += file_overwrite(
                    file_abspath, lines_replacement_strings, f, args)

    elapsed = datetime.now() - start_time
    log_replaced = ''
    if replacing:
        log_replaced = ' ' + str(total_replaced) + ' replaced'
    emit('\nPattern ' + args.pattern + ', ' + str(total_matches) + ' found'
         + log_replaced + ' in ' + str(elapsed) + '\n')
def file_overwrite(file_abspath, lines_replacement_strings, f, args):
    """Rewrite ``file_abspath`` with the accepted line replacements applied.

    The new content is written to ``<file_abspath>.tmpreplace`` and then moved
    over the original file. Lines whose 0-based index is a key of
    ``lines_replacement_strings`` are replaced by the mapped text; all other
    lines are copied unchanged from ``f``.

    Args:
        file_abspath: path of the file to overwrite.
        lines_replacement_strings: dict mapping 0-based line index to the
            replacement line text.
        f: open, seekable, readable file object for ``file_abspath``.
        args: namespace providing ``force``.

    Returns:
        The number of replaced lines, or 0 when the user declined.
    """
    # --force answers every question with yes, so the prompt is only reached
    # without it.
    if not (args.force
            or yes_no_exit('Overwrite ' + file_abspath + ' ?', args.force)):
        return 0

    tmp_abspath = file_abspath + '.tmpreplace'
    f.seek(0)
    # The with-block closes (and therefore flushes) the temporary file before
    # it is moved; moving a still-open file risks an incomplete copy and fails
    # on platforms that lock open files.
    with open(tmp_abspath, 'w') as tmp_file:
        for i, line in enumerate(f.readlines()):
            tmp_file.write(lines_replacement_strings.get(i, line))
    # Carry over the permission bits so e.g. an executable script stays
    # executable after the replacement.
    shutil.copymode(file_abspath, tmp_abspath)
    print('banzai !')
    shutil.move(tmp_abspath, file_abspath)
    return len(lines_replacement_strings)
def yes_no_exit(question, force):
    """Ask a yes/no question on the console and return the answer.

    Accepted answers (case-insensitive): ``yes``, ``y``, ``ye`` or an empty
    line mean yes; ``no`` or ``n`` mean no; ``exit`` terminates the program.
    Anything else prints a hint and asks again.

    Args:
        question: prompt shown to the user.
        force: when true, no question is asked and the answer is yes.

    Returns:
        True for yes, False for no.

    Raises:
        SystemExit: when the user answers ``exit``.
    """
    if force:
        return True

    try:
        read_answer = raw_input  # Python 2
    except NameError:
        read_answer = input  # Python 3 renamed raw_input() to input()

    yes = set(['yes', 'y', 'ye', ''])
    no = set(['no', 'n'])
    # Loop until a valid answer arrives. The former recursive retry dropped
    # its result, so any answer given after an invalid one came back as None
    # and was treated as "no" by the callers.
    while True:
        choice = read_answer(question).lower()
        if choice in yes:
            return True
        if choice in no:
            return False
        if choice == 'exit':
            sys.exit()
        print('Please respond yes, no or exit if you want to terminate the program')
def main():
    """Command-line entry point: list files, then optionally search/replace.

    Parses the options, prints (and, with ``--log``, appends to the log file)
    a start banner, the list of files selected by ``list_files`` and, when a
    pattern was given, the report produced by ``pattern_search_files``,
    followed by the total run time.
    """
    parser = argparse.ArgumentParser(description='Search for patterns in files', epilog="e.g. ./search.py -e '.txt' -p 'are not.*should'")
    parser.add_argument('-a', '--archives', help='Include archives in your search', action='store_true')
    parser.add_argument('-d', '--directory', help='Specify directory to lookup', default='.')
    parser.add_argument('-e', '--extension', help='Specify file ending to look for')
    parser.add_argument('-f', '--force', help='Force script to execute and ask no questions', action='store_true')
    parser.add_argument('-i', '--images', help='Include images in your search', action='store_true')
    parser.add_argument('-l', '--log', help='Specify a file name to write log of your operation')
    parser.add_argument('-p', '--pattern', help='Specify a pattern to look for')
    parser.add_argument('-r', '--replace', help='Specify a replacement string for the pattern')
    parser.add_argument('-s', '--spooky', help='Include directories starting with a . in lookup (hidden directories)', action='store_true')
    args = parser.parse_args()

    start_time = datetime.now()
    # None (falsy) means "no log requested"; pattern_search_files only tests
    # the log handle for truthiness.
    log_file = open(args.log, 'a') if args.log else None

    def emit(text):
        # Mirror each report fragment to the console and the optional log.
        sys.stdout.write(text)
        if log_file:
            log_file.write(text)

    # try/finally so the log is flushed and closed even when the search raises
    # or the user answers "exit" (SystemExit) at a prompt.
    try:
        emit('\n\nScript ' + os.path.abspath(__file__) + '\nStarted : ' + str(start_time))

        files_abspaths = list_files(args)
        end_time = datetime.now()
        listing = ''.join('\n' + path for path in files_abspaths)
        emit('\n\n\n' + str(len(files_abspaths)) + ' files found in ' + str(end_time - start_time) + ' :' + listing + '\n')

        if args.pattern:
            pattern_search_files(files_abspaths, args, log_file)

        end_time = datetime.now()
        emit('\n\nScript run time : ' + str(end_time - start_time) + '\nFinished : ' + str(end_time) + '\n\n\n')
    finally:
        if log_file:
            log_file.close()
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment