Last active
July 25, 2018 14:45
-
-
Save lwiecek/d4ae5dac4a08ee8cd2dc5932fa9e8eb9 to your computer and use it in GitHub Desktop.
Find strings that look like user-facing copy (quoted text containing at least two words) in the .js files under a given directory, searched recursively.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import sys | |
import re | |
def list_files(directory, extension):
    """Recursively collect the files under ``directory`` with a given extension.

    Args:
        directory: Path of the directory to scan.
        extension: Suffix to match, including the leading dot (e.g. ``'.js'``);
            it is compared against ``os.path.splitext(path)[1]``.

    Returns:
        A list of paths, each built by joining names onto ``directory``:
        matching files directly inside ``directory`` first, followed by the
        matches found in each subdirectory.

    Raises:
        OSError: If ``directory`` (or a nested directory) cannot be listed.
    """
    paths = [os.path.join(directory, name) for name in os.listdir(directory)]
    subdirs = [path for path in paths if os.path.isdir(path)]
    # A directory can itself be named like "highlight.js"; only regular files
    # may be reported, otherwise the caller fails when it tries to open them.
    matches = [
        path for path in paths
        if os.path.splitext(path)[1] == extension and os.path.isfile(path)
    ]
    for subdir in subdirs:
        matches.extend(list_files(subdir, extension))
    return matches
# Interior of a quoted string that contains at least two alphabetic words
# separated by whitespace; the surrounding quote characters are added below.
REGEXP_NO_QUOTES = r'.*[a-zA-Z]+\s+[a-zA-Z]+.*'

# Compiled once: the pattern wrapped in single quotes and in double quotes.
_QUOTED_COPY_PATTERNS = tuple(
    re.compile('{0}{1}{0}'.format(quote, REGEXP_NO_QUOTES))
    for quote in ("'", '"')
)


def find_sentences(file_path):
    """Find lines of a file that appear to contain a quoted copy string.

    A line matches when it contains a single- or double-quoted span with at
    least two alphabetic words separated by whitespace.

    Args:
        file_path: Path of the text file to scan.

    Returns:
        A list of ``'<line number>: <stripped line>'`` strings, one per
        matching line, in file order. Line numbers are 1-based so they agree
        with what editors display.

    Raises:
        OSError: If the file cannot be opened.
    """
    lines = []
    # Decode as UTF-8 and replace undecodable bytes so a stray byte in one
    # file does not abort the scan; matching only relies on ASCII letters.
    with open(file_path, encoding='utf-8', errors='replace') as f:
        for number, line in enumerate(f, start=1):
            if any(pattern.search(line) for pattern in _QUOTED_COPY_PATTERNS):
                lines.append('{}: {}'.format(number, line.strip()))
    return lines
def main():
    """Scan the ``.js`` files under a directory and print likely copy strings.

    The directory is the first command-line argument, defaulting to ``'.'``
    when none is given. For every file with at least one match, a blank line,
    the file path, and then the matching lines are printed.
    """
    cli_args = sys.argv[1:]
    root = cli_args[0] if cli_args else '.'
    for js_file in list_files(root, extension='.js'):
        hits = find_sentences(js_file)
        if not hits:
            continue
        # The leading empty string produces the blank separator line.
        print('', js_file, *hits, sep='\n')
# Run the scan only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment