Skip to content

Instantly share code, notes, and snippets.

Last active June 2, 2019 16:39
Show Gist options
Save beatorizu/a11d1a4b3f8cdb91507b8fe49baf39fe to your computer and use it in GitHub Desktop.
Python scripts with regex powers to find files XD
from datetime import datetime
from json import dump
from os import walk
from os.path import join
import logging
import re
def search_dir(root, target='.git', pattern=re.compile(r'^\.git|[\w\d\s!@#$%^&*()_+\-=\[\]{};\':"\\|,<>\/?]+$'),
               exclude=set(['data', 'log', 'logs', 'DATA', 'LOG', 'LOGS'])):
    """Walk *root* and collect the path of every directory named *target*.

    Args:
        root: Directory where the top-down walk starts.
        target: Exact directory name to look for.
        pattern: Compiled regex; only sub-directories whose name it matches
            (via ``pattern.match``) are descended into.
        exclude: Extra directory names to skip. They are added to the
            built-in data/log names, which are always skipped. The default
            set is never mutated here.

    Returns:
        A list of ``join(parent, target)`` paths, in walk order.
    """
    skipped = {'data', 'log', 'logs', 'DATA', 'LOG', 'LOGS'} | set(exclude)
    results = []
    for current, dirs, _files in walk(root, topdown=True):
        # Test for the target BEFORE pruning: a target whose name does not
        # match `pattern` (e.g. '.venv') would otherwise be filtered out of
        # `dirs` and could never be reported.
        if target in dirs and target not in skipped:
            results.append(join(current, target))
        # In-place assignment is what tells os.walk which sub-directories
        # to descend into.
        dirs[:] = [d for d in dirs if d not in skipped and pattern.match(d)]
    return results
def search_pattern(pattern, filename):
    """Return the lines of *filename* that match *pattern*, if any.

    A line is kept when *pattern* is found in it, it does not start with a
    comment marker (``#`` or ``*``), and it does not contain ``print``.

    NOTE(review): the call expressions in the original read/filter lines
    were lost; this body is reconstructed from the surviving tokens.
    ``pattern.search`` (rather than ``pattern.match``) is assumed - confirm
    against the original script.

    Args:
        pattern: Compiled regex to look for in each line.
        filename: Path of the text file to scan.

    Returns:
        ``{filename: [matching lines]}`` when at least one line qualifies,
        otherwise ``None``. ``None`` is also returned when the file cannot
        be opened or decoded.
    """
    comments_regex = re.compile(r'^(#|\*)')
    try:
        with open(filename) as fileb:
            lines = fileb.read().split('\n')
    except (UnicodeDecodeError, IOError):
        # Unreadable or non-text files are treated as "no match".
        return None
    matches = [
        line for line in lines
        if pattern.search(line)
        and not comments_regex.match(line)
        and 'print' not in line
    ]
    if matches:
        return {filename: matches}
    return None
if __name__ == '__main__':
    # Scan /work for Python files that contain database-related statements,
    # write their paths to db_conn.json, and log start time, duration and
    # end time to db_conn.log.
    now = datetime.now()
    logger = logging.getLogger(__name__)
    handler = logging.FileHandler('db_conn.log')
    formatter = logging.Formatter('%(levelname)s - %(asctime)s - %(name)s - %(message)s')
    # Attach the formatter and enable INFO; without these the format above
    # is unused and the INFO records below are dropped by the default level.
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    logger.setLevel(logging.INFO)
    logger.info('[I] %s', now)

    # Only db_regex is used by the scan below. The other patterns are kept
    # so the search can be pointed at a different pattern by swapping the
    # argument passed to search_pattern.
    wget_regex = re.compile(r'(wget|curl)')
    ip_regex = re.compile(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}')
    storage_regex = re.compile(r'storage-m\d')
    db_regex = re.compile(r'(insert|INSERT|update|UPDATE|select|SELECT|delete|DELETE|psycopg|dbname|sqlalchemy)')
    file3d_regex = re.compile(r'(3d|lvl)[\w\d\s!@#$%^&*()_+\-=\[\]{};\':"\\|,<>\/?]*\.nc')
    bkp_regex = re.compile(r'(backup|anaconda2_compartilhados|lixeira|anaconda2|bkp)')
    comments_regex = re.compile(r'(#|\*)')

    # Directory names that are never descended into.
    exclude = set(['data', 'log', 'logs', 'testes', 'DATA', 'LOG', 'LOGS', 'TESTES', 'storage-m1', 'storage-m2', 'sistema', '.git'])
    # Directory names must match this pattern to be descended into.
    pattern = re.compile(r'^\.git|[\w\d\s!@#$%^&*()_+\-=\[\]{};\':"\\|,<>\/?]+$')
    # Only .py files are scanned. The original first assigned a broader
    # pattern with the suffix group (gs|pl|py|(k|c)*sh) and then immediately
    # overwrote it with this one; the dead assignment is dropped.
    files_to_read = re.compile(r'^[\w\d\s!@#$%^&*()_+\-=\[\]{};\':"\\|,<>\/?]+\.py$')

    results = []
    for root, dirs, files in walk('/work', topdown=True):
        # In-place assignment prunes the walk.
        dirs[:] = [d for d in dirs if d not in exclude and pattern.match(d)]
        # Keep files with an accepted name that contain at least one
        # db_regex hit, as reported by search_pattern.
        results.extend(
            join(root, f)
            for f in files
            if files_to_read.match(f) and search_pattern(db_regex, join(root, f))
        )

    with open('db_conn.json', 'w') as jsonfile:
        dump(results, jsonfile)

    duration = datetime.now() - now
    logger.info('[I] %s', duration)
    logger.info('[I] %s', datetime.now())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment