@beatorizu
Last active June 2, 2019 16:39
Python scripts with regex powers to find files XD
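The script below walks /work, keeps only .py files whose contents match a database-related regex (INSERT/UPDATE/SELECT/DELETE, psycopg, dbname, sqlalchemy), and writes the matching paths to db_conn.json; start time and duration are logged to db_conn.log. A minimal sketch of inspecting the output after a run (assuming db_conn.json exists in the working directory):

    from json import load

    with open('db_conn.json') as jsonfile:
        paths = load(jsonfile)  # list of paths under /work with database-related code
    print(len(paths), 'files reference a database')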
#!/opt/anaconda2/envs/py360/bin/python
from datetime import datetime
from json import dump
from os import walk
from os.path import join
import logging
import re


def search_dir(root, target='.git',
               pattern=re.compile(r'^\.git|[\w\d\s!@#$%^&*()_+\-=\[\]{};\':"\\|,<>\/?]+$'),
               exclude=set(['data', 'log', 'logs', 'DATA', 'LOG', 'LOGS'])):
    """Walk root and return the path of every directory that contains target."""
    exclude = set(['data', 'log', 'logs', 'DATA', 'LOG', 'LOGS']) | set(exclude)
    results = []
    for root, dirs, files in walk(root, topdown=True):
        # Prune excluded and non-matching directories in place so walk() skips them.
        dirs[:] = list(filter(lambda d: d not in exclude and pattern.match(d), dirs))
        if target in dirs:
            results.append(join(root, target))
    return results
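
# Example: search_dir() is not called in __main__ below, but it can be used on
# its own to list every Git checkout under a tree, e.g.:
#
#     repos = search_dir('/work')
#     for repo in repos:
#         print(repo)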


def search_pattern(pattern, filename):
    """Return {filename: [matching lines]} when pattern occurs in filename,
    skipping comment lines and lines containing 'print'; otherwise None."""
    comments_regex = re.compile(r'^(#|\*)')
    try:
        with open(filename) as fileb:
            try:
                lines = fileb.read().split('\n')
                # Keep only lines that match the pattern and are neither
                # comments nor print statements.
                lines[:] = list(filter(
                    lambda line: pattern.search(line)
                    and not comments_regex.search(line)
                    and 'print' not in line,
                    lines))
                if lines:
                    return {filename: lines}
                return None
            except UnicodeDecodeError:
                # Binary or non-UTF-8 file: skip it.
                return None
    except IOError:
        return None
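
# Example: for a hypothetical file app.py containing the line
#     engine = sqlalchemy.create_engine(url)
# search_pattern(re.compile(r'sqlalchemy'), 'app.py') would return
#     {'app.py': ['engine = sqlalchemy.create_engine(url)']}
# and None when nothing matches or the file cannot be read.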


if __name__ == '__main__':
    now = datetime.now()

    # Log the start time, elapsed time and end time to db_conn.log.
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)
    handler = logging.FileHandler('db_conn.log')
    handler.setLevel(logging.INFO)
    formatter = logging.Formatter('%(levelname)s - %(asctime)s - %(name)s - %(message)s')
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    logger.info(f'[I] {now}')

    # Patterns of interest; only db_regex is used below, the others are kept
    # for alternative searches (downloads, IPs, storage mounts, 3D NetCDF files, backups).
    wget_regex = re.compile(r'(wget|curl)')
    ip_regex = re.compile(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}')
    storage_regex = re.compile(r'storage-m\d')
    db_regex = re.compile(r'(insert|INSERT|update|UPDATE|select|SELECT|delete|DELETE|psycopg|dbname|sqlalchemy)')
    file3d_regex = re.compile(r'(3d|lvl)[\w\d\s!@#$%^&*()_+\-=\[\]{};\':"\\|,<>\/?]*\.nc')

    # Directories to skip and the directory/file name patterns to accept.
    exclude = set(['data', 'log', 'logs', 'testes', 'DATA', 'LOG', 'LOGS', 'TESTES',
                   'storage-m1', 'storage-m2', 'sistema', '.git'])
    pattern = re.compile(r'^\.git|[\w\d\s!@#$%^&*()_+\-=\[\]{};\':"\\|,<>\/?]+$')
    files_to_read = re.compile(r'^[\w\d\s!@#$%^&*()_+\-=\[\]{};\':"\\|,<>\/?]+\.(gs|pl|py|(k|c)*sh)$')
    # The broader pattern above is immediately overridden: only .py files are read.
    files_to_read = re.compile(r'^[\w\d\s!@#$%^&*()_+\-=\[\]{};\':"\\|,<>\/?]+\.py$')
    bkp_regex = re.compile(r'(backup|anaconda2_compartilhados|lixeira|anaconda2|bkp)')
    comments_regex = re.compile(r'(#|\*)')

    # results_storage_m = []
    # results_3d = []
    results = []
    for root, dirs, files in walk('/work', topdown=True):
        # Prune excluded and non-matching directories in place.
        dirs[:] = list(filter(lambda d: d not in exclude and pattern.match(d), dirs))
        # Keep only Python files whose contents match a database-related pattern.
        files[:] = list(filter(lambda f: files_to_read.match(f), files))
        files[:] = list(filter(lambda f: search_pattern(db_regex, join(root, f)), files))
        # files_storage_m = list(filter(lambda f: search_pattern(storage_regex, join(root, f)), files))
        # files_3d = [search_pattern(file3d_regex, join(root, f)) for f in files]
        # files_3d = list(filter(lambda f: search_pattern(file3d_regex, join(root, f)), files))
        # results_storage_m.extend(list(map(lambda f: join(root, f), files_storage_m)))
        # results_3d.extend(files_3d)
        # results_3d.extend(list(map(lambda f: join(root, f), files_3d)))
        results.extend(list(map(lambda f: join(root, f), files)))

    # Write the matching paths to db_conn.json and log how long the scan took.
    with open('db_conn.json', 'w') as jsonfile:
        dump(results, jsonfile)

    duration = datetime.now() - now
    logger.info(f'[I] {duration}')
    logger.info(f'[I] {datetime.now()}')