@beatorizu
Last active June 2, 2019 16:39
Python scripts with regex powers to find files XD
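The script below walks /work, keeps only .py files whose contents match a database-related regex (INSERT/UPDATE/SELECT/DELETE, psycopg, dbname, sqlalchemy), and writes the matching paths to db_conn.json; start time and duration are logged to db_conn.log. A minimal sketch of inspecting the output after a run (assuming db_conn.json exists in the working directory):

    from json import load

    with open('db_conn.json') as jsonfile:
        paths = load(jsonfile)  # list of paths under /work with database-related code
    print(len(paths), 'files reference a database')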
#!/opt/anaconda2/envs/py360/bin/python
from datetime import datetime
from json import dump
from os import walk
from os.path import join
import logging
import re


def search_dir(root, target='.git',
               pattern=re.compile(r'^\.git|[\w\d\s!@#$%^&*()_+\-=\[\]{};\':"\\|,<>\/?]+$'),
               exclude=set(['data', 'log', 'logs', 'DATA', 'LOG', 'LOGS'])):
    """Walk root and return the path of every directory that contains target."""
    exclude = set(['data', 'log', 'logs', 'DATA', 'LOG', 'LOGS']) | set(exclude)
    results = []
    for root, dirs, files in walk(root, topdown=True):
        # Prune excluded and non-matching directories in place so walk() skips them.
        dirs[:] = list(filter(lambda d: d not in exclude and pattern.match(d), dirs))
        if target in dirs:
            results.append(join(root, target))
    return results
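
# Example: search_dir() is not called in __main__ below, but it can be used on
# its own to list every Git checkout under a tree, e.g.:
#
#     repos = search_dir('/work')
#     for repo in repos:
#         print(repo)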


def search_pattern(pattern, filename):
    """Return {filename: [matching lines]} when pattern occurs in filename,
    skipping comment lines and lines containing 'print'; otherwise None."""
    comments_regex = re.compile(r'^(#|\*)')
    try:
        with open(filename) as fileb:
            try:
                lines = fileb.read().split('\n')
                # Keep only lines that match the pattern and are neither
                # comments nor print statements.
                lines[:] = list(filter(
                    lambda line: pattern.search(line)
                    and not comments_regex.search(line)
                    and 'print' not in line,
                    lines))
                if lines:
                    return {filename: lines}
                return None
            except UnicodeDecodeError:
                # Binary or non-UTF-8 file: skip it.
                return None
    except IOError:
        return None
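
# Example: for a hypothetical file app.py containing the line
#     engine = sqlalchemy.create_engine(url)
# search_pattern(re.compile(r'sqlalchemy'), 'app.py') would return
#     {'app.py': ['engine = sqlalchemy.create_engine(url)']}
# and None when nothing matches or the file cannot be read.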


if __name__ == '__main__':
    now = datetime.now()

    # Log the start time, elapsed time and end time to db_conn.log.
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)
    handler = logging.FileHandler('db_conn.log')
    handler.setLevel(logging.INFO)
    formatter = logging.Formatter('%(levelname)s - %(asctime)s - %(name)s - %(message)s')
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    logger.info(f'[I] {now}')

    # Patterns of interest; only db_regex is used below, the others are kept
    # for alternative searches (downloads, IPs, storage mounts, 3D NetCDF files, backups).
    wget_regex = re.compile(r'(wget|curl)')
    ip_regex = re.compile(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}')
    storage_regex = re.compile(r'storage-m\d')
    db_regex = re.compile(r'(insert|INSERT|update|UPDATE|select|SELECT|delete|DELETE|psycopg|dbname|sqlalchemy)')
    file3d_regex = re.compile(r'(3d|lvl)[\w\d\s!@#$%^&*()_+\-=\[\]{};\':"\\|,<>\/?]*\.nc')

    # Directories to skip and the directory/file name patterns to accept.
    exclude = set(['data', 'log', 'logs', 'testes', 'DATA', 'LOG', 'LOGS', 'TESTES',
                   'storage-m1', 'storage-m2', 'sistema', '.git'])
    pattern = re.compile(r'^\.git|[\w\d\s!@#$%^&*()_+\-=\[\]{};\':"\\|,<>\/?]+$')
    files_to_read = re.compile(r'^[\w\d\s!@#$%^&*()_+\-=\[\]{};\':"\\|,<>\/?]+\.(gs|pl|py|(k|c)*sh)$')
    # The broader pattern above is immediately overridden: only .py files are read.
    files_to_read = re.compile(r'^[\w\d\s!@#$%^&*()_+\-=\[\]{};\':"\\|,<>\/?]+\.py$')
    bkp_regex = re.compile(r'(backup|anaconda2_compartilhados|lixeira|anaconda2|bkp)')
    comments_regex = re.compile(r'(#|\*)')

    # results_storage_m = []
    # results_3d = []
    results = []
    for root, dirs, files in walk('/work', topdown=True):
        # Prune excluded and non-matching directories in place.
        dirs[:] = list(filter(lambda d: d not in exclude and pattern.match(d), dirs))
        # Keep only Python files whose contents match a database-related pattern.
        files[:] = list(filter(lambda f: files_to_read.match(f), files))
        files[:] = list(filter(lambda f: search_pattern(db_regex, join(root, f)), files))
        # files_storage_m = list(filter(lambda f: search_pattern(storage_regex, join(root, f)), files))
        # files_3d = [search_pattern(file3d_regex, join(root, f)) for f in files]
        # files_3d = list(filter(lambda f: search_pattern(file3d_regex, join(root, f)), files))
        # results_storage_m.extend(list(map(lambda f: join(root, f), files_storage_m)))
        # results_3d.extend(files_3d)
        # results_3d.extend(list(map(lambda f: join(root, f), files_3d)))
        results.extend(list(map(lambda f: join(root, f), files)))

    # Write the matching paths to db_conn.json and log how long the scan took.
    with open('db_conn.json', 'w') as jsonfile:
        dump(results, jsonfile)

    duration = datetime.now() - now
    logger.info(f'[I] {duration}')
    logger.info(f'[I] {datetime.now()}')