Last active
November 27, 2018 19:03
-
-
Save ma0c/7855e0f58a80d5b1fe2c39602dfdd8f4 to your computer and use it in GitHub Desktop.
Update filter for file prefix
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse
import logging
import os
import sys
# Module-wide logger: this is the root logger, with one StreamHandler attached
# at import time so records are echoed to the console. Its level is set later
# from the --logger_level option.
logger = logging.getLogger()
logger.addHandler(logging.StreamHandler())
def parse_data(argv=None):
    """
    Create an argument parser to configure the script in a verbose way.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, which is
            what the script did before this parameter existed.

    Returns:
        argparse.Namespace with the attributes ``folder``, ``dir_prefix``,
        ``file_prefix``, ``from_line``, ``to_line``, ``separator``,
        ``columns``, ``output``, ``logger_level`` and ``std_output``.
    """
    parser = argparse.ArgumentParser(
        # The first positional parameter of ArgumentParser is ``prog``; this
        # sentence is a description, so it has to be passed by keyword or it
        # ends up as the program name in the usage line.
        description="Extract data from files, rows and columns"
    )
    parser.add_argument(
        '--folder',
        help="The folder where the script is going to search for the files",
        default="."
    )
    parser.add_argument(
        '--dir_prefix',
        help="Dir prefix of files to evaluate",
        default="P"
    )
    parser.add_argument(
        '--file_prefix',
        help="File prefix of files to evaluate",
        default="avr"
    )
    parser.add_argument(
        '--from_line',
        help="Analyzer consider this line number to begin analysis",
        default=0,
        type=int
    )
    parser.add_argument(
        '--to_line',
        help="Analyzer consider this line to end analysis",
        default=1,
        type=int
    )
    parser.add_argument(
        '--separator',
        help="Each line will be separated using this column separator argument",
        default=","
    )
    # Kept as a raw string here; the caller converts it to a list of ints so
    # that it can report a friendly error message on bad input.
    parser.add_argument(
        '--columns',
        help="List of columns to extract",
        default="0,1,2"
    )
    parser.add_argument(
        '--output',
        help="File name for output",
        default="output.txt"
    )
    # Not restricted with ``choices``: the caller validates the value when it
    # applies it to the logger.
    parser.add_argument(
        '--logger_level',
        help="Logger level, values available: DEBUG, INFO, WARNING, ERROR, CRITICAL",
        default="ERROR"
    )
    parser.add_argument(
        '--std_output',
        action='store_true',
        help="Print output"
    )
    return parser.parse_args(argv)
def _parse_column_indexes(raw_columns):
    """Convert a comma separated string such as "0,2,5" into a list of ints.

    Raises:
        ValueError: if any item is not a valid integer.
    """
    return [int(item) for item in raw_columns.split(",")]


def _extract_rows(file_path, file_name, data, columns):
    """Return the requested columns of lines [from_line, to_line) of one file.

    Args:
        file_path: Path of the file to read.
        file_name: Bare file name, used only in the error message.
        data: Parsed options; reads ``from_line``, ``to_line`` and
            ``separator``.
        columns: List of integer column indexes to keep from each line.

    Returns:
        A list with one list of strings per selected line.

    Exits the process with status 1 when a selected line lacks a requested
    column.
    """
    # An empty separator means "split on any run of whitespace".
    separator = data.separator if data.separator else None
    rows = []
    # The context manager closes the file even when sys.exit() fires below.
    with open(file_path) as source_file:
        # Iterate lazily so lines past ``to_line`` are never read.
        for index, raw_line in enumerate(source_file):
            logger.debug("Exploring line {} with content: {}".format(index, raw_line))
            logger.debug((data.from_line, index, data.to_line))
            # The wanted range is half-open, so ``to_line`` itself is already
            # outside of it and reading can stop here.
            if index >= data.to_line:
                logger.debug("Interrupting for because index greater than line")
                break
            if index < data.from_line:
                continue
            # Drop the line terminator; otherwise it stays glued to the last
            # field and the output gets a blank line after every row.
            line = raw_line.rstrip("\r\n")
            logger.debug("Separator: ")
            logger.debug(data.separator)
            splitted_line = line.split(separator)
            logger.debug(splitted_line)
            row = []
            for column in columns:
                try:
                    value = splitted_line[column]
                except IndexError:
                    logger.error(
                        "In file {file} the line {line} does not contain column {column}".format(
                            file=file_name,
                            line=line,
                            column=column
                        )
                    )
                    sys.exit(1)
                logger.debug("Appending value {}".format(value))
                row.append(value)
            rows.append(row)
    return rows


def _write_rows(rows, data):
    """Write each row to ``data.output``, echoing it when ``std_output`` is set."""
    # Fields are re-joined with the input separator; a single space is used
    # when the separator is empty.
    joining_character = data.separator if data.separator else " "
    with open(data.output, "w") as output_file:
        for row in rows:
            current_output_line = joining_character.join(row)
            output_file.write("{}\n".format(current_output_line))
            if data.std_output:
                print(current_output_line)


def collect():
    """
    Extract the selected columns from the matching files and write them out.

    Reads the options with ``parse_data()``, then walks every directory inside
    ``folder`` whose name starts with ``dir_prefix`` and, inside those, every
    file whose name starts with ``file_prefix``. The selected columns of the
    lines in the range [from_line, to_line) are written to the ``output``
    file, one row per line, and also printed when ``std_output`` is set.

    Exits the process with status 1 on an invalid ``logger_level``, an invalid
    ``columns`` list, or a selected line that lacks a requested column.
    """
    # Collect data from command line args
    data = parse_data()
    try:
        logger.setLevel(data.logger_level)
    except ValueError:
        logger.error("Invalid logger_level: values available: DEBUG, INFO, WARNING, ERROR, CRITICAL")
        # A rejected option is a failure, so report it with a non-zero status.
        sys.exit(1)
    logger.info(vars(data))

    # Get the list of index columns to extract
    try:
        columns = _parse_column_indexes(data.columns)
    except ValueError:
        logger.error("Columns argument must be a comma separated list of integers: 1,3,5")
        sys.exit(1)

    extracted_data = []
    # os.listdir returns names in arbitrary order; sort them so the output is
    # reproducible from one run to the next.
    dir_names = sorted(os.listdir(data.folder))
    logger.debug("Dirs on folder: {}".format(", ".join(dir_names)))
    for dir_name in dir_names:
        logger.info("Exploring dir {}".format(dir_name))
        if not dir_name.startswith(data.dir_prefix):
            logger.debug("Dir {} has NOT the prefix {}".format(dir_name, data.dir_prefix))
            continue
        logger.debug("Dir {} has the prefix {}".format(dir_name, data.dir_prefix))
        dir_path = os.path.join(data.folder, dir_name)
        # A plain file may share the prefix; listing it would raise.
        if not os.path.isdir(dir_path):
            logger.debug("Skipping {} because it is not a directory".format(dir_path))
            continue
        for file_name in sorted(os.listdir(dir_path)):
            if not file_name.startswith(data.file_prefix):
                logger.debug("File {} has NOT the prefix {}".format(file_name, data.file_prefix))
                continue
            logger.debug("File {} has the prefix {}".format(file_name, data.file_prefix))
            file_path = os.path.join(dir_path, file_name)
            # A sub-directory may share the prefix; opening it would raise.
            if not os.path.isfile(file_path):
                logger.debug("Skipping {} because it is not a regular file".format(file_path))
                continue
            extracted_data.extend(_extract_rows(file_path, file_name, data, columns))

    # Everything has been read by now, so a failure while reading never
    # leaves a half-written output file behind.
    _write_rows(extracted_data, data)
# Run the extraction only when the file is executed as a script, so importing
# the module does not trigger command-line parsing or any file I/O.
if __name__ == "__main__":
    collect()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment