Skip to content

Instantly share code, notes, and snippets.

@ma0c
Last active November 27, 2018 19:03
Show Gist options
  • Save ma0c/7855e0f58a80d5b1fe2c39602dfdd8f4 to your computer and use it in GitHub Desktop.
Save ma0c/7855e0f58a80d5b1fe2c39602dfdd8f4 to your computer and use it in GitHub Desktop.
Update filter for file prefix
import os
import argparse
import logging
# Module-wide logger shared by every function in this script. Calling
# getLogger() without a name returns the root logger; a StreamHandler created
# without arguments writes the records to stderr. The level is configured
# later, in collect(), from the --logger_level argument.
logger = logging.getLogger()
logger.addHandler(logging.StreamHandler())
def parse_data(argv=None):
    """
    Create an argument parser to configure the script in a verbose way.

    Args:
        argv: Optional list of argument strings to parse. When None (the
            default) argparse reads the arguments from sys.argv[1:], which
            is what a plain ``parse_data()`` call does.

    Returns:
        argparse.Namespace with the attributes folder, dir_prefix,
        file_prefix, from_line, to_line, separator, columns, output,
        logger_level and std_output.
    """
    # The text must be passed as ``description``: the first positional
    # parameter of ArgumentParser is ``prog`` (the program name shown in the
    # usage line), not the description.
    parser = argparse.ArgumentParser(
        description="Extract data from files, rows and columns"
    )
    parser.add_argument(
        '--folder',
        help="The folder where the script is going to search for the files",
        default="."
    )
    parser.add_argument(
        '--dir_prefix',
        help="Dir prefix of files to evaluate",
        default="P"
    )
    parser.add_argument(
        '--file_prefix',
        help="File prefix of files to evaluate",
        default="avr"
    )
    parser.add_argument(
        '--from_line',
        help="Analyzer consider this line number to begin analysis",
        default=0,
        type=int
    )
    parser.add_argument(
        '--to_line',
        help="Analyzer consider this line to end analysis",
        default=1,
        type=int
    )
    parser.add_argument(
        '--separator',
        help="Each line will be separated using this column separator argument",
        default=","
    )
    # Kept as a raw string here; the caller converts it to a list of ints
    parser.add_argument(
        '--columns',
        help="List of columns to extract",
        default="0,1,2"
    )
    parser.add_argument(
        '--output',
        help="File name for output",
        default="output.txt"
    )
    parser.add_argument(
        '--logger_level',
        help="Logger level, values available: DEBUG, INFO, WARNING, ERROR, CRITICAL",
        default="ERROR"
    )
    parser.add_argument(
        '--std_output',
        action='store_true',
        help="Print output"
    )
    return parser.parse_args(argv)
def _extract_rows(file_path, file_name, from_line, to_line, separator, columns):
    """
    Read one file and return the requested columns of its selected lines.

    Args:
        file_path: Path of the file to open.
        file_name: Name used to identify the file in error messages.
        from_line: Zero-based index of the first line to extract (inclusive).
        to_line: Zero-based index at which extraction stops (exclusive).
        separator: Column separator; an empty value splits on whitespace.
        columns: Indexes of the columns to keep, in output order.

    Returns:
        A list with one list of column values per extracted line.

    Raises:
        SystemExit: With status 1 if a selected line lacks a requested column.
    """
    # str.split(None) splits on runs of whitespace
    split_on = separator if separator else None
    rows = []
    # The context manager closes the file even when we leave early
    with open(file_path) as source_file:
        # Iterate the file lazily instead of loading every line up front
        for index, line in enumerate(source_file):
            logger.debug("Exploring line {} with content: {}".format(index, line))
            logger.debug((from_line, index, to_line))
            # to_line is exclusive, so there is nothing left to read from it on
            if index >= to_line:
                logger.debug("Interrupting for because index greater than line")
                break
            if index < from_line:
                continue
            logger.debug("Separator: ")
            logger.debug(separator)
            # Drop the line terminator so the last column does not carry a
            # "\n" into the output (which would produce blank lines)
            splitted_line = line.rstrip("\n").split(split_on)
            logger.debug(splitted_line)
            row = []
            for column in columns:
                try:
                    value = splitted_line[column]
                except IndexError:
                    logger.error(
                        "In file {file} the line {line} does not contain colum {column}".format(
                            file=file_name,
                            line=line,
                            column=column
                        )
                    )
                    raise SystemExit(1)
                logger.debug("Appending value {}".format(value))
                row.append(value)
            rows.append(row)
    return rows


def collect():
    """
    Extract columns from a line range of every matching file and save them.

    The command line arguments (see parse_data) select a folder, a prefix for
    its sub-directories, a prefix for the files inside them, a half-open line
    range [from_line, to_line), a column separator and the column indexes to
    keep. Every extracted row is written to the output file, one per line,
    and also printed when --std_output is given.

    Directories and files are visited in sorted name order so that the
    output is reproducible; os.listdir alone returns an arbitrary order.

    Raises:
        SystemExit: With status 1 when the logger level is invalid, when the
            columns argument is not a comma separated list of integers, or
            when a selected line does not contain a requested column.
    """
    # Collect data from command line args
    data = parse_data()
    try:
        logger.setLevel(data.logger_level)
    except ValueError:
        logger.error("Invalid logger_level: values available: DEBUG, INFO, WARNING, ERROR, CRITICAL")
        # A rejected argument is a failure, so the exit status must be non-zero
        raise SystemExit(1)
    logger.info(vars(data))
    # Get the list of index columns to extract
    try:
        columns = [int(x) for x in data.columns.split(",")]
    except ValueError:
        logger.error("Columns argument must be a comma separated list of integers: 1,3,5")
        raise SystemExit(1)
    # Rows gathered from every matching file, in visiting order
    extracted_data = []
    dir_names = sorted(os.listdir(data.folder))
    logger.debug("Dirs on folder: {}".format(", ".join(dir_names)))
    for dir_name in dir_names:
        logger.info("Exploring dir {}".format(dir_name))
        # Just consider the entries with the specified prefix
        if not dir_name.startswith(data.dir_prefix):
            logger.debug("Dir {} has NOT the prefix {}".format(dir_name, data.dir_prefix))
            continue
        logger.debug("Dir {} has the prefix {}".format(dir_name, data.dir_prefix))
        dir_path = os.path.join(data.folder, dir_name)
        # A regular file may share the prefix; listing it would raise
        if not os.path.isdir(dir_path):
            logger.debug("Skipping {} because it is not a directory".format(dir_path))
            continue
        for file_name in sorted(os.listdir(dir_path)):
            if not file_name.startswith(data.file_prefix):
                logger.debug("File {} has NOT the prefix {}".format(file_name, data.file_prefix))
                continue
            logger.debug("File {} has the prefix {}".format(file_name, data.file_prefix))
            file_path = os.path.join(dir_path, file_name)
            # A sub-directory may share the prefix; opening it would raise
            if not os.path.isfile(file_path):
                logger.debug("Skipping {} because it is not a regular file".format(file_path))
                continue
            extracted_data.extend(
                _extract_rows(
                    file_path,
                    file_name,
                    data.from_line,
                    data.to_line,
                    data.separator,
                    columns
                )
            )
    # An empty separator was split on whitespace, so join with a single space
    joining_character = data.separator if data.separator else " "
    # Create (or truncate) the output file; it is closed even if a write fails
    with open(data.output, "w") as output_file:
        for new_line in extracted_data:
            # Write the extracted content separated with the separator
            # specified and a new line character at the end
            current_output_line = joining_character.join(new_line)
            output_file.write("{}\n".format(current_output_line))
            if data.std_output:
                print(current_output_line)
# Run the extraction only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    collect()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment