mehalter/requirements.txt

## requirements.txt
openpyxl==3.0.7
pandas==1.3.2

## translate.py
import os

import numpy as np
import pandas as pd

# Directory to read in xlsx files from
data_dir = r"data"

output_file = r"output.csv"

# Indexes of interested columns
columns = {
    "TP1": 1,
    "TP2": 2,
    "TP3": 3,
}
# Indexes of interested rows
rows = {
    "TEST": 5,
    "BP": 6,
    "AMBIOLIC": 7,
    "SOMETHING": 14,
    "WELP": 15,
}
# Define entry key
key = {
    "": 0,
    "x": 1,
    "xx": 2,
    "xxx": 3,
    "o": -1,
}

final_df = None

# Iterate over files in data directory, recursively
for subdir, dirs, files in os.walk(data_dir):
    for filename in files:
        # Only look at the xlsx files
        if filename.endswith(".xlsx"):
            # Get the full file path from the file system walk
            filepath = os.path.join(subdir, filename)
            # Open the file into a pandas data frame
            data = pd.read_excel(filepath).replace(np.nan, "", regex=True)
            # Get the id from the filename
            id = os.path.splitext(filename)[0]
            # Make a temporary data frame to hold the data
            entry = pd.DataFrame(columns=[id])

            # Iterate over the interested rows and columns
            for row in rows:
                for col in columns:
                    # Generate the unique variable name
                    variable = "{}_{}".format(row, col)
                    # Set the variable in the temp data frame using the lookup key
                    entry.loc[variable] = key[data.iloc[rows[row], columns[col]]]

            # Transpose entry to be a single row with several columns
            entry = entry.T

            # If a final dataframe hasn't been started, start it
            if final_df is None:
                final_df = entry
            # If there is a final data frame, append the new entry to it
            else:
                final_df = final_df.append(entry)

# Print the final dataframe
print(final_df)
# Output generated data frame as a CSV file
if final_df is not None:
    final_df.to_csv(output_file, index_label="id")
	import os

	import numpy as np
	import pandas as pd

	# Directory to read in xlsx files from
	data_dir = r"data"

	output_file = r"output.csv"

	# Indexes of interested columns
	columns = {
	"TP1": 1,
	"TP2": 2,
	"TP3": 3,
	}
	# Indexes of interested rows
	rows = {
	"TEST": 5,
	"BP": 6,
	"AMBIOLIC": 7,
	"SOMETHING": 14,
	"WELP": 15,
	}
	# Define entry key
	key = {
	"": 0,
	"x": 1,
	"xx": 2,
	"xxx": 3,
	"o": -1,
	}

	final_df = None

	# Iterate over files in data directory, recursively
	for subdir, dirs, files in os.walk(data_dir):
	for filename in files:
	# Only look at the xlsx files
	if filename.endswith(".xlsx"):
	# Get the full file path from the file system walk
	filepath = os.path.join(subdir, filename)
	# Open the file into a pandas data frame
	data = pd.read_excel(filepath).replace(np.nan, "", regex=True)
	# Get the id from the filename
	id = os.path.splitext(filename)[0]
	# Make a temporary data frame to hold the data
	entry = pd.DataFrame(columns=[id])

	# Iterate over the interested rows and columns
	for row in rows:
	for col in columns:
	# Generate the unique variable name
	variable = "{}_{}".format(row, col)
	# Set the variable in the temp data frame using the lookup key
	entry.loc[variable] = key[data.iloc[rows[row], columns[col]]]

	# Transpose entry to be a single row with several columns
	entry = entry.T

	# If a final dataframe hasn't been started, start it
	if final_df is None:
	final_df = entry
	# If there is a final data frame, append the new entry to it
	else:
	final_df = final_df.append(entry)

	# Print the final dataframe
	print(final_df)
	# Output generated data frame as a CSV file
	if final_df is not None:
	final_df.to_csv(output_file, index_label="id")