JnyJny/grade_csv.py

## grade_csv.py
#!/usr/bin/env python3

import click
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Requirements:
# python3 -m pip install -U click pandas matplotlib numpy xlwt

class Grade:
    """The Grade class exists to simplify translating a floating-point value
    to a traditional letter grade.  The variables 'bin' and 'cotes' are
    class scope, which mean they are created the first time the class
    is used and not recreated. This is a small space/time optimization.

    The only method defined is a classmethod, 'to_cote'.

    Our other choice was to make bins and cotes global variables,
    and write a function as in the original source. I will go to
    great lengths to avoid global variables, they tend to cause
    far more problems than they solve.

    """

    bins = [0, 34, 49, 53, 56, 59, 64, 69, 72, 76, 79, 84, 89, 100.5]
    cotes = ["F", "E", "D", "D+", "C-", "C", "C+", "B-", "B", "B+", "A-", "A", "A+"]

    @classmethod
    def to_cote(cls, total: float) -> str:
        """Given a floating point `total`, return a corresponding letter
        grade.
        """
        index = np.digitize(total, cls.bins) - 1
        return cls.cotes[index]

    def __init__(self):
        raise NotImplementedError("Grade not implemented")


def read_grades(filename: str) -> pd.DataFrame:
    """Reads the contents of the given CSV file into a pandas.DataFrame.

    Three columns are added to the DataFrame: Total, Cote and `Prenom, Nom`.

    Total is the sum of the values between columns 5 and 11, inclusive.
    Cote is the letter grade representation of Total
    `Prenom, Nom` is a concatenation of Prenom and Nom columns.

    """

    # Pandas has a very robust CSV parser

    df = pd.read_csv(filename)

    # the next three lines create three new columns in the dataframe
    # derived from values read from the file.

    df["Total"] = df[df.columns[5:12]].T.sum()

    df["Cote"] = df.Total.apply(Grade.to_cote)

    df["Prenom, Nom"] = df.Prenom + ", " + df.Nom

    return df


def plot_total_distribution(
    df: pd.DataFrame, preview: bool = False, output_filename: str = None
) -> None:
    """Given a pandas.DataFrame with a Total column, generates a
    histogram plot. If preview is True, matplotlib.pyplot.show() is
    called to display it immediately. If an output_filename is
    given, the plot is saved to a file with that name.
    """

    # Pandas has a great interface to matplotlib that greatly
    # simplifies creating plots. This plot isn't exactly like
    # the one in your original source, but it's pretty close.

    df.Total.plot.hist()

    if output_filename:
        print("saving plot to: ", output_filename)
        plt.savefig(output_filename, bbox_inches="tight")

    if preview:
        print("Previewing, close window to continue...")
        plt.show()


def write_summary_xlsx(df, output_filename: str) -> None:
    """Given a pandas.DataFrame with columns 'Prenom, Nom',
    Matricule, and Cote, generates a summary XLSX spreadsheet
    file and writes it to the given output_filename.
    """

    fullname = "Prenom, Nom"

    # here we create a summary DataFrame composed of three
    # columns, "Prenom, Nom", Matricule and Cote.

    summary = df[[fullname, "Matricule", "Cote"]]

    # The next line changes the index of the dataframe to
    # the fullname column and "drops" that column and modifies
    # the current 'summary' dataframe instead of returning a
    # new modified Dataframe (the pandas default mode of operation).

    summary.set_index(fullname, drop=True, inplace=True)

    # Ok this is magic, but the good kind. It's simple.

    if outputfile_name.endswith(".xlsx"):
        summary.to_excel(output_filename)
        return

    writer = pd.ExcelWriter(output_filename)
    summary.to_excel(writer)
    writer.save()


@click.command()
@click.argument("input-file", type=click.Path(readable=True))
@click.option(
    "-p",
    "--output-plot",
    type=click.Path(exists=False),
    default=None,
    help="Grade distribution graph in PNG format.",
)
@click.option("-P", "--preview-plot", is_flag=True, default=False)
@click.option("-x", "--output-xls", type=click.Path(), default=None)
def main_cli(input_file, output_plot, preview_plot, output_xls):
    """Processes the student grade data from a CSV file,
    optionally generates a grade distribution plot with preview,
    and optionally writes student grade data to an Excel spreadsheet.
    """

    # step one, read student data

    df = read_grades(input_file)

    # the pandas.DataFrame.describe function is essentially
    # your print_stats function.

    print(df.Total.describe())

    # if the user specifies output_xls, the student data
    # is summarized and written to an Excel file.
    if output_xls:
        write_summary_xls(df, output_xls)
        print("wrote XLS summary to: ", output_xls)
    plot_total_distribution(df, preview_plot, output_plot)


if __name__ == "__main__":
    main_cli()
	#!/usr/bin/env python3

	import click
	import numpy as np
	import matplotlib.pyplot as plt
	import pandas as pd

	# Requirements:
	# python3 -m pip install -U click pandas matplotlib numpy xlwt

	class Grade:
	"""The Grade class exists to simplify translating a floating-point value
	to a traditional letter grade. The variables 'bin' and 'cotes' are
	class scope, which mean they are created the first time the class
	is used and not recreated. This is a small space/time optimization.

	The only method defined is a classmethod, 'to_cote'.

	Our other choice was to make bins and cotes global variables,
	and write a function as in the original source. I will go to
	great lengths to avoid global variables, they tend to cause
	far more problems than they solve.

	"""

	bins = [0, 34, 49, 53, 56, 59, 64, 69, 72, 76, 79, 84, 89, 100.5]
	cotes = ["F", "E", "D", "D+", "C-", "C", "C+", "B-", "B", "B+", "A-", "A", "A+"]

	@classmethod
	def to_cote(cls, total: float) -> str:
	"""Given a floating point `total`, return a corresponding letter
	grade.
	"""
	index = np.digitize(total, cls.bins) - 1
	return cls.cotes[index]

	def __init__(self):
	raise NotImplementedError("Grade not implemented")


	def read_grades(filename: str) -> pd.DataFrame:
	"""Reads the contents of the given CSV file into a pandas.DataFrame.

	Three columns are added to the DataFrame: Total, Cote and `Prenom, Nom`.

	Total is the sum of the values between columns 5 and 11, inclusive.
	Cote is the letter grade representation of Total
	`Prenom, Nom` is a concatenation of Prenom and Nom columns.

	"""

	# Pandas has a very robust CSV parser

	df = pd.read_csv(filename)

	# the next three lines create three new columns in the dataframe
	# derived from values read from the file.

	df["Total"] = df[df.columns[5:12]].T.sum()

	df["Cote"] = df.Total.apply(Grade.to_cote)

	df["Prenom, Nom"] = df.Prenom + ", " + df.Nom

	return df


	def plot_total_distribution(
	df: pd.DataFrame, preview: bool = False, output_filename: str = None
	) -> None:
	"""Given a pandas.DataFrame with a Total column, generates a
	histogram plot. If preview is True, matplotlib.pyplot.show() is
	called to display it immediately. If an output_filename is
	given, the plot is saved to a file with that name.
	"""

	# Pandas has a great interface to matplotlib that greatly
	# simplifies creating plots. This plot isn't exactly like
	# the one in your original source, but it's pretty close.

	df.Total.plot.hist()

	if output_filename:
	print("saving plot to: ", output_filename)
	plt.savefig(output_filename, bbox_inches="tight")

	if preview:
	print("Previewing, close window to continue...")
	plt.show()


	def write_summary_xlsx(df, output_filename: str) -> None:
	"""Given a pandas.DataFrame with columns 'Prenom, Nom',
	Matricule, and Cote, generates a summary XLSX spreadsheet
	file and writes it to the given output_filename.
	"""

	fullname = "Prenom, Nom"

	# here we create a summary DataFrame composed of three
	# columns, "Prenom, Nom", Matricule and Cote.

	summary = df[[fullname, "Matricule", "Cote"]]

	# The next line changes the index of the dataframe to
	# the fullname column and "drops" that column and modifies
	# the current 'summary' dataframe instead of returning a
	# new modified Dataframe (the pandas default mode of operation).

	summary.set_index(fullname, drop=True, inplace=True)

	# Ok this is magic, but the good kind. It's simple.

	if outputfile_name.endswith(".xlsx"):
	summary.to_excel(output_filename)
	return

	writer = pd.ExcelWriter(output_filename)
	summary.to_excel(writer)
	writer.save()


	@click.command()
	@click.argument("input-file", type=click.Path(readable=True))
	@click.option(
	"-p",
	"--output-plot",
	type=click.Path(exists=False),
	default=None,
	help="Grade distribution graph in PNG format.",
	)
	@click.option("-P", "--preview-plot", is_flag=True, default=False)
	@click.option("-x", "--output-xls", type=click.Path(), default=None)
	def main_cli(input_file, output_plot, preview_plot, output_xls):
	"""Processes the student grade data from a CSV file,
	optionally generates a grade distribution plot with preview,
	and optionally writes student grade data to an Excel spreadsheet.
	"""

	# step one, read student data

	df = read_grades(input_file)

	# the pandas.DataFrame.describe function is essentially
	# your print_stats function.

	print(df.Total.describe())

	# if the user specifies output_xls, the student data
	# is summarized and written to an Excel file.
	if output_xls:
	write_summary_xls(df, output_xls)
	print("wrote XLS summary to: ", output_xls)
	plot_total_distribution(df, preview_plot, output_plot)


	if __name__ == "__main__":
	main_cli()