Last active
February 19, 2021 15:07
-
-
Save AMR-KELEG/de38fe639703768cc506921cdb694f48 to your computer and use it in GitHub Desktop.
marks-merge.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 76, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Load offline grades sheet\n", | |
"all_marks = pd.read_excel(\"E5-full-marks.xlsx\")\n", | |
"all_marks[\"Student Code\"] = all_marks[\"Student Code\"].apply(lambda s: str(s).lower())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 77, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<class 'pandas.core.frame.DataFrame'>\n", | |
"RangeIndex: 235 entries, 0 to 234\n", | |
"Data columns (total 1 columns):\n", | |
" # Column Non-Null Count Dtype \n", | |
"--- ------ -------------- ----- \n", | |
" 0 E5 163 non-null float64\n", | |
"dtypes: float64(1)\n", | |
"memory usage: 2.0 KB\n" | |
] | |
} | |
], | |
"source": [ | |
"# Check the non-empty values\n", | |
"all_marks[[\"E5\"]].info()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 78, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Student Code</th>\n", | |
" <th>E5</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1205036</td>\n", | |
" <td>10.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1400742</td>\n", | |
" <td>10.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>14t0046</td>\n", | |
" <td>9.5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1500400</td>\n", | |
" <td>10.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>15x0031</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Student Code E5\n", | |
"0 1205036 10.0\n", | |
"1 1400742 10.0\n", | |
"2 14t0046 9.5\n", | |
"3 1500400 10.0\n", | |
"4 15x0031 NaN" | |
] | |
}, | |
"execution_count": 78, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Note that the student code is lower-cased\n", | |
"all_marks[[\"Student Code\", \"E5\"]].head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 79, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Load the online marks sheet\n", | |
"online_marks = pd.read_excel(\"E5-online.xlsx\")\n", | |
"online_marks[\"E5\"] = online_marks[\"E5\"].apply(lambda d: None if str(d).strip()==\"-\" else d)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 80, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Generate a student code column using the student's email address\n", | |
"online_marks[\"Student Code\"] = online_marks[\"Email address\"].apply(lambda l: l[0:7])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 62, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Student Code</th>\n", | |
" <th>E5</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>03t0065</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1205036</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>12x0224</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1300204</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1400742</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Student Code E5\n", | |
"0 03t0065 NaN\n", | |
"1 1205036 NaN\n", | |
"2 12x0224 NaN\n", | |
"3 1300204 NaN\n", | |
"4 1400742 NaN" | |
] | |
}, | |
"execution_count": 62, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"online_marks[[\"Student Code\", \"E5\"]].head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<class 'pandas.core.frame.DataFrame'>\n", | |
"RangeIndex: 236 entries, 0 to 235\n", | |
"Data columns (total 1 columns):\n", | |
" # Column Non-Null Count Dtype \n", | |
"--- ------ -------------- ----- \n", | |
" 0 E5 63 non-null float64\n", | |
"dtypes: float64(1)\n", | |
"memory usage: 2.0 KB\n" | |
] | |
} | |
], | |
"source": [ | |
"# Check the non-empty values\n", | |
"online_marks[[\"E5\"]].info()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Merge the sheets" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 64, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Student Code</th>\n", | |
" <th>E5_x</th>\n", | |
" <th>E5_y</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1205036</td>\n", | |
" <td>10.0</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1400742</td>\n", | |
" <td>10.0</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>14t0046</td>\n", | |
" <td>9.5</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1500400</td>\n", | |
" <td>10.0</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>15x0031</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Student Code E5_x E5_y\n", | |
"0 1205036 10.0 NaN\n", | |
"1 1400742 10.0 NaN\n", | |
"2 14t0046 9.5 NaN\n", | |
"3 1500400 10.0 NaN\n", | |
"4 15x0031 NaN NaN" | |
] | |
}, | |
"execution_count": 64, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"merged_df = pd.merge(all_marks[[\"Student Code\", \"E5\"]],\n", | |
" online_marks[[\"Student Code\", \"E5\"]],\n", | |
" on=\"Student Code\",\n", | |
" how=\"left\")\n", | |
"merged_df.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 81, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Set empty values to -1 so that I can merge the degrees using the max function\n", | |
"merged_df[\"E5_x\"].fillna(-1, inplace=True)\n", | |
"merged_df[\"E5_y\"].fillna(-1, inplace=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 67, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"merged_df[\"E5\"] = merged_df.apply(lambda row: max(row[\"E5_x\"], row[\"E5_y\"]), axis=1)\n", | |
"merged_df[\"E5\"] = merged_df[\"E5\"].apply(lambda d: None if d==-1 else d)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 82, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<class 'pandas.core.frame.DataFrame'>\n", | |
"Int64Index: 235 entries, 0 to 234\n", | |
"Data columns (total 1 columns):\n", | |
" # Column Non-Null Count Dtype \n", | |
"--- ------ -------------- ----- \n", | |
" 0 E5 226 non-null float64\n", | |
"dtypes: float64(1)\n", | |
"memory usage: 3.7 KB\n" | |
] | |
} | |
], | |
"source": [ | |
"# Check the number of non-empty values after merging\n", | |
"merged_df[[\"E5\"]].info()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 83, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"226" | |
] | |
}, | |
"execution_count": 83, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Check the number of expected non-empty values before merging\n", | |
"((online_marks.shape[0] - online_marks[\"E5\"].isnull().sum()) + \n", | |
"(all_marks.shape[0] - all_marks[\"E5\"].isnull().sum()))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 86, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Export the marks\n", | |
"merged_df[[\"Student Code\", \"E5\"]].to_excel(\"E5-final.xlsx\", index=False)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.6" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment