Skip to content

Instantly share code, notes, and snippets.

@bhuron
Last active May 15, 2022 16:28
Show Gist options
  • Save bhuron/4e7b57b9e8f9cfa5760dcd60467630c4 to your computer and use it in GitHub Desktop.
Save bhuron/4e7b57b9e8f9cfa5760dcd60467630c4 to your computer and use it in GitHub Desktop.
Exercice sur le traitement de données
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "import numpy as np\nimport pandas as pd\nimport matplotlib.pyplot as plt",
"execution_count": 1,
"outputs": []
},
{
"metadata": {},
"cell_type": "markdown",
"source": "### Exercice 2 : La course d'athlétisme"
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# 400 m finishing times in seconds, one value per runner, listed in ascending order.\ndata = [\n    48.65, 49.20, 50, 50.12, 50.13,\n    50.45, 51, 51.80, 51.85, 51.90,\n    52.05, 52.20, 52.60, 53.28, 54.80,\n]\nresults = pd.Series(data)\n# Last expression of the cell: summary statistics shown as the cell output.\nresults.describe()",
"execution_count": 2,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 2,
"data": {
"text/plain": "count 15.000000\nmean 51.335333\nstd 1.621793\nmin 48.650000\n25% 50.125000\n50% 51.800000\n75% 52.125000\nmax 54.800000\ndtype: float64"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Range of the series: largest value minus smallest value.\nvaleur_max = results.max()\nvaleur_min = results.min()\nétendue = valeur_max - valeur_min\n# The mean is rounded to two decimals; the median is stored as computed.\nmoyenne = round(results.mean(), 2)\nmédiane = results.median()",
"execution_count": 3,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Percentage of runners whose time is strictly below 52.50 s (the faster runners,\n# hence part_rapides rather than the former, misleading part_lents).\nsous_seuil = results[results < 52.50]\npart_rapides = len(sous_seuil) / len(results) * 100\nprint(f'{part_rapides}% de coureurs ont mis moins de 52,50 secondes pour le 400m')",
"execution_count": 4,
"outputs": [
{
"output_type": "stream",
"text": "80.0% de coureurs ont mis moins de 52,50 secondes pour le 400m\n",
"name": "stdout"
}
]
},
{
"metadata": {},
"cell_type": "markdown",
"source": "### Exercice 3 : Le temps consacré aux devoirs"
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Left-closed 30-minute classes: [0, 30), [30, 60), [60, 90), [90, 120).\nintervals = pd.interval_range(start=0, end=120, freq=30, closed='left')\n# The count of the [30, 60) class is deduced from the overall total.\neffectif_total = 671\neffectifs = [106, effectif_total - (106 + 235 + 144), 235, 144]\ntemps_travail = pd.DataFrame({\n    'Durée de travail (mn)': intervals,\n    'Centre de classe': intervals.mid,\n    'Effectifs': effectifs,\n})\n# Relative frequencies in percent, rounded to whole numbers.\nfréquences = temps_travail['Effectifs'] / sum(effectifs) * 100\ntemps_travail['Fréquences (%)'] = fréquences.round(0)\ntemps_travail",
"execution_count": 5,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 5,
"data": {
"text/plain": " Durée de travail (mn) Centre de classe Effectifs Fréquences (%)\n0 [0, 30) 15.0 106 16.0\n1 [30, 60) 45.0 186 28.0\n2 [60, 90) 75.0 235 35.0\n3 [90, 120) 105.0 144 21.0",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Durée de travail (mn)</th>\n <th>Centre de classe</th>\n <th>Effectifs</th>\n <th>Fréquences (%)</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>[0, 30)</td>\n <td>15.0</td>\n <td>106</td>\n <td>16.0</td>\n </tr>\n <tr>\n <th>1</th>\n <td>[30, 60)</td>\n <td>45.0</td>\n <td>186</td>\n <td>28.0</td>\n </tr>\n <tr>\n <th>2</th>\n <td>[60, 90)</td>\n <td>75.0</td>\n <td>235</td>\n <td>35.0</td>\n </tr>\n <tr>\n <th>3</th>\n <td>[90, 120)</td>\n <td>105.0</td>\n <td>144</td>\n <td>21.0</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Mean of the class midpoints weighted by the class counts, rounded to the nearest minute.\n# Stored as durée_moyenne so it does not overwrite the `moyenne` computed for Exercice 2.\ndurée_moyenne = round(np.average(temps_travail['Centre de classe'], weights=temps_travail['Effectifs']), 0)\nprint(f'Un élève passe en moyenne {durée_moyenne} minutes par jour pour faire ses devoirs.')",
"execution_count": 6,
"outputs": [
{
"output_type": "stream",
"text": "Un élève passe en moyenne 64.0 minutes par jour pour faire ses devoirs.\n",
"name": "stdout"
}
]
},
{
"metadata": {},
"cell_type": "markdown",
"source": "### Exercice 4 : L'âge des skieurs"
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Left-closed ten-year age classes from [0, 10) to [80, 90), with their counts.\nage_intervals = pd.interval_range(start=0, end=90, freq=10, closed='left')\neffectifs = [27, 45, 48, 39, 42, 36, 33, 24, 6]\ncolonnes = {\n    'Âge': age_intervals,\n    'Centre de classe': age_intervals.mid,\n    'Effectifs': effectifs,\n}\nskieurs = pd.DataFrame(colonnes)\n# Running total of the counts, from the youngest class upwards.\nskieurs['Eff. cumulés croissants'] = skieurs['Effectifs'].cumsum()\nskieurs",
"execution_count": 7,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 7,
"data": {
"text/plain": " Âge Centre de classe Effectifs Eff. cumulés croissants\n0 [0, 10) 5.0 27 27\n1 [10, 20) 15.0 45 72\n2 [20, 30) 25.0 48 120\n3 [30, 40) 35.0 39 159\n4 [40, 50) 45.0 42 201\n5 [50, 60) 55.0 36 237\n6 [60, 70) 65.0 33 270\n7 [70, 80) 75.0 24 294\n8 [80, 90) 85.0 6 300",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Âge</th>\n <th>Centre de classe</th>\n <th>Effectifs</th>\n <th>Eff. cumulés croissants</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>[0, 10)</td>\n <td>5.0</td>\n <td>27</td>\n <td>27</td>\n </tr>\n <tr>\n <th>1</th>\n <td>[10, 20)</td>\n <td>15.0</td>\n <td>45</td>\n <td>72</td>\n </tr>\n <tr>\n <th>2</th>\n <td>[20, 30)</td>\n <td>25.0</td>\n <td>48</td>\n <td>120</td>\n </tr>\n <tr>\n <th>3</th>\n <td>[30, 40)</td>\n <td>35.0</td>\n <td>39</td>\n <td>159</td>\n </tr>\n <tr>\n <th>4</th>\n <td>[40, 50)</td>\n <td>45.0</td>\n <td>42</td>\n <td>201</td>\n </tr>\n <tr>\n <th>5</th>\n <td>[50, 60)</td>\n <td>55.0</td>\n <td>36</td>\n <td>237</td>\n </tr>\n <tr>\n <th>6</th>\n <td>[60, 70)</td>\n <td>65.0</td>\n <td>33</td>\n <td>270</td>\n </tr>\n <tr>\n <th>7</th>\n <td>[70, 80)</td>\n <td>75.0</td>\n <td>24</td>\n <td>294</td>\n </tr>\n <tr>\n <th>8</th>\n <td>[80, 90)</td>\n <td>85.0</td>\n <td>6</td>\n <td>300</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Weighted mean of the class midpoints, each weighted by its class count.\ncentres = skieurs['Centre de classe']\npoids = skieurs['Effectifs']\nage_moyen = np.average(centres, weights=poids)\nprint(f\"L'âge moyen des skieurs est de {age_moyen} ans.\")",
"execution_count": 8,
"outputs": [
{
"output_type": "stream",
"text": "L'âge moyen des skieurs est de 39.0 ans.\n",
"name": "stdout"
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Age classes below 20 are those overlapping [0, 20). IntervalArray.overlaps tests\n# every class in one vectorised call instead of a row-by-row apply/lambda.\nmoins_de_20 = pd.Interval(left=0, right=20, closed='left')\njeunes_skieurs = skieurs[skieurs['Âge'].array.overlaps(moins_de_20)]\n# Share of the total count that falls in the selected classes, in percent.\npercent = jeunes_skieurs['Effectifs'].sum() / skieurs['Effectifs'].sum() * 100\nprint(f\"La fréquence en pourcentage de skieurs ayant moins de 20 ans est de {percent} %\")",
"execution_count": 9,
"outputs": [
{
"output_type": "stream",
"text": "La fréquence en pourcentage de skieurs ayant moins de 20 ans est de 24.0 %\n",
"name": "stdout"
}
]
}
],
"metadata": {
"interpreter": {
"hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e"
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3 (ipykernel)",
"language": "python"
},
"language_info": {
"name": "python",
"version": "3.9.10",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
"gist": {
"id": "",
"data": {
"description": "traitement-données.ipynb",
"public": true
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment