Skip to content

Instantly share code, notes, and snippets.

@meraioth
Created October 11, 2023 19:28
Show Gist options
  • Save meraioth/7a4562b466067541853f5d886d538b05 to your computer and use it in GitHub Desktop.
Save meraioth/7a4562b466067541853f5d886d538b05 to your computer and use it in GitHub Desktop.
analisis deuda tecnica.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"name": "analisis deuda tecnica.ipynb",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/meraioth/7a4562b466067541853f5d886d538b05/-an-lisis-deuda-t-cnica.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"source": [
"# Imports"
],
"metadata": {
"id": "L9gqMsawoRBX"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "uXOPMnKJVAVI"
},
"outputs": [],
"source": [
"# imports\n",
"import requests\n",
"import pandas as pd\n",
"from datetime import date\n",
"from dateutil.relativedelta import relativedelta\n",
"from google.colab import drive\n",
"import logging\n",
"import json\n",
"import re\n",
"import matplotlib.pyplot as plt\n",
"!pip install circlify --quiet\n",
"import circlify\n",
"!pip install colour --quiet\n",
"from colour import Color\n",
"import warnings\n"
]
},
{
"cell_type": "markdown",
"source": [
"# Ownership"
],
"metadata": {
"id": "xq5ByMC1rueE"
}
},
{
"cell_type": "markdown",
"source": [
"En Buk cada archivo tiene un equipo owner. En nuestro caso, este comando nos entrega el listado de archivos del equipo rmcl (Remuneraciones Chile); de acá solo queremos analizar los archivos de la carpeta app de nuestro monolito en Rails:\n",
"\n",
"\n",
"```\n",
"rake ownership:details | grep rmcl | awk -F':' '{print $4}' | grep app > team_files.csv\n",
"```\n",
"\n"
],
"metadata": {
"id": "xfRAKIpX6BeB"
}
},
{
"cell_type": "code",
"source": [
"# Leer listado de archivos a analizar\n",
"team_files = pd.read_csv('team_files.csv', header=None, names=['entity'])"
],
"metadata": {
"id": "_7YZr6ST6lci"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"# Sentry\n",
"Cada evento de sentry tiene una traza del error, buscamos en esa traza el primer archivo que haga match con nuestro team_files y contamos la cantidad de errores por archivo"
],
"metadata": {
"id": "iSGbcK45VgPs"
}
},
{
"cell_type": "code",
"source": [
"SENTRY_API_BASE_URL = 'Replace with your Sentry API base URL'\n",
"SENTRY_AUTH_TOKEN = 'Replace with your Sentry authentication token'\n",
"PROJECT_ID = 'Replace with your Project Id'\n",
"ENVIRONMENT = 'Replace with your enviroment'\n",
"ORGANIZATION = 'Replace with your organization'\n",
"END_DATE = date.today().strftime(\"%Y-%m-%d\")\n",
"START_DATE = (date.today() - relativedelta(years=1)).strftime(\"%Y-%m-%d\")\n",
"PAGES = 100 # PAGES * 100 = #records\n",
"\n",
"def get_events_stack_files():\n",
"    \"\"\"Fetch error events that carry stack filenames from the Sentry events API.\n",
"\n",
"    Downloads at most PAGES pages, following the 'next' cursor that Sentry sends in the\n",
"    Link response header, and stops early when there are no more results.\n",
"\n",
"    Returns:\n",
"        A list with the 'data' rows of every page fetched, or None when the first\n",
"        request does not answer with HTTP 200.\n",
"    \"\"\"\n",
"    data = []\n",
"    url = f\"{SENTRY_API_BASE_URL}/organizations/{ORGANIZATION}/events/?query=(event.type:error AND has:stack.filename)&field=stack.filename&field=count()&field=transaction&project={PROJECT_ID}&start={START_DATE}&end={END_DATE}&sort=-count()&environment={ENVIRONMENT}\"\n",
"    headers = {\n",
"        'Authorization': f'Bearer {SENTRY_AUTH_TOKEN}',\n",
"        'Content-Type': 'application/json',\n",
"    }\n",
"    response = requests.get(url, headers=headers)\n",
"    if response.status_code != 200:\n",
"        print(f\"Failed to retrieve file statistics. Status code: {response.status_code}\")\n",
"        return None\n",
"    data += response.json()['data']\n",
"\n",
"    # The first page gives the link to the next one; keep following it page by page.\n",
"    for page in range(2, PAGES + 1):\n",
"        # requests parses the Link header into response.links, keyed by its rel value.\n",
"        next_link = response.links.get('next', {})\n",
"        # Sentry always sends a next link and flags the end with results=\"false\".\n",
"        if not next_link.get('url') or next_link.get('results') == 'false':\n",
"            break\n",
"        response = requests.get(next_link['url'], headers=headers)\n",
"        if response.status_code != 200:\n",
"            # Keep what was already downloaded instead of failing on a late page.\n",
"            print(f\"Failed to retrieve page {page}. Status code: {response.status_code}\")\n",
"            break\n",
"        data += response.json()['data']\n",
"    return data\n",
"\n",
"stack_files_response = get_events_stack_files()\n",
"stack_files = pd.DataFrame.from_dict(stack_files_response)"
],
"metadata": {
"id": "11zDVtydVlLy"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"stack_files = stack_files.reset_index()"
],
"metadata": {
"id": "6bahOwh0zcc2"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"sentry_stats = pd.DataFrame({'stack.filename': stack_files['stack.filename'].explode(), 'index': stack_files['index'] ,'sentry_errors': stack_files['count()'], 'transaction': stack_files['transaction']})\n",
"sentry_stats = sentry_stats.merge(team_files, left_on='stack.filename', right_on='entity', how='inner')\n",
"sentry_stats = (sentry_stats.reset_index()\n",
" .drop_duplicates(subset=['stack.filename', 'index'], keep='last'))"
],
"metadata": {
"id": "nOG_s5bwBfFd"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Total Sentry errors per file. as_index=False keeps 'entity' as a regular (first) column,\n",
"# like the other per-file stats frames that are later merged on 'entity'.\n",
"sentry_stats = sentry_stats[['entity', 'sentry_errors']]\n",
"sentry_stats = sentry_stats.groupby('entity', as_index=False).agg('sum')"
],
"metadata": {
"id": "erxzgsxEzWnx"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"# Sonar issues"
],
"metadata": {
"id": "mlsz-_Yu8jrL"
}
},
{
"cell_type": "code",
"source": [
"# API documentation and endpoint URLs: https://sonarcloud.io/web_api/\n",
"# The issues are requested below without extra filters, so ISSUES_URL has to carry the\n",
"# project filter itself, e.g. https://sonarcloud.io/api/issues/search?componentKeys=<project-key>\n",
"ISSUES_URL = 'Replace with your SonarCloud issues search URL'\n",
"TOKEN = 'Replace with your SonarCloud token'\n",
"\n",
"# Authentication: the token is sent as the basic-auth user name with an empty password\n",
"session = requests.Session()\n",
"session.auth = (TOKEN, '')\n",
"\n",
"\n",
"\n",
"# función para obtener la data de las issues\n",
"def fetch_data(url, session, params):\n",
"    \"\"\"GET url through session with the given query params and return the parsed JSON body.\n",
"\n",
"    The body is decoded as UTF-8 before parsing. The HTTP status is not checked.\n",
"    \"\"\"\n",
"    response = session.get(url, params=params)\n",
"    return json.loads(response.content.decode('utf-8'))\n",
"\n",
"\n",
"# función para definir la cantidad de páginas\n",
"def pages_quantity(url, session, params, ps):\n",
"    \"\"\"Return how many pages of size ps are needed to read every result of url.\n",
"\n",
"    Requests page 1 and reads the number of results from the top-level 'total' key or,\n",
"    when that key is missing, from 'paging' -> 'total'. The params dict is not modified.\n",
"    \"\"\"\n",
"    first_page = fetch_data(url, session, {**params, 'p': 1})\n",
"    try:\n",
"        total = first_page['total']\n",
"    except KeyError:\n",
"        total = first_page['paging']['total']\n",
"    # Ceiling division: a total that is an exact multiple of ps must not add an empty page.\n",
"    return (total + ps - 1) // ps\n",
"\n",
"\n",
"# juntar todas las issues de todas las páginas en un dataframe\n",
"\n",
"keys_of_interest = ['component', 'severity', 'type', 'message']\n",
"\n",
"data_list = []\n",
"n_pages = pages_quantity(ISSUES_URL, session, {}, 500)\n",
"for page in range(1, n_pages + 1):\n",
"    page_data = fetch_data(ISSUES_URL, session, {'p': page, 'ps': 500})\n",
"    data_list.extend(page_data.get('issues', []))\n",
"\n",
"filtered_data_list = [{key: dct[key] for key in keys_of_interest} for dct in data_list]\n",
"\n",
"# Passing the columns explicitly keeps them in place even when no issue was returned\n",
"sonar_stats = pd.DataFrame(filtered_data_list, columns=keys_of_interest)\n",
"\n",
"# Clean the file names: drop the prefix before the first ':' of the component key.\n",
"# Splitting only once keeps any ':' that is part of the path itself.\n",
"sonar_stats['component'] = sonar_stats['component'].str.split(':', n=1).str[1]\n",
"\n",
"# Keep only the issues that belong to the team files\n",
"sonar_stats = sonar_stats[sonar_stats['component'].isin(team_files['entity'])]"
],
"metadata": {
"id": "Yvmxbdfn8mSB"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# contar los todos agrupados por archivo\n",
"sonar_todos = sonar_stats[sonar_stats['message'] == 'Complete the task associated to this TODO comment.']\n",
"todos_stats = sonar_todos.groupby('component').size().reset_index(name='todos').rename(columns={\"component\": \"entity\"})\n",
"# todos_count.to_csv(dataset_dir+\"data/results/filtered_todos.csv\", index=False)"
],
"metadata": {
"id": "3alRbFuk-EhQ"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Count code smells per file, leaving out the TODO-comment issues.\n",
"# Both conditions go into a single mask: chaining df[mask_a][mask_b] makes pandas reindex the\n",
"# second mask and emit a 'Boolean Series key will be reindexed' warning.\n",
"is_code_smell = sonar_stats['type'] == 'CODE_SMELL'\n",
"is_todo = sonar_stats['message'] == 'Complete the task associated to this TODO comment.'\n",
"sonar_code_smells = sonar_stats[is_code_smell & ~is_todo]\n",
"code_smells_stats = (\n",
"    sonar_code_smells.groupby('component')\n",
"    .size()\n",
"    .reset_index(name='code_smells')\n",
"    .rename(columns={'component': 'entity'})\n",
")"
],
"metadata": {
"id": "OU_v_fUjQWm1"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"# Sonar metrics\n",
"Las métricas disponibles y sus definiciones se pueden ver en https://docs.sonarcloud.io/digging-deeper/metric-definitions/"
],
"metadata": {
"id": "OGRZPoZVTign"
}
},
{
"cell_type": "code",
"source": [
"MEASURES_URL = 'https://sonarcloud.io/api/measures/component_tree'\n",
"METRICS = ['complexity','cognitive_complexity','duplicated_lines_density', 'ncloc']\n",
"measures = fetch_data(MEASURES_URL, session, {'component': 'your-key-project', 'metricKeys': (',').join(METRICS),'qualifiers':'FIL','ps':'1'})"
],
"metadata": {
"id": "nG9dkfHdTn7U"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"data_list = []\n",
"n_pages = pages_quantity(MEASURES_URL, session, {'component': 'your-key-project', 'metricKeys': (',').join(METRICS),'qualifiers':'FIL'}, 500 )\n",
"for page in range(1, n_pages + 1):\n",
" page_data = fetch_data(MEASURES_URL, session, {'component': 'your-key-project', 'metricKeys': (',').join(METRICS),'qualifiers':'FIL','p': page, 'ps': 500})\n",
" data_list.extend(page_data['components'])"
],
"metadata": {
"id": "inXZ24JlUAQN"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"sonar_metrics = pd.DataFrame(data_list)\n",
"sonar_metrics = sonar_metrics.merge(team_files, left_on='path', right_on='entity', how='inner')"
],
"metadata": {
"id": "FjxAyn30hj0i"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"sonar_metrics"
],
"metadata": {
"id": "yGXkfh8x3810"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"sonar_metrics = sonar_metrics[['entity', 'measures']]\n",
"sonar_metrics = sonar_metrics.explode('measures')"
],
"metadata": {
"id": "vSpuNIZklalp"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"sonar_metrics"
],
"metadata": {
"id": "9NLD3VQsBYew"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Spread the keys read from each measure dict into their own columns.\n",
"# A file with an empty measures list comes out of explode() as NaN instead of a dict,\n",
"# so those rows get None rather than raising AttributeError.\n",
"metric_keys = ['bestValue', 'metric', 'value']\n",
"for key in metric_keys:\n",
"    sonar_metrics[key] = sonar_metrics['measures'].apply(\n",
"        lambda measure, key=key: measure.get(key) if isinstance(measure, dict) else None\n",
"    )\n",
"\n",
"sonar_metrics = sonar_metrics.drop(columns=['measures'])"
],
"metadata": {
"id": "7OVLEgQkBNlV"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# One frame per metric holding only 'entity' and that metric's value.\n",
"# The shared 'metric' and 'bestValue' columns are left out on purpose: merging several\n",
"# frames that all carry them on 'entity' produces duplicated '_x'/'_y' column names,\n",
"# which recent pandas versions reject with a MergeError.\n",
"filtered_sonar_metric = []\n",
"for metric in METRICS:\n",
"    filtered = sonar_metrics.loc[sonar_metrics['metric'] == metric, ['entity', 'value']]\n",
"    filtered_sonar_metric.append(filtered.rename(columns={'value': metric}))\n"
],
"metadata": {
"id": "Rjy6Y7ltDwUX"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"grouped_sonar_metric = filtered_sonar_metric[0]\n",
"for i in range(1, len(filtered_sonar_metric)):\n",
" grouped_sonar_metric = grouped_sonar_metric.merge(filtered_sonar_metric[i], on='entity', how='outer')\n"
],
"metadata": {
"id": "j_DyTOUPFU2Z"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"metric_stats = grouped_sonar_metric[['entity']+METRICS]"
],
"metadata": {
"id": "-ItVQWy8DsEB"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"# Coverage\n",
"\n",
"En nuestro caso en particular obtenemos el coverage desde los pipelines de CI/CD\n",
"\n"
],
"metadata": {
"id": "Q-WjdAFvrrur"
}
},
{
"cell_type": "code",
"source": [
"COVERAGE_FILE_PATH = \"coverage.json\"\n",
"\n",
"def read_coverage_file():\n",
"    \"\"\"Load the per-file test coverage. Placeholder: rewrite it for your coverage source.\n",
"\n",
"    The statements after this function merge the result on 'entity' and compute\n",
"    100 - coverage, so it has to return a DataFrame with an 'entity' column and a\n",
"    numeric 'coverage' column (presumably on a 0-100 scale - confirm with your tool).\n",
"\n",
"    Raises:\n",
"        NotImplementedError: always, until the function is implemented.\n",
"    \"\"\"\n",
"    # Fail here with a clear message instead of returning None and breaking on .merge()\n",
"    raise NotImplementedError(\n",
"        'read_coverage_file() is a placeholder: implement it to return a DataFrame '\n",
"        \"with 'entity' and 'coverage' columns\"\n",
"    )\n",
"\n",
"coverage = read_coverage_file()\n",
"coverage_stats = coverage.merge(team_files, on='entity', how='inner')\n",
"coverage_stats['inverse_coverage'] = 100 - coverage_stats['coverage']\n"
],
"metadata": {
"id": "NN_VMt3IYmIg"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"coverage_stats"
],
"metadata": {
"id": "u-UPp61RGLdP"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"# Code Maat (Github info)"
],
"metadata": {
"id": "Tw37SnpY4Cep"
}
},
{
"cell_type": "markdown",
"source": [
"Lo primero que hay que hacer es descargar el registro de cambios de git, corriendo lo siguiente dentro de la carpeta de tu proyecto:\n",
"\n",
"\n",
"```\n",
"git log --all --numstat --date=short --pretty=format:'--%h--%ad--%aN' --no-renames --after=2021-06-01 > logfile.log\n",
"```\n",
"Con esto descargamos todos los commits creados desde el 1 de junio del 2021 (la fecha indicada en `--after`) y los guardamos en el archivo logfile.log\n",
"\n",
"A continuación es necesario clonar el [repositorio](https://github.com/adamtornhill/code-maat) de code-maat, agregar el archivo recién creado (en el root) y proceder a generar el análisis.\n",
"\n",
"## Instrucciones para correrlo con docker:\n",
"\n",
"Paso 1: Construir la imagen de Docker\n",
"Dependiendo del computador les podría tocar hacer cambios al dockerfile (para mac con m1 hay que cambiar la primera linea por `FROM clojure:latest`) pueden revisar las issues del repositorio si es que tienen problemas.\n",
"\n",
"```\n",
" docker build -t code-maat-app .\n",
"```\n",
"\n",
"Paso 2: Correr el comando en un nuevo contenedor para la imagen creada y guardar los resultados en un archivo.\n",
"Una vez creado el contenedor, se pueden correr los comandos con `docker exec` en vez de `docker run` para no levantar otro contenedor.\n",
"\n",
"```\n",
" docker run -v \"$PWD\":/data -it code-maat-app -l /data/logfile.log -c git2 -a age > entity-age.csv\n",
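"```\n",
"\n",
"El archivo `entity-churn.csv` que se lee más abajo se genera de la misma forma, cambiando el análisis a `entity-churn`:\n",
"\n",
"```\n",
" docker run -v \"$PWD\":/data -it code-maat-app -l /data/logfile.log -c git2 -a entity-churn > entity-churn.csv\n",
"```\n",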
"\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n"
],
"metadata": {
"id": "xoGVQm97ruXP"
}
},
{
"cell_type": "code",
"source": [
"for i in range(9):\n",
" zero_date = (date.today() - relativedelta(months=i)).strftime(\"%Y-%m-%d\")\n",
" print(f'docker run -v \"$PWD\":/data -it code-maat-app -l /data/logfile.log -d {zero_date} -c git2 -a age > entity-age-{i}.csv')"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "tnne2ayhbFMm",
"outputId": "0738e33b-1003-4529-f12c-626444875be3"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"docker run -v \"$PWD\":/data -it code-maat-app -l /data/logfile.log -d 2023-07-18 -c git2 -a age > entity-age-0.csv\n",
"docker run -v \"$PWD\":/data -it code-maat-app -l /data/logfile.log -d 2023-06-18 -c git2 -a age > entity-age-1.csv\n",
"docker run -v \"$PWD\":/data -it code-maat-app -l /data/logfile.log -d 2023-05-18 -c git2 -a age > entity-age-2.csv\n",
"docker run -v \"$PWD\":/data -it code-maat-app -l /data/logfile.log -d 2023-04-18 -c git2 -a age > entity-age-3.csv\n",
"docker run -v \"$PWD\":/data -it code-maat-app -l /data/logfile.log -d 2023-03-18 -c git2 -a age > entity-age-4.csv\n",
"docker run -v \"$PWD\":/data -it code-maat-app -l /data/logfile.log -d 2023-02-18 -c git2 -a age > entity-age-5.csv\n",
"docker run -v \"$PWD\":/data -it code-maat-app -l /data/logfile.log -d 2023-01-18 -c git2 -a age > entity-age-6.csv\n",
"docker run -v \"$PWD\":/data -it code-maat-app -l /data/logfile.log -d 2022-12-18 -c git2 -a age > entity-age-7.csv\n",
"docker run -v \"$PWD\":/data -it code-maat-app -l /data/logfile.log -d 2022-11-18 -c git2 -a age > entity-age-8.csv\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"ages_stats = team_files\n",
"for i in range(9):\n",
" age_month = pd.read_csv(f\"entity-age-{i}.csv\").rename(columns={'age-months': f'age-{i}'})\n",
" ages_stats = ages_stats.merge(age_month, on='entity', how='inner')\n",
"ages_stats['inverse_avg_age'] = 5 - ages_stats.iloc[:,1:].astype(float).mean(axis=1)\n",
"ages_stats = ages_stats[['entity', 'inverse_avg_age']]"
],
"metadata": {
"id": "ekWjnhMZeSvb"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"entity_churn_stats = pd.read_csv(\"entity-churn.csv\")\n",
"entity_churn_stats = entity_churn_stats.merge(team_files, on='entity', how='inner')[['entity', 'commits']]"
],
"metadata": {
"id": "3ELN_mnOrwvi"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"entity_churn_stats['commits'].sort_values(ascending=False, ignore_index=True).quantile(.8)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "NTWpeyL53C_F",
"outputId": "0753cfbe-82e1-43a5-f7ed-a2623d2c8d8c"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"12.0"
]
},
"metadata": {},
"execution_count": 72
}
]
},
{
"cell_type": "code",
"source": [
"entity_churn_stats['commits'].sort_values(ascending=False, ignore_index=True).plot(kind='bar', xticks=[], width=1.0, xlabel='Archivos', ylabel='Commits' )"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 443
},
"id": "PkO7-V-ksA5s",
"outputId": "c62e3df3-d0dd-4864-84dc-734918fb08bd"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<Axes: xlabel='Archivos', ylabel='Commits'>"
]
},
"metadata": {},
"execution_count": 73
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
],
"image/png": "\n"
},
"metadata": {}
}
]
},
{
"cell_type": "markdown",
"source": [
"# Summary\n",
"\n",
"Mergeamos todos los dataframes anteriores, luego normalizamos y ponderamos para tratar de llevar mucha información a un solo indicador\n"
],
"metadata": {
"id": "SKwFTE7j9j-Z"
}
},
{
"cell_type": "code",
"source": [
"joined_df = sentry_stats.merge(todos_stats,on='entity', how='outer').merge(code_smells_stats,on='entity', how='outer').merge(metric_stats,on='entity', how='outer').merge(coverage_stats,on='entity', how='outer').merge(ages_stats,on='entity', how='outer').merge(entity_churn_stats,on='entity', how='outer')\n",
"filled_df = joined_df.fillna(0)"
],
"metadata": {
"id": "fVWBHUe99tph"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"filled_df"
],
"metadata": {
"id": "bLlcWjHwC8kP"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Separate the key column from the numeric indicators by name rather than by position,\n",
"# so the result does not depend on 'entity' being the first column of filled_df.\n",
"s0 = filled_df.drop(columns=['entity']).astype(float)\n",
"entity_column = filled_df[['entity']]\n",
"\n",
"# Min-max scaling to [0, 1]. A constant column has max == min; dividing by that zero range\n",
"# would fill the column with NaN, so a zero range is replaced by 1 (the column becomes 0).\n",
"value_range = (s0.max() - s0.min()).replace(0, 1)\n",
"normalized_df = pd.concat([entity_column, (s0 - s0.min()) / value_range], axis=1)\n",
"\n",
"# Z-score standardisation, with the same guard for a standard deviation of 0.\n",
"normalized_df2 = pd.concat([entity_column, (s0 - s0.mean()) / s0.std().replace(0, 1)], axis=1)"
],
"metadata": {
"id": "xqiqyAmeCf7I"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"columns_to_average = {'sentry_errors': 5,\n",
" 'todos':5,\n",
" 'code_smells':10,\n",
" 'cognitive_complexity':30,\n",
" 'duplicated_lines_density':10,\n",
" 'ncloc':20,\n",
" 'inverse_coverage':5,\n",
" 'inverse_avg_age':5,\n",
" 'commits':10}\n",
"normalized_df['summary'] = normalized_df.apply(lambda row: sum(row[col] * weight for col, weight in columns_to_average.items()), axis=1)"
],
"metadata": {
"id": "4YuhAMeHGBOH"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"normalized_df.sort_values(by='summary', ascending=False).to_csv(\"normalized_summary.csv\", index=False)"
],
"metadata": {
"id": "hXjVB70Rs1SY"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"normalized_df[['entity','summary']].sort_values(by='summary', ascending=False)"
],
"metadata": {
"id": "y2Ld3gdkDveR"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"corr = normalized_df[columns_to_average.keys()].corr()\n",
"corr.style.background_gradient(cmap='coolwarm')\n"
],
"metadata": {
"id": "wj5c_lWsnvQc"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Function to update the 'datum' recursively\n",
"def update_datum(node):\n",
"    \"\"\"Add to node['datum'] the datum of every descendant, working bottom-up.\n",
"\n",
"    Nodes without a 'children' key are left untouched. The tree is updated in place\n",
"    and nothing is returned.\n",
"    \"\"\"\n",
"    for child in node.get('children', ()):\n",
"        # Resolve the child's own subtree first so its datum is already complete\n",
"        update_datum(child)\n",
"        node['datum'] = node['datum'] + child['datum']\n",
"\n",
"# Esta funcion renderea circulos en base a los directorios en comun,\n",
"# asume que df tiene la columna entity, defines que columna ocupas para el tamaño de los circulos\n",
"# y que columna usar para las tonalidades de los circulos\n",
"\n",
"def render_hierarchy(df, column_size='lines_of_code',colum_color='lines_of_code', limit_red_flag=300, title='Repartition of code length for youngest files', operator='<', filter_by_commits=False, n_commits=30):\n",
"    \"\"\"Draw the files of df as nested circles that follow the folder hierarchy.\n",
"\n",
"    Only rows whose 'entity' starts with 'app' are drawn. Every path component becomes a\n",
"    circle; the innermost circles are files, sized by column_size and coloured by colum_color.\n",
"    The drawing goes to a new matplotlib figure; nothing is returned.\n",
"\n",
"    Args:\n",
"        df: DataFrame with 'entity' and 'commits' columns plus the two columns named below.\n",
"        column_size: column whose value is the size (datum) of each file circle.\n",
"        colum_color: column used to index the colour scales (value - 1); the scales hold\n",
"            df[colum_color].max() colours. Assumes positive integer values - TODO confirm.\n",
"        limit_red_flag: threshold compared against the datum of each circle.\n",
"        title: title of the figure.\n",
"        operator: comparison applied as `datum <operator> limit_red_flag`; circles of\n",
"            levels 3 to 9 for which it holds are not drawn. One of <, <=, >, >=, ==, !=.\n",
"        filter_by_commits: when True, files with at most n_commits commits use the blue\n",
"            colour scale instead of the yellow-to-red one.\n",
"        n_commits: commit threshold used together with filter_by_commits.\n",
"\n",
"    Raises:\n",
"        ValueError: if operator is not one of the supported comparisons.\n",
"    \"\"\"\n",
"    # Explicit comparison table instead of eval() on a caller-supplied string\n",
"    comparisons = {\n",
"        '<': lambda a, b: a < b,\n",
"        '<=': lambda a, b: a <= b,\n",
"        '>': lambda a, b: a > b,\n",
"        '>=': lambda a, b: a >= b,\n",
"        '==': lambda a, b: a == b,\n",
"        '!=': lambda a, b: a != b,\n",
"    }\n",
"    operator_key = operator.strip()\n",
"    if operator_key not in comparisons:\n",
"        raise ValueError(f'Unsupported operator {operator!r}; use one of {sorted(comparisons)}')\n",
"    is_filtered_out = comparisons[operator_key]\n",
"\n",
"    colors_red = list(Color(\"yellow\").range_to(Color(\"red\"), df[colum_color].max()))\n",
"    colors_grey = list(Color(\"grey\").range_to(Color(\"black\"),10))\n",
"    colors_blue = list(Color(\"lightblue\").range_to(Color(\"darkblue\"), df[colum_color].max()))\n",
"\n",
"    filtered_df = df[df['entity'].str.startswith('app')]\n",
"\n",
"    datum = filtered_df[column_size].sum()\n",
"\n",
"    json_data = [{'id': 'app', 'datum': datum, 'color': 0, 'children': []}]\n",
"\n",
"    # Build the folder tree: walk the folders of each path, creating the missing ones,\n",
"    # and hang the file as a leaf of the last folder.\n",
"    for _, row in filtered_df.iterrows():\n",
"        path = row['entity'].split('/')[1:-1]\n",
"        current_level = json_data[0]['children']\n",
"        for folder in path:\n",
"            existing_folder = next((child for child in current_level if child['id'] == folder), None)\n",
"            if existing_folder:\n",
"                current_level = existing_folder['children']\n",
"            else:\n",
"                new_folder = {'id': folder, 'datum': 0, 'children': [], 'color': 0}\n",
"                current_level.append(new_folder)\n",
"                current_level = new_folder['children']\n",
"\n",
"        leaf_node = {'id': row['entity'].split('/')[-1], 'datum': row[column_size], 'color': row[colum_color],'commits': row['commits']}\n",
"        current_level.append(leaf_node)\n",
"\n",
"    # Folders start with datum 0; accumulate the sizes of their contents\n",
"    update_datum(json_data[0])\n",
"\n",
"    circles = circlify.circlify(\n",
"        json_data,\n",
"        show_enclosure=False,\n",
"        target_enclosure=circlify.Circle(x=0, y=0, r=1)\n",
"    )\n",
"\n",
"    fig, ax = plt.subplots(figsize=(14,14))\n",
"\n",
"    ax.set_title(title)\n",
"\n",
"    ax.axis('off')\n",
"\n",
"    # Symmetric axis limits wide enough to contain every circle\n",
"    lim = max(\n",
"        max(\n",
"            abs(circle.x) + circle.r,\n",
"            abs(circle.y) + circle.r,\n",
"        )\n",
"        for circle in circles\n",
"    )\n",
"    plt.xlim(-lim, lim)\n",
"    plt.ylim(-lim, lim)\n",
"\n",
"    # Level 2 circles: light blue background discs\n",
"    for circle in circles:\n",
"        if circle.level != 2:\n",
"            continue\n",
"        x, y, r = circle\n",
"        ax.add_patch( plt.Circle((x, y), r, alpha=0.5, linewidth=2, color=\"lightblue\"))\n",
"\n",
"    # Labels of the level 2 circles, drawn in a second pass (after all the discs)\n",
"    for circle in circles:\n",
"        if circle.level != 2:\n",
"            continue\n",
"        x, y, r = circle\n",
"        label = circle.ex[\"id\"]\n",
"        plt.annotate(label, (x,y+r ) ,va='center', ha='center', fontsize=12, bbox=dict(facecolor='white', edgecolor='black', boxstyle='round', pad=.5))\n",
"\n",
"    # Deeper levels, one level at a time\n",
"    for i in range(3,10):\n",
"        for circle in circles:\n",
"            if circle.level != i or is_filtered_out(circle.ex[\"datum\"], limit_red_flag):\n",
"                continue\n",
"            x, y, r = circle\n",
"            if circle.ex.get(\"children\"):\n",
"                # Folder: grey disc, labelled only when it holds more than one child\n",
"                label = circle.ex[\"id\"]\n",
"                if len(circle.ex.get(\"children\")) > 1:\n",
"                    plt.annotate(label, (x,y+r) , fontsize=13-i, va='top', ha='center', bbox=dict(facecolor='white', edgecolor='black', boxstyle='round', pad=.5, alpha=0.5))\n",
"                ax.add_patch( plt.Circle((x, y), r, alpha=0.1, linewidth=2, color=colors_grey[i].hex))\n",
"            else:\n",
"                # Node without children: coloured by its 'color' value\n",
"                try:\n",
"                    if circle.ex[\"commits\"] <= n_commits and filter_by_commits:\n",
"                        ax.add_patch( plt.Circle((x, y), r, alpha=0.5, linewidth=2, color=colors_blue[circle.ex[\"color\"]-1].hex))\n",
"                    else:\n",
"                        ax.add_patch( plt.Circle((x, y), r, alpha=0.5, linewidth=2, color=colors_red[circle.ex[\"color\"]-1].hex))\n",
"                except IndexError:\n",
"                    # The colour value falls outside the scale: report it and go on\n",
"                    print(circle.ex[\"color\"])\n",
"                    print(df[colum_color].max())"
],
"metadata": {
"id": "NNOrr0k2sJHC"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Hide library warnings and log records below ERROR while the figures are rendered\n",
"warnings.filterwarnings('ignore')\n",
"logging.getLogger().setLevel(logging.ERROR)\n",
"\n",
"# render_hierarchy indexes its colour scales with the column values, so cast them to int.\n",
"# The key column is picked by name so the cast never touches 'entity'.\n",
"int0 = filled_df.drop(columns=['entity']).astype(float).astype(int)\n",
"df = pd.concat([filled_df[['entity']], int0], axis=1)\n",
"\n",
"for column in ['sentry_errors', 'todos', 'code_smells', 'cognitive_complexity','duplicated_lines_density', 'inverse_coverage']:\n",
"    # One combined mask: chained df[mask_a][mask_b] makes pandas reindex the second mask\n",
"    with_data = df[(df[column] > 0) & (df['ncloc'] > 0)]\n",
"    render_hierarchy(with_data, 'ncloc', column, 20, title=f'Repartition of {column}')\n",
"    plt.savefig(f'Repartition of {column}.png')\n",
"    render_hierarchy(with_data, 'ncloc', column, 20, title=f'Repartition of {column}', filter_by_commits=True)\n",
"    plt.savefig(f'Repartition of {column} (filtered).png')\n"
],
"metadata": {
"id": "8-TPw7NH4rZo"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "0UmPKdObFWrS"
},
"execution_count": null,
"outputs": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment