Skip to content

Instantly share code, notes, and snippets.

@jjsantos01
Last active April 20, 2023 17:53
Show Gist options
  • Save jjsantos01/a09d44e7de76927f3101969e985d1d84 to your computer and use it in GitHub Desktop.
Save jjsantos01/a09d44e7de76927f3101969e985d1d84 to your computer and use it in GitHub Desktop.
Script that scrapes the search results for approved medicines from the public registry search of COFEPRIS, the Mexican medicines regulator: https://tramiteselectronicos02.cofepris.gob.mx/BuscadorPublicoRegistrosSanitarios/BusquedaRegistroSanitario.aspx
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "7b94286a-5830-4df2-89e8-68a6f60ff33a",
"metadata": {},
"outputs": [],
"source": [
"from io import StringIO\n",
"\n",
"import pandas as pd # not necessary, only to show final results\n",
"import requests\n",
"from bs4 import BeautifulSoup"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "4d780604-59c3-474b-ba86-23fd547a0d7c",
"metadata": {},
"outputs": [],
"source": [
"class CofeprisScrap:\n",
"    \"\"\"Client for the COFEPRIS public search of sanitary registrations.\n",
"\n",
"    Creating an instance performs the two warm-up requests that obtain the\n",
"    __VIEWSTATE and __EVENTVALIDATION form tokens; those tokens are stored in\n",
"    ``auth_params`` and reused by every call to ``search_medicine``.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, timeout: float = 30):\n",
"        \"\"\"Store the request settings and fetch the form tokens.\n",
"\n",
"        Args:\n",
"            timeout: Seconds to wait on each HTTP request before giving up.\n",
"        \"\"\"\n",
"        self.url = 'https://tramiteselectronicos02.cofepris.gob.mx/BuscadorPublicoRegistrosSanitarios/BusquedaRegistroSanitario.aspx'\n",
"        self.headers = {\"user-agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36\"}\n",
"        # requests has no default timeout, so an unresponsive server would block forever.\n",
"        self.timeout = timeout\n",
"        self.auth_params = self.get_auth_params()\n",
"\n",
"    @staticmethod\n",
"    def _hidden_field(soup, name: str) -> str:\n",
"        \"\"\"Return the ``value`` of the ``<input>`` element whose id is *name*.\n",
"\n",
"        Raises:\n",
"            ValueError: If the parsed page contains no such input.\n",
"        \"\"\"\n",
"        field = soup.select_one(f'input[id=\"{name}\"]')\n",
"        if field is None:\n",
"            raise ValueError(f'Hidden field {name} not found in the COFEPRIS response')\n",
"        return field['value']\n",
"\n",
"    def get_auth_params(self) -> dict:\n",
"        \"\"\"\n",
"        Cofepris website requires the __VIEWSTATE and __EVENTVALIDATION params to consider the request as valid.\n",
"        For searching medicines, two initial requests are needed to get these parameters.\n",
"        After getting these parameters, they can be used to make any search in the \"Denominación distintiva\" form.\n",
"\n",
"        Returns:\n",
"            A dict with the \"__VIEWSTATE\" and \"__EVENTVALIDATION\" values read\n",
"            from the response to the second request.\n",
"\n",
"        Raises:\n",
"            requests.HTTPError: If either request returns an error status.\n",
"            ValueError: If a response lacks one of the hidden fields.\n",
"        \"\"\"\n",
"        # First request: plain GET of the search page to read its initial tokens.\n",
"        resp0 = requests.get(self.url, headers=self.headers, timeout=self.timeout)\n",
"        resp0.raise_for_status()\n",
"        soup0 = BeautifulSoup(resp0.text, 'html.parser')\n",
"        # Second request: post the form back with the search-type dropdown set to \"2\"\n",
"        # and an empty search box; the tokens of this response are the ones reused later.\n",
"        data1 = {\n",
"            \"__VIEWSTATE\": self._hidden_field(soup0, \"__VIEWSTATE\"),\n",
"            \"__VIEWSTATEENCRYPTED\": \"\",\n",
"            \"__EVENTVALIDATION\": self._hidden_field(soup0, \"__EVENTVALIDATION\"),\n",
"            \"ctl00$MainContent$DDL_Argumento\": \"2\",\n",
"            \"ctl00$MainContent$txtSearchValue\": \"\",\n",
"        }\n",
"        resp1 = requests.post(self.url, headers=self.headers, data=data1, timeout=self.timeout)\n",
"        resp1.raise_for_status()\n",
"        soup1 = BeautifulSoup(resp1.text, 'html.parser')\n",
"        params = {\n",
"            \"__VIEWSTATE\": self._hidden_field(soup1, \"__VIEWSTATE\"),\n",
"            \"__EVENTVALIDATION\": self._hidden_field(soup1, \"__EVENTVALIDATION\"),\n",
"        }\n",
"\n",
"        return params\n",
"\n",
"    def search_medicine(self, medicine: str) -> str:\n",
"        \"\"\"Submit the search form for *medicine* and return the response HTML.\n",
"\n",
"        Args:\n",
"            medicine: Text to send in the search box.\n",
"\n",
"        Returns:\n",
"            The raw HTML text of the response (not a parsed list of results).\n",
"\n",
"        Raises:\n",
"            requests.HTTPError: If the server returns an error status.\n",
"        \"\"\"\n",
"        data = {\n",
"            \"__EVENTTARGET\": \"ctl00$MainContent$ctl00\",\n",
"            \"__VIEWSTATE\": self.auth_params[\"__VIEWSTATE\"],\n",
"            \"__VIEWSTATEENCRYPTED\": \"\",\n",
"            \"__EVENTVALIDATION\": self.auth_params[\"__EVENTVALIDATION\"],\n",
"            \"ctl00$MainContent$DDL_Argumento\": \"2\",\n",
"            \"ctl00$MainContent$txtSearchValue\": medicine,\n",
"        }\n",
"\n",
"        resp = requests.post(self.url, headers=self.headers, data=data, timeout=self.timeout)\n",
"        resp.raise_for_status()\n",
"        return resp.text"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "37a58405-92de-469b-b34a-296e625b8126",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Número de Registro</th>\n",
" <th>Denominación Genérica</th>\n",
" <th>Denominación Distintiva</th>\n",
" <th>Tipo de Medicamento</th>\n",
" <th>Indicación Terapéutica</th>\n",
" <th>Titular del Registro</th>\n",
" <th>Fabricante del Medicamento</th>\n",
" <th>Principio Activo</th>\n",
" <th>Unnamed: 8</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>091M88 SSA</td>\n",
" <td>Subsalicilato de bismuto</td>\n",
" <td>PEPTO BISMOL</td>\n",
" <td>No Aplica</td>\n",
" <td>Alivio de los síntomas digestivos comunes como...</td>\n",
" <td>PROCTER &amp; GAMBLE MANUFACTURING MEXICO, S. DE R...</td>\n",
" <td>Procter &amp; Gamble Manufacturing México, S. de R...</td>\n",
" <td>A07BB99 Otros principios activos. Preparados c...</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>30310 SSA</td>\n",
" <td>Subsalicilato de bismuto</td>\n",
" <td>PEPTO BISMOL</td>\n",
" <td>De referencia</td>\n",
" <td>Antiflatulento, antiemético, antidiarréico, ac...</td>\n",
" <td>PROCTER &amp; GAMBLE MANUFACTURING MEXICO, S. DE R...</td>\n",
" <td>The Procter &amp; Gamble Manufacturing Company.</td>\n",
" <td>A07BB99 Otros principios activos. Preparados c...</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>263M2003 SSA</td>\n",
" <td>Subsalicilato de bismuto</td>\n",
" <td>PEPTO-BISMOL</td>\n",
" <td>Genérico</td>\n",
" <td>Alivio de los síntomas digestivos comunes como...</td>\n",
" <td>PROCTER &amp; GAMBLE MANUFACTURA, S. DE R.L. DE C.V.</td>\n",
" <td>Procter &amp; Gamble Manufactura, S. de R.L. de C.V.</td>\n",
" <td>A07BB99 Otros principios activos. Preparados c...</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Número de Registro Denominación Genérica Denominación Distintiva \\\n",
"0 091M88 SSA Subsalicilato de bismuto PEPTO BISMOL \n",
"1 30310 SSA Subsalicilato de bismuto PEPTO BISMOL \n",
"2 263M2003 SSA Subsalicilato de bismuto PEPTO-BISMOL \n",
"\n",
" Tipo de Medicamento Indicación Terapéutica \\\n",
"0 No Aplica Alivio de los síntomas digestivos comunes como... \n",
"1 De referencia Antiflatulento, antiemético, antidiarréico, ac... \n",
"2 Genérico Alivio de los síntomas digestivos comunes como... \n",
"\n",
" Titular del Registro \\\n",
"0 PROCTER & GAMBLE MANUFACTURING MEXICO, S. DE R... \n",
"1 PROCTER & GAMBLE MANUFACTURING MEXICO, S. DE R... \n",
"2 PROCTER & GAMBLE MANUFACTURA, S. DE R.L. DE C.V. \n",
"\n",
" Fabricante del Medicamento \\\n",
"0 Procter & Gamble Manufacturing México, S. de R... \n",
"1 The Procter & Gamble Manufacturing Company. \n",
"2 Procter & Gamble Manufactura, S. de R.L. de C.V. \n",
"\n",
" Principio Activo Unnamed: 8 \n",
"0 A07BB99 Otros principios activos. Preparados c... NaN \n",
"1 A07BB99 Otros principios activos. Preparados c... NaN \n",
"2 A07BB99 Otros principios activos. Preparados c... NaN "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Build the client once; the searches in the following cells reuse this instance.\n",
"c = CofeprisScrap()\n",
"resultados = c.search_medicine('pepto')\n",
"# Wrap the HTML in StringIO: passing literal HTML to read_html is deprecated since pandas 2.1.\n",
"pd.read_html(StringIO(resultados))[0]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "26faed4c-4a95-4c6f-8044-d213d568cc22",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Número de Registro</th>\n",
" <th>Denominación Genérica</th>\n",
" <th>Denominación Distintiva</th>\n",
" <th>Tipo de Medicamento</th>\n",
" <th>Indicación Terapéutica</th>\n",
" <th>Titular del Registro</th>\n",
" <th>Fabricante del Medicamento</th>\n",
" <th>Principio Activo</th>\n",
" <th>Unnamed: 8</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>305M2018 SSA</td>\n",
" <td>Paracetamol / Fenilefrina / Clorfenamina</td>\n",
" <td>THERAFLU THERAPILLIS</td>\n",
" <td>Genérico</td>\n",
" <td>Auxiliar en el tratamiento sintomático del res...</td>\n",
" <td>GLAXOSMITHKLINE CONSUMER HEALTHCARE MEXICO, S....</td>\n",
" <td>GlaxoSmithKline Panamá, S.A.</td>\n",
" <td>R01BA53 Fenilefrina, combinations</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>202M2016 SSA</td>\n",
" <td>Paracetamol / Fenilefrina</td>\n",
" <td>THERAFLU VAP</td>\n",
" <td>De referencia</td>\n",
" <td>Auxiliar en el tratamiento sintomático del res...</td>\n",
" <td>GLAXOSMITHKLINE CONSUMER HEALTHCARE MEXICO, S....</td>\n",
" <td>SmithKline Beecham, S.A.</td>\n",
" <td>N02BE01 Paracetamol.</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>026M2016 SSA</td>\n",
" <td>Paracetamol / Dextrometorfano / Fenilefrina</td>\n",
" <td>THERAFLU DAYTIME</td>\n",
" <td>De referencia</td>\n",
" <td>Auxiliar en el alivio de los síntomas del resf...</td>\n",
" <td>GLAXOSMITHKLINE CONSUMER HEALTHCARE MEXICO, S....</td>\n",
" <td>GSK Consumer Health, Inc.</td>\n",
" <td>R05FA99 Otros principios activos. Derivados de...</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>318M94 SSA</td>\n",
" <td>Paracetamol / Feniramina / Fenilefrina</td>\n",
" <td>THERAFLU</td>\n",
" <td>De referencia</td>\n",
" <td>Auxiliar en el tratamiento sintomatico del res...</td>\n",
" <td>GLAXOSMITHKLINE CONSUMER HEALTHCARE MEXICO, S....</td>\n",
" <td>Famar S.A. Anthoussa Plant</td>\n",
" <td>N02BE51 Paracetamol, combinaciones excluyendo ...</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>093M2002 SSA</td>\n",
" <td>Paracetamol / Fenilefrina</td>\n",
" <td>THERAFLU TD</td>\n",
" <td>De referencia</td>\n",
" <td>Auxiliar en el tratamiento sintomático del res...</td>\n",
" <td>GLAXOSMITHKLINE CONSUMER HEALTHCARE MEXICO, S....</td>\n",
" <td>Delpharm Orleans</td>\n",
" <td>N02BE51 Paracetamol, combinaciones excluyendo ...</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Número de Registro Denominación Genérica \\\n",
"0 305M2018 SSA Paracetamol / Fenilefrina / Clorfenamina \n",
"1 202M2016 SSA Paracetamol / Fenilefrina \n",
"2 026M2016 SSA Paracetamol / Dextrometorfano / Fenilefrina \n",
"3 318M94 SSA Paracetamol / Feniramina / Fenilefrina \n",
"4 093M2002 SSA Paracetamol / Fenilefrina \n",
"\n",
" Denominación Distintiva Tipo de Medicamento \\\n",
"0 THERAFLU THERAPILLIS Genérico \n",
"1 THERAFLU VAP De referencia \n",
"2 THERAFLU DAYTIME De referencia \n",
"3 THERAFLU De referencia \n",
"4 THERAFLU TD De referencia \n",
"\n",
" Indicación Terapéutica \\\n",
"0 Auxiliar en el tratamiento sintomático del res... \n",
"1 Auxiliar en el tratamiento sintomático del res... \n",
"2 Auxiliar en el alivio de los síntomas del resf... \n",
"3 Auxiliar en el tratamiento sintomatico del res... \n",
"4 Auxiliar en el tratamiento sintomático del res... \n",
"\n",
" Titular del Registro \\\n",
"0 GLAXOSMITHKLINE CONSUMER HEALTHCARE MEXICO, S.... \n",
"1 GLAXOSMITHKLINE CONSUMER HEALTHCARE MEXICO, S.... \n",
"2 GLAXOSMITHKLINE CONSUMER HEALTHCARE MEXICO, S.... \n",
"3 GLAXOSMITHKLINE CONSUMER HEALTHCARE MEXICO, S.... \n",
"4 GLAXOSMITHKLINE CONSUMER HEALTHCARE MEXICO, S.... \n",
"\n",
" Fabricante del Medicamento \\\n",
"0 GlaxoSmithKline Panamá, S.A. \n",
"1 SmithKline Beecham, S.A. \n",
"2 GSK Consumer Health, Inc. \n",
"3 Famar S.A. Anthoussa Plant \n",
"4 Delpharm Orleans \n",
"\n",
" Principio Activo Unnamed: 8 \n",
"0 R01BA53 Fenilefrina, combinations NaN \n",
"1 N02BE01 Paracetamol. NaN \n",
"2 R05FA99 Otros principios activos. Derivados de... NaN \n",
"3 N02BE51 Paracetamol, combinaciones excluyendo ... NaN \n",
"4 N02BE51 Paracetamol, combinaciones excluyendo ... NaN "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"resultados = c.search_medicine('theraflu')\n",
"# Wrap the HTML in StringIO: passing literal HTML to read_html is deprecated since pandas 2.1.\n",
"pd.read_html(StringIO(resultados))[0]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "b53d17e4-816c-444a-8b83-9e1aada68c18",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Número de Registro</th>\n",
" <th>Denominación Genérica</th>\n",
" <th>Denominación Distintiva</th>\n",
" <th>Tipo de Medicamento</th>\n",
" <th>Indicación Terapéutica</th>\n",
" <th>Titular del Registro</th>\n",
" <th>Fabricante del Medicamento</th>\n",
" <th>Principio Activo</th>\n",
" <th>Unnamed: 8</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>229M2021 SSA</td>\n",
" <td>Dropropizina</td>\n",
" <td>LETOAZIN</td>\n",
" <td>Genérico</td>\n",
" <td>Antitusivo</td>\n",
" <td>NOVAG INFANCIA, S.A. DE C.V.</td>\n",
" <td>Novag Infancia, S.A. de C.V.</td>\n",
" <td>R05DB19 Dropropizina</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Número de Registro Denominación Genérica Denominación Distintiva \\\n",
"0 229M2021 SSA Dropropizina LETOAZIN \n",
"\n",
" Tipo de Medicamento Indicación Terapéutica Titular del Registro \\\n",
"0 Genérico Antitusivo NOVAG INFANCIA, S.A. DE C.V. \n",
"\n",
" Fabricante del Medicamento Principio Activo Unnamed: 8 \n",
"0 Novag Infancia, S.A. de C.V. R05DB19 Dropropizina NaN "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"resultados = c.search_medicine('letoazin')\n",
"# Wrap the HTML in StringIO: passing literal HTML to read_html is deprecated since pandas 2.1.\n",
"pd.read_html(StringIO(resultados))[0]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "921bd72e-bcb5-4cfb-9b7c-d78051e211a3",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Número de Registro</th>\n",
" <th>Denominación Genérica</th>\n",
" <th>Denominación Distintiva</th>\n",
" <th>Tipo de Medicamento</th>\n",
" <th>Indicación Terapéutica</th>\n",
" <th>Titular del Registro</th>\n",
" <th>Fabricante del Medicamento</th>\n",
" <th>Principio Activo</th>\n",
" <th>Unnamed: 8</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>416M2015 SSA</td>\n",
" <td>Fexofenadina</td>\n",
" <td>PIRQUET</td>\n",
" <td>Genérico</td>\n",
" <td>Rinitis Alérgica, Urticaria idiopática crónica.</td>\n",
" <td>LABORATORIOS PISA, S.A. DE C.V.</td>\n",
" <td>Laboratorios Pisa S.A. de C.V. Laboratorios Pi...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Número de Registro Denominación Genérica Denominación Distintiva \\\n",
"0 416M2015 SSA Fexofenadina PIRQUET \n",
"\n",
" Tipo de Medicamento Indicación Terapéutica \\\n",
"0 Genérico Rinitis Alérgica, Urticaria idiopática crónica. \n",
"\n",
" Titular del Registro \\\n",
"0 LABORATORIOS PISA, S.A. DE C.V. \n",
"\n",
" Fabricante del Medicamento Principio Activo \\\n",
"0 Laboratorios Pisa S.A. de C.V. Laboratorios Pi... NaN \n",
"\n",
" Unnamed: 8 \n",
"0 NaN "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"resultados = c.search_medicine('Pirquet')\n",
"# Wrap the HTML in StringIO: passing literal HTML to read_html is deprecated since pandas 2.1.\n",
"pd.read_html(StringIO(resultados))[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d3e20014-23d9-48a3-b7d0-10094f28911b",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "e8acf85a-af76-4f0f-a569-c962a960635e",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "b139dfd6-0356-481b-9cf2-cf71a81439bc",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment