Skip to content

Instantly share code, notes, and snippets.

@ricalanis
Created November 27, 2016 00:40
Show Gist options
  • Save ricalanis/201ff8caaef08befdf1d6fb421ecf481 to your computer and use it in GitHub Desktop.
Save ricalanis/201ff8caaef08befdf1d6fb421ecf481 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import requests as rq\n",
"from fuzzywuzzy import fuzz\n",
"from time import sleep,gmtime, strftime\n",
"from nltk.tokenize import word_tokenize"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def get_match(word, series, threshold=95):\n",
"    \"\"\"Flag the entries of ``series`` that fuzzily match ``word``.\n",
"\n",
"    Each entry is scored with ``fuzz.token_set_ratio(word, entry)`` and is\n",
"    flagged when the score is strictly greater than ``threshold``.\n",
"\n",
"    Args:\n",
"        word: Search term compared against every entry.\n",
"        series: Iterable of strings (for example a DataFrame column).\n",
"        threshold: Score an entry must exceed to count as a match.\n",
"\n",
"    Returns:\n",
"        A float NumPy array with one element per entry: 1.0 for a match and\n",
"        0.0 otherwise, so callers can keep comparing the result with ``== 1``.\n",
"    \"\"\"\n",
"    # Build the flags in a single pass; calling np.append inside the loop\n",
"    # copies the whole array on every iteration.\n",
"    flags = [fuzz.token_set_ratio(word, entry) > threshold for entry in series]\n",
"    return np.array(flags, dtype=float)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def write_xlsx(filename, dataframe_list, compras):\n",
"    \"\"\"Write each DataFrame to its own sheet of a single Excel workbook.\n",
"\n",
"    Args:\n",
"        filename: Path of the workbook to create.\n",
"        dataframe_list: DataFrames to export, one per sheet.\n",
"        compras: Sheet names; ``compras[i]`` names the sheet that holds\n",
"            ``dataframe_list[i]``. It may be longer than ``dataframe_list``.\n",
"\n",
"    Returns:\n",
"        None.\n",
"\n",
"    Raises:\n",
"        IndexError: If there are more DataFrames than sheet names.\n",
"    \"\"\"\n",
"    # The context manager saves and closes the workbook on exit, even when an\n",
"    # export fails part-way. It replaces the explicit writer.save() call,\n",
"    # which current pandas releases no longer provide.\n",
"    with pd.ExcelWriter(filename, engine='xlsxwriter') as writer:\n",
"        for position, dataframe_indv in enumerate(dataframe_list):\n",
"            dataframe_indv.to_excel(writer, sheet_name=compras[position], index=False)\n",
"    return None"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def today_date():\n",
"    \"\"\"Return the current UTC date formatted as a ``DDMMYYYY`` string.\"\"\"\n",
"    return strftime(\"%d%m%Y\", gmtime())"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Department identifiers, one entry per line.\n",
"departements = [\n",
"    \"despensa\",\n",
"    \"lacteos\",\n",
"    \"frutas-y-verduras\",\n",
"    \"carnes-y-pescados\",\n",
"    \"salchichoneria\",\n",
"    \"panaderia-y-tortilleria\",\n",
"    \"bebidas\",\n",
"    \"vinos-y-licores\",\n",
"    \"congelados\",\n",
"    \"limpieza-y-mascotas\",\n",
"    \"bebes\",\n",
"    \"farmacia\",\n",
"    \"higiene-y-belleza\",\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Fruit and nut names, each listed exactly once. Repeated names would be\n",
"# searched twice and duplicate every product they match.\n",
"Frutas = [\n",
"    \"Arándano\", \"Frambuesa\", \"Fresa\", \"Zarzamora\",\n",
"    \"Pomelo\", \"Limón\", \"Mandarina\", \"Naranja\",\n",
"    \"Melón\", \"Sandía\",\n",
"    \"Aguacate\", \"Carambola\", \"Chirimoya\", \"Coco\", \"Dátil\", \"Kiwi\",\n",
"    \"Litchi\", \"Mango\", \"Papaya\", \"Piña\", \"Plátano\", \"Higo\",\n",
"    \"Albaricoque\", \"Cereza\", \"Ciruela\", \"Kaki\", \"Manzana\", \"Melocotón\",\n",
"    \"Nectarina\", \"Níspero\", \"Pera\", \"Uva\",\n",
"    \"Castaña\", \"Almendra\", \"Avellana\", \"Cacahuete\", \"Nuez\", \"Pistacho\",\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Shopping-list entries, one per line.\n",
"Compras = [\n",
"    \"mezcla almendra\",\n",
"    \"Avena\",\n",
"    \"Tortilla\",\n",
"    \"Pan Integral\",\n",
"    \"Fruta\",\n",
"    \"Griego\",\n",
"    \"Cottage\",\n",
"    \"Ensalada\",\n",
"    \"Mezcla Verduras Campesina\",\n",
"    \"Arroz Knorr\",\n",
"    \"Milanesa Pollo\",\n",
"    \"Pechuga Pollo\",\n",
"    \"Atún\",\n",
"    \"Salmón\",\n",
"    \"Queso Oaxaca\",\n",
"    \"Huevo\",\n",
"    \"Clara Huevo\",\n",
"    \"Leche\",\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"despensa\n",
"lacteos\n",
"frutas-y-verduras\n",
"carnes-y-pescados\n",
"salchichoneria\n",
"panaderia-y-tortilleria\n",
"bebidas\n",
"vinos-y-licores\n",
"congelados\n",
"limpieza-y-mascotas\n",
"bebes\n",
"farmacia\n",
"higiene-y-belleza\n"
]
}
],
"source": [
"# Download the product listing of every department into one flat list.\n",
"Products_List = []\n",
"for department in departements:\n",
"    print(department)\n",
"    # A timeout keeps a stalled connection from hanging the notebook, and\n",
"    # raise_for_status() reports HTTP errors here instead of failing later\n",
"    # while decoding an error page as JSON.\n",
"    r = rq.get(\"https://www.walmart.com.mx/super/WebControls/hlSearch.ashx?Text=&Departamento=\"+department+\"&marca=0\", timeout=30)\n",
"    r.raise_for_status()\n",
"    sleep(1)  # pause between requests (presumably to throttle the scraper)\n",
"    # extend() grows the list in place rather than rebuilding it each pass.\n",
"    Products_List.extend(r.json()[\"Products\"])\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Load the collected product records into one DataFrame.\n",
"DF_Products = pd.DataFrame(data=Products_List)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Keep only the department, description and numeric price columns.\n",
"DF_Products_Price_Product = DF_Products[\n",
"    [\"DepartmentName\", \"Description\", \"PrecioNumerico\"]\n",
"]\n",
"# Subset limited to rows whose department is \"d-frutas-y-verduras\".\n",
"DF_Products_Price_Product_Frutas = DF_Products_Price_Product[\n",
"    DF_Products_Price_Product[\"DepartmentName\"].eq(\"d-frutas-y-verduras\")\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Finished.\n"
]
}
],
"source": [
"# Export both tables to CSV files whose names start with today's date stamp.\n",
"date = today_date()\n",
"DF_Products.to_csv(date + \"_detailed.csv\")   # every column\n",
"DF_Products_Price_Product.to_csv(date + \".csv\")   # reduced column set\n",
"print(\"Finished.\")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"mezcla almendra\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.4/site-packages/ipykernel/__main__.py:19: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Avena\n",
"Tortilla\n",
"Pan Integral\n",
"Fruta\n",
"Griego\n",
"Cottage\n",
"Ensalada\n",
"Mezcla Verduras Campesina\n",
"Arroz Knorr\n",
"Milanesa Pollo\n",
"Pechuga Pollo\n",
"Atún\n",
"Salmón\n",
"Queso Oaxaca\n",
"Huevo\n",
"Clara Huevo\n",
"Leche\n"
]
}
],
"source": [
"# Build one price-sorted table of candidate products per shopping-list entry.\n",
"Opciones = []\n",
"\n",
"for compra in Compras:\n",
"    print(compra)\n",
"    if compra == \"Fruta\":\n",
"        # The generic \"Fruta\" entry is expanded to every name in Frutas and\n",
"        # searched only within the fruit-and-vegetable table.\n",
"        descripciones_frutas = DF_Products_Price_Product_Frutas[\"Description\"]\n",
"        coincidencias = [\n",
"            DF_Products_Price_Product_Frutas[get_match(fruta, descripciones_frutas) == 1]\n",
"            for fruta in Frutas\n",
"        ]\n",
"        df_opciones = pd.concat(coincidencias)\n",
"    else:\n",
"        # A product qualifies only when every word of the entry matches its\n",
"        # description. Starting from an all-True mask also covers an entry\n",
"        # that tokenizes to no words, which would otherwise leave the mask\n",
"        # undefined or carried over from the previous entry.\n",
"        condiciones = np.ones(len(DF_Products_Price_Product), dtype=bool)\n",
"        for palabra in word_tokenize(compra):\n",
"            condiciones &= get_match(palabra, DF_Products_Price_Product[\"Description\"]) == 1\n",
"        df_opciones = DF_Products_Price_Product[condiciones]\n",
"    # DataFrame.sort() is deprecated in favour of sort_values(by=...).\n",
"    Opciones.append(df_opciones.sort_values(by=\"PrecioNumerico\", ascending=True))\n",
"\n",
"# Write the workbook once, after every entry has been processed, instead of\n",
"# rewriting the same file on each iteration.\n",
"write_xlsx(\"compras\"+date+\".xlsx\", Opciones, Compras)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.4.1"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment