Skip to content

Instantly share code, notes, and snippets.

@eduardompinto
Last active August 17, 2016 18:06
Show Gist options
  • Save eduardompinto/5479670e47f7997f4353432475226528 to your computer and use it in GitHub Desktop.
Save eduardompinto/5479670e47f7997f4353432475226528 to your computer and use it in GitHub Desktop.
Criando um cara para contar relações do tipo "quem fez X, também fez X"

Relacionamento de quem fez X, também fez X

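O objetivo aqui é receber uma entrada em CSV com duas colunas: produto, sessão. Considerando que cada linha representa a ação X aplicada ao produto naquela sessão, criamos um cara que monta, para cada produto, as relações com os outros produtos que aparecem na mesma sessão.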

Exemplo de entrada:

1231312124,cb808cb9-3bfd-4c0c-ad20-2e181b9b3a93
1231212124,cb808cb9-3bfd-4c0c-ad20-2e181b9b3a93
1231112124,cb808cb9-3bfd-4c0c-ad20-2e181b9b3a93
1231762124,cb808cb9-3bfd-4c0c-ad20-2e181b9b3a93

A saida vai mapear:

produto: {produto_relacionado: numero_de_vezes_que_ocorreu}

Exemplo de saída:

{"5767613": {"3096140": 2, "2068884": 2, "3252281": 2, "2333267": 2, "3654870": 2, (...)}, (...)}

Tudo foi feito usando apenas a biblioteca padrão do Python (uuid, random, collections e functools). Ainda tem muito a ser melhorado, mas é um exemplo bem didático.

Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import uuid\n",
"import random\n",
"\n",
"from collections import Counter\n",
"from functools import reduce\n",
"\n",
"\n",
"def _entries_generator():\n",
" entry = ''\n",
" products = [str(random.randrange(1000000, 9000000)) for _ in range(201)]\n",
" for _ in range(random.randrange(50, 100)):\n",
" entry += _entry_generator(products)\n",
" return entry\n",
"\n",
"\n",
"def _entry_generator(products):\n",
" session = str(uuid.uuid4())\n",
" number_of_interactions = random.randrange(1, 30)\n",
" entry = ''\n",
" for _ in range(number_of_interactions):\n",
" entry += '{product},{session}\\n'.format(\n",
" product=products[random.randrange(0, 200)],\n",
" session=session\n",
" )\n",
"\n",
" return entry\n",
"\n",
"\n",
"def _format_entry(entry, data):\n",
" if data:\n",
" product_id, session_id = data.split(',')\n",
"\n",
" data = {\n",
" 'product_id': product_id,\n",
" }\n",
"\n",
" if session_id in entry:\n",
" entry[session_id].append(data)\n",
" else:\n",
" entry[session_id] = [data]\n",
"\n",
" return entry\n",
"\n",
"\n",
"def _wxax(entry, data):\n",
" if data:\n",
" products = [d['product_id'] for d in data]\n",
"\n",
" for product in products:\n",
" if product in entry:\n",
" entry[product].extend([p for p in products if p != product])\n",
" else:\n",
" entry[product] = [p for p in products if p != product]\n",
" return entry\n",
"\n",
"\n",
"# def _score(rec):\n",
"# scored = {}\n",
"# for k, value in rec.items():\n",
"# model = collections.defaultdict(lambda: 1)\n",
"# for v in value:\n",
"# if v != k:\n",
"# model[v] += 1\n",
"# scored[k] = model\n",
"# return scored\n",
"\n",
"\n",
"def run():\n",
" views = _entries_generator().split('\\n')\n",
" formated = reduce(_format_entry, views, {})\n",
" wxax = {\n",
" k: v for k, v in reduce(_wxax, formated.values(), {}).items()\n",
" if len(v) > 0\n",
" }\n",
"\n",
" return {k: Counter(v) for k, v in wxax.items()}\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
""
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3.0
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment