Skip to content

Instantly share code, notes, and snippets.

@BurgosNY
Created January 3, 2022 12:27
Show Gist options
  • Save BurgosNY/7a4c880051f965bab4915932346a9610 to your computer and use it in GitHub Desktop.
Save BurgosNY/7a4c880051f965bab4915932346a9610 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 62,
"id": "ddc8fe73",
"metadata": {},
"outputs": [],
"source": [
"from IPython.display import clear_output\n",
"from bs4 import BeautifulSoup as bs\n",
"from airtable import Airtable\n",
"import requests\n",
"\n",
"\n",
"airtable_api = '' # Sequência de 16 dígitos. Vá em Account > API para pegar o seu.\n",
"table_id = 'appEc9XLglmx5igrn' # É o que vem após a primeira \"/\" da url.\n",
"airtable = Airtable(table_id, 'acervo', airtable_api)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "bb08d806",
"metadata": {},
"outputs": [],
"source": [
"# Functions\n",
"\n",
"# Essa função aceita um texto (tipo \"Puerto Rico\") e traz uma lista de dicionários\n",
"# com informações dos jogos encontrados, para que seja fácil descobrir o \"bgg_id\" de cada um.\n",
"def search(query):\n",
" url = f'https://www.boardgamegeek.com/xmlapi/search?search={query}'\n",
" r = requests.get(url)\n",
" soup = bs(r.content, \"lxml\")\n",
" data = []\n",
" for i, bg in enumerate(soup.find_all(\"boardgame\")):\n",
" try:\n",
" bgg_id = int(bg['objectid'])\n",
" name = bg.find(\"name\").text\n",
" year = bg.find(\"yearpublished\").text\n",
" result = i\n",
" link = f'https://boardgamegeek.com/boardgame/{bgg_id}'\n",
" obj = {\"bgg_id\": bgg_id, \"name\": name, \"year\": year, \"result\": result, \"link\": link}\n",
" data.append(obj)\n",
" except AttributeError:\n",
" continue\n",
" return data\n",
"\n",
"# Dado um bgg_id, puxa alguns dados. É possível pegar vários outros campos.\n",
"# Ver mais em: https://boardgamegeek.com/wiki/page/BGG_XML_API2\n",
"def game_info(bgg_id):\n",
" url = f'https://www.boardgamegeek.com/xmlapi2/thing?id={bgg_id}&stats=1'\n",
" r = requests.get(url)\n",
" soup = bs(r.content, \"lxml\")\n",
" nome = soup.find(\"name\", {\"type\": \"primary\"})['value']\n",
" rank = int(soup.find(\"ranks\").find(\"rank\", {\"name\": \"boardgame\"})['value'])\n",
" nota = float(soup.find(\"ratings\").find(\"average\")['value'])\n",
" age_poll = soup.find(\"poll\", {\"name\": \"suggested_playerage\"})\n",
" age_poll_results = [(int(x['numvotes']), x['value']) for x in age_poll.find_all(\"result\")]\n",
" idade = max(age_poll_results)[1]\n",
" author_list = soup.find_all(\"link\", {\"type\": \"boardgamedesigner\"})\n",
" autores = ', '.join([x['value'] for x in author_list])\n",
" thumb_url = soup.find(\"thumbnail\").text\n",
" obj = {\"jogo\": nome, \"rank\": rank, \"nota_bgg\": nota, \"idade\": idade,\n",
" \"autores\": autores, \"thumbnail\": [{\"url\": thumb_url}]}\n",
" return obj\n",
" "
]
},
{
"cell_type": "markdown",
"id": "f85f9cf1",
"metadata": {},
"source": [
"### Atualizando a tabela pela primeira vez\n",
"\n",
"Subimos um CSV criado manualmente para o Airtable, demos à tabela o nome \"acervo\" e trabalhamos a partir dela."
]
},
{
"cell_type": "code",
"execution_count": 79,
"id": "79a4a17f",
"metadata": {},
"outputs": [],
"source": [
"def first_dump():\n",
" acervo = airtable.get_all()\n",
"\n",
" for jogo in acervo:\n",
" if not jogo['fields'].get(\"bgg_id\"):\n",
" query = jogo['fields']['search_query']\n",
" print(query)\n",
" pesquisa = search(query)\n",
" if not pesquisa:\n",
" continue\n",
" for r in pesquisa:\n",
" print(f'{r[\"result\"]} {r[\"name\"]} ({r[\"year\"]})')\n",
" print(r['link'])\n",
" print('\\n')\n",
" print('\\n\\n')\n",
" correto = input(\"which one?\")\n",
" bgg_id = pesquisa[int(correto)]['bgg_id']\n",
" airtable.update(jogo['id'], {\"bgg_id\": bgg_id})\n",
" clear_output(wait=True)\n"
]
},
{
"cell_type": "markdown",
"id": "3b201d62",
"metadata": {},
"source": [
"### Usando dados do BGG na tabela em questão:\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fb9de5a1",
"metadata": {},
"outputs": [],
"source": [
"acervo = airtable.get_all()\n",
"for jogo in acervo:\n",
" if not 'rank' in jogo['fields']:\n",
" print(jogo)\n",
" game = game_info(jogo['fields']['bgg_id'])\n",
" airtable.update(jogo['id'], game)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment