Last active
March 22, 2020 22:03
-
-
Save pallada-92/6550210845416903455ae4657d05dd0e to your computer and use it in GitHub Desktop.
Aura top commenters visualization
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Визуализация самых активных комментаторов Ауры" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Этот ноутбук не предполагается для запуска т.к. использует внешние файлы с данными пользователей Ауры, которые были собраны мной вручную. Тем не менее, он может быть полезен для изучения наивного алгоритма укладки кругов или квадратов, а также вывода изображения при помощи cairo." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Алгоритм укладки кругов — моя модификация Wordle. Он просто пытается поместить фигуры в случайные места изображения до тех пор, пока они не будут пересекаться с предыдущими фигурами. Радиус, в котором производится поиск, постепенно увеличивается. Отличие от Wordle в том, что вместо детерминированной спирали используется генератор случайных чисел." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Описание Worlde можно найти тут: http://static.mrfeinberg.com/bv_ch03.pdf" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Load data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 195, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import json\n", | |
"from collections import Counter\n", | |
"\n", | |
"with open('../aura/db/user_info.json', encoding='utf-8') as f:\n", | |
" user_info_db = json.load(f)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Load avatars" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 197, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import requests\n", | |
"\n", | |
"from IPython.display import display\n", | |
"from PIL import Image\n", | |
"from io import BytesIO\n", | |
"from tqdm import tqdm_notebook as tqdm\n", | |
"\n", | |
"import os\n", | |
"\n", | |
"size = 200\n", | |
"os.makedirs(f'../aura/avatars/{size}', exist_ok=1)\n", | |
"for ui in tqdm(user_info_db.values()):\n", | |
" uid = ui['id']\n", | |
" assert ui['photo_url'] == ui['ava_url']\n", | |
" if ui['photo_url'] == '':\n", | |
" print(ui['url'])\n", | |
" continue\n", | |
" photo_url = '/'.join(ui['photo_url'].split('/')[:-1]) + f'/{size}'\n", | |
" fpath = f'../aura/avatars/{size}/{uid}.jfif'\n", | |
" if os.path.exists(fpath):\n", | |
" continue\n", | |
" req = requests.get(photo_url)\n", | |
" with open(fpath, 'wb') as f:\n", | |
" f.write(req.content)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Layout" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 199, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def make_circ(x, y, r):\n", | |
" return ('circ', x, y, r)\n", | |
"\n", | |
"def make_rect(cx, cy, w, h):\n", | |
" return ('rect', cx, cy, w, h)\n", | |
"\n", | |
"def check_intersect(shape1, shape2):\n", | |
" if shape1 is None or shape2 is None:\n", | |
" return False\n", | |
" if shape1[0] == 'circ' and shape2[0] == 'circ':\n", | |
" s1, x1, y1, r1 = shape1\n", | |
" s2, x2, y2, r2 = shape2\n", | |
" return (x2 - x1) ** 2 + (y2 - y1) ** 2 <= (r1 + r2) ** 2\n", | |
" if shape1[0] == 'rect' and shape2[0] == 'rect':\n", | |
" s1, cx1, cy1, w1, h1 = shape1\n", | |
" s2, cx2, cy2, w2, h2 = shape2\n", | |
" return abs(cx1 - cx2) * 2 <= w1 + w2 and abs(cy1 - cy2) * 2 <= h1 + h2\n", | |
" raise NotImplementedError()\n", | |
"\n", | |
"import random\n", | |
"import math\n", | |
"\n", | |
"def place_shape(placed_shapes, new_shape, max_r=10, trials=100, mode='circ'):\n", | |
" ox, oy = new_shape[1], new_shape[2]\n", | |
" candidate_shape = list(new_shape)\n", | |
" for trial in range(trials):\n", | |
" cur_r = (trial / trials) * 0.5 * max_r\n", | |
" if mode == 'circ':\n", | |
" cur_a = random.random() * math.pi * 2\n", | |
" dx, dy = cur_r * math.cos(cur_a), cur_r * math.sin(cur_a)\n", | |
" else:\n", | |
" dx = cur_r * (random.random() * 2 - 1)\n", | |
" dy = cur_r * (random.random() * 2 - 1)\n", | |
" candidate_shape[1] = ox + dx\n", | |
" candidate_shape[2] = oy + dy\n", | |
" for other_shape in placed_shapes:\n", | |
" if check_intersect(candidate_shape, other_shape):\n", | |
" break\n", | |
" else:\n", | |
" return tuple(candidate_shape)\n", | |
" return None\n", | |
"\n", | |
"def place_shapes(shapes, max_r=10, trials=100, mode='circ'):\n", | |
" random.seed(0)\n", | |
" placed_shapes = []\n", | |
" for shape in shapes:\n", | |
" placed_shape = place_shape(placed_shapes, shape, max_r=max_r, trials=trials, mode=mode)\n", | |
" placed_shapes.append(placed_shape)\n", | |
" return placed_shapes\n", | |
"\n", | |
"def shapes_bbox(shapes):\n", | |
" x0 = y0 = float('inf')\n", | |
" x1 = y1 = float('-inf')\n", | |
" for shape in shapes:\n", | |
" if shape[0] == 'rect':\n", | |
" x, y, w, h = shape[1:]\n", | |
" elif shape[0] == 'circ':\n", | |
" x, y, r = shape[1:]\n", | |
" w = h = 2 * r\n", | |
" else:\n", | |
" raise NotImplementedError()\n", | |
" x0 = min(x0, x - w / 2)\n", | |
" y0 = min(y0, y - h / 2)\n", | |
" x1 = max(x1, x + w / 2)\n", | |
" y1 = max(y1, y + h / 2)\n", | |
" return x0, y0, x1 - x0, y1 - y0\n", | |
"\n", | |
"def pad_bbox(bbox, pad):\n", | |
" x, y, w, h = bbox\n", | |
" return x - pad, y - pad, w + pad * 2, h + pad * 2" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Text metrics" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 200, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import cairo\n", | |
"\n", | |
"def set_font(ctx):\n", | |
" ctx.select_font_face('Courier New', cairo.FONT_SLANT_NORMAL, cairo.FONT_WEIGHT_BOLD)\n", | |
"\n", | |
"text_metrics = {}\n", | |
"text_metrics_ctx = cairo.Context(cairo.ImageSurface(cairo.FORMAT_ARGB32, 100, 100))\n", | |
"set_font(text_metrics_ctx)\n", | |
"text_metrics_ctx.set_font_size(100)\n", | |
"\n", | |
"def get_text_width(text, size):\n", | |
" if text not in text_metrics:\n", | |
" print('recalc')\n", | |
" tx = text_metrics_ctx.text_extents(text)\n", | |
" w = tx.width\n", | |
" text_metrics[text] = w\n", | |
" return text_metrics[text] * size / 100" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Prepare users data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 201, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"with open('../aura/db/comments.json', encoding='utf-8') as f:\n", | |
" comments_db = json.load(f)\n", | |
"\n", | |
"comments_by_author = {}\n", | |
"for comment in comments_db.values():\n", | |
" u_id = comment['author']['id']\n", | |
" if u_id not in comments_by_author:\n", | |
" comments_by_author[u_id] = []\n", | |
" comments_by_author[u_id].append(comment)\n", | |
"\n", | |
"top_comments_authors = Counter({u_id: len(comments) for u_id, comments in comments_by_author.items()})\n", | |
"\n", | |
"top_count = 350\n", | |
"user_id_order1 = [\n", | |
" int(user_id)\n", | |
" for user_id, val in top_comments_authors.most_common()[:top_count]\n", | |
"]\n", | |
"user_id_order = []\n", | |
"for user_id in user_id_order1:\n", | |
" if str(user_id) not in user_info_db:\n", | |
" print(user_id, 'not found in user_info_db')\n", | |
" else:\n", | |
" user_id_order.append(user_id)\n", | |
"vals_order = [top_comments_authors[user_id] for user_id in user_id_order]\n", | |
"max_val = max(vals)\n", | |
"\n", | |
"shapes_order = []\n", | |
"cnt0 = 0\n", | |
"cnt1 = 0\n", | |
"for user_id, val in zip(user_id_order, vals_order):\n", | |
" ui = user_info_db.get(str(user_id))\n", | |
" if ui is None:\n", | |
" # print(user_id)\n", | |
" cnt0 += 1\n", | |
" continue\n", | |
" cnt1 += 1\n", | |
" s = (val / max_val) ** 0.5 * 200\n", | |
" label = ui['name']\n", | |
" label_width = get_text_width(str(label), 100)\n", | |
" shapes_order.append(make_rect(0, 0, s, s))\n", | |
"\n", | |
"placed_shapes = place_shapes(shapes_order, max_r=1624, trials=3000, mode='rect')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Draw output image" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 206, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import cairo\n", | |
"import math\n", | |
"from io import BytesIO\n", | |
"from PIL import Image\n", | |
"\n", | |
"bbox = pad_bbox(shapes_bbox(placed_shapes), 40)\n", | |
"scale_coeff = 1\n", | |
"\n", | |
"WIDTH, HEIGHT = int(bbox[2] * scale_coeff), int(bbox[3] * scale_coeff)\n", | |
"\n", | |
"surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, WIDTH, HEIGHT)\n", | |
"ctx = cairo.Context(surface)\n", | |
"\n", | |
"ctx.scale(scale_coeff, scale_coeff)\n", | |
"ctx.translate(-bbox[0], -bbox[1])\n", | |
"\n", | |
"# ctx.set_source_rgb(1.0, 1.0, 1.0)\n", | |
"ctx.set_source_rgb(0.0, 0.0, 0.0)\n", | |
"ctx.paint()\n", | |
"\n", | |
"for user_id, shape in zip(user_id_order, placed_shapes):\n", | |
" ui = user_info_db[str(user_id)]\n", | |
" label = ui['name']\n", | |
" label_ref_width = get_text_width(label, 100)\n", | |
"\n", | |
" assert shape[0] == 'rect'\n", | |
" x, y, w, h = shape[1:]\n", | |
" ctx.save()\n", | |
" ctx.new_path()\n", | |
" ctx.translate(x - w / 2, y - h / 2)\n", | |
" ctx.rectangle(0, 0, w, h)\n", | |
" ctx.set_line_width(3.0)\n", | |
" ctx.set_source_rgb(1.0, 0.0, 0.0)\n", | |
" # ctx.stroke_preserve()\n", | |
" \n", | |
" fpath = f'../aura/avatars/200/{user_id}.jfif'\n", | |
"\n", | |
" im = Image.open(fpath)\n", | |
" buffer = BytesIO()\n", | |
" im.save(buffer, format=\"PNG\")\n", | |
" buffer.seek(0)\n", | |
" im_surface = cairo.ImageSurface.create_from_png(buffer)\n", | |
"\n", | |
" ctx.scale(w / im_surface.get_width(), h / im_surface.get_height())\n", | |
" ctx.set_source_surface(im_surface)\n", | |
" ctx.fill()\n", | |
" ctx.restore()\n", | |
"\n", | |
" label_width = w * 0.9\n", | |
" font_size = label_width / label_ref_width * 100\n", | |
"\n", | |
" ctx.new_path()\n", | |
" set_font(ctx)\n", | |
" ctx.move_to(x - label_width / 2, y + h / 2 - h * 0.05)\n", | |
" ctx.set_font_size(font_size)\n", | |
" ctx.text_path(label)\n", | |
" ctx.set_source_rgb(0.0, 0.0, 0.0)\n", | |
" ctx.stroke_preserve()\n", | |
" ctx.set_line_width(5)\n", | |
" ctx.set_source_rgb(1.0, 1.0, 1.0)\n", | |
" ctx.fill()\n", | |
"\n", | |
"surface.write_to_png(\"top_commenters_visualization.png\")" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "vis_modules", | |
"language": "python", | |
"name": "vis_modules" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.5" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment