Skip to content

Instantly share code, notes, and snippets.

@pallada-92
Last active March 22, 2020 22:03
Show Gist options
  • Save pallada-92/6550210845416903455ae4657d05dd0e to your computer and use it in GitHub Desktop.
Save pallada-92/6550210845416903455ae4657d05dd0e to your computer and use it in GitHub Desktop.
Aura top commenters visualization
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Визуализация самых активных комментаторов Ауры"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Этот ноутбук не предполагается для запуска т.к. использует внешние файлы с данными пользователей Ауры, которые были собраны мной вручную. Тем не менее, он может быть полезен для изучения наивного алгоритма укладки кругов или квадратов, а также вывода изображения при помощи cairo."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Алгоритм укладки кругов — моя модификация Wordle. Он просто пытается поместить фигуры в случайные места изображения до тех пор, пока они не будут пересекаться с предыдущими фигурами. Радиус, в котором производится поиск, постепенно увеличивается. Отличие от Wordle в том, что вместо детерминированной спирали используется генератор случайных чисел."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Описание Worlde можно найти тут: http://static.mrfeinberg.com/bv_ch03.pdf"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load data"
]
},
{
"cell_type": "code",
"execution_count": 195,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"from collections import Counter\n",
"\n",
"with open('../aura/db/user_info.json', encoding='utf-8') as f:\n",
" user_info_db = json.load(f)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load avatars"
]
},
{
"cell_type": "code",
"execution_count": 197,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"\n",
"from IPython.display import display\n",
"from PIL import Image\n",
"from io import BytesIO\n",
"from tqdm import tqdm_notebook as tqdm\n",
"\n",
"import os\n",
"\n",
"size = 200\n",
"os.makedirs(f'../aura/avatars/{size}', exist_ok=1)\n",
"for ui in tqdm(user_info_db.values()):\n",
" uid = ui['id']\n",
" assert ui['photo_url'] == ui['ava_url']\n",
" if ui['photo_url'] == '':\n",
" print(ui['url'])\n",
" continue\n",
" photo_url = '/'.join(ui['photo_url'].split('/')[:-1]) + f'/{size}'\n",
" fpath = f'../aura/avatars/{size}/{uid}.jfif'\n",
" if os.path.exists(fpath):\n",
" continue\n",
" req = requests.get(photo_url)\n",
" with open(fpath, 'wb') as f:\n",
" f.write(req.content)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Layout"
]
},
{
"cell_type": "code",
"execution_count": 199,
"metadata": {},
"outputs": [],
"source": [
"def make_circ(x, y, r):\n",
" return ('circ', x, y, r)\n",
"\n",
"def make_rect(cx, cy, w, h):\n",
" return ('rect', cx, cy, w, h)\n",
"\n",
"def check_intersect(shape1, shape2):\n",
" if shape1 is None or shape2 is None:\n",
" return False\n",
" if shape1[0] == 'circ' and shape2[0] == 'circ':\n",
" s1, x1, y1, r1 = shape1\n",
" s2, x2, y2, r2 = shape2\n",
" return (x2 - x1) ** 2 + (y2 - y1) ** 2 <= (r1 + r2) ** 2\n",
" if shape1[0] == 'rect' and shape2[0] == 'rect':\n",
" s1, cx1, cy1, w1, h1 = shape1\n",
" s2, cx2, cy2, w2, h2 = shape2\n",
" return abs(cx1 - cx2) * 2 <= w1 + w2 and abs(cy1 - cy2) * 2 <= h1 + h2\n",
" raise NotImplementedError()\n",
"\n",
"import random\n",
"import math\n",
"\n",
"def place_shape(placed_shapes, new_shape, max_r=10, trials=100, mode='circ'):\n",
" ox, oy = new_shape[1], new_shape[2]\n",
" candidate_shape = list(new_shape)\n",
" for trial in range(trials):\n",
" cur_r = (trial / trials) * 0.5 * max_r\n",
" if mode == 'circ':\n",
" cur_a = random.random() * math.pi * 2\n",
" dx, dy = cur_r * math.cos(cur_a), cur_r * math.sin(cur_a)\n",
" else:\n",
" dx = cur_r * (random.random() * 2 - 1)\n",
" dy = cur_r * (random.random() * 2 - 1)\n",
" candidate_shape[1] = ox + dx\n",
" candidate_shape[2] = oy + dy\n",
" for other_shape in placed_shapes:\n",
" if check_intersect(candidate_shape, other_shape):\n",
" break\n",
" else:\n",
" return tuple(candidate_shape)\n",
" return None\n",
"\n",
"def place_shapes(shapes, max_r=10, trials=100, mode='circ'):\n",
" random.seed(0)\n",
" placed_shapes = []\n",
" for shape in shapes:\n",
" placed_shape = place_shape(placed_shapes, shape, max_r=max_r, trials=trials, mode=mode)\n",
" placed_shapes.append(placed_shape)\n",
" return placed_shapes\n",
"\n",
"def shapes_bbox(shapes):\n",
" x0 = y0 = float('inf')\n",
" x1 = y1 = float('-inf')\n",
" for shape in shapes:\n",
" if shape[0] == 'rect':\n",
" x, y, w, h = shape[1:]\n",
" elif shape[0] == 'circ':\n",
" x, y, r = shape[1:]\n",
" w = h = 2 * r\n",
" else:\n",
" raise NotImplementedError()\n",
" x0 = min(x0, x - w / 2)\n",
" y0 = min(y0, y - h / 2)\n",
" x1 = max(x1, x + w / 2)\n",
" y1 = max(y1, y + h / 2)\n",
" return x0, y0, x1 - x0, y1 - y0\n",
"\n",
"def pad_bbox(bbox, pad):\n",
" x, y, w, h = bbox\n",
" return x - pad, y - pad, w + pad * 2, h + pad * 2"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Text metrics"
]
},
{
"cell_type": "code",
"execution_count": 200,
"metadata": {},
"outputs": [],
"source": [
"import cairo\n",
"\n",
"def set_font(ctx):\n",
" ctx.select_font_face('Courier New', cairo.FONT_SLANT_NORMAL, cairo.FONT_WEIGHT_BOLD)\n",
"\n",
"text_metrics = {}\n",
"text_metrics_ctx = cairo.Context(cairo.ImageSurface(cairo.FORMAT_ARGB32, 100, 100))\n",
"set_font(text_metrics_ctx)\n",
"text_metrics_ctx.set_font_size(100)\n",
"\n",
"def get_text_width(text, size):\n",
" if text not in text_metrics:\n",
" print('recalc')\n",
" tx = text_metrics_ctx.text_extents(text)\n",
" w = tx.width\n",
" text_metrics[text] = w\n",
" return text_metrics[text] * size / 100"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Prepare users data"
]
},
{
"cell_type": "code",
"execution_count": 201,
"metadata": {},
"outputs": [],
"source": [
"with open('../aura/db/comments.json', encoding='utf-8') as f:\n",
" comments_db = json.load(f)\n",
"\n",
"comments_by_author = {}\n",
"for comment in comments_db.values():\n",
" u_id = comment['author']['id']\n",
" if u_id not in comments_by_author:\n",
" comments_by_author[u_id] = []\n",
" comments_by_author[u_id].append(comment)\n",
"\n",
"top_comments_authors = Counter({u_id: len(comments) for u_id, comments in comments_by_author.items()})\n",
"\n",
"top_count = 350\n",
"user_id_order1 = [\n",
" int(user_id)\n",
" for user_id, val in top_comments_authors.most_common()[:top_count]\n",
"]\n",
"user_id_order = []\n",
"for user_id in user_id_order1:\n",
" if str(user_id) not in user_info_db:\n",
" print(user_id, 'not found in user_info_db')\n",
" else:\n",
" user_id_order.append(user_id)\n",
"vals_order = [top_comments_authors[user_id] for user_id in user_id_order]\n",
"max_val = max(vals)\n",
"\n",
"shapes_order = []\n",
"cnt0 = 0\n",
"cnt1 = 0\n",
"for user_id, val in zip(user_id_order, vals_order):\n",
" ui = user_info_db.get(str(user_id))\n",
" if ui is None:\n",
" # print(user_id)\n",
" cnt0 += 1\n",
" continue\n",
" cnt1 += 1\n",
" s = (val / max_val) ** 0.5 * 200\n",
" label = ui['name']\n",
" label_width = get_text_width(str(label), 100)\n",
" shapes_order.append(make_rect(0, 0, s, s))\n",
"\n",
"placed_shapes = place_shapes(shapes_order, max_r=1624, trials=3000, mode='rect')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Draw output image"
]
},
{
"cell_type": "code",
"execution_count": 206,
"metadata": {},
"outputs": [],
"source": [
"import cairo\n",
"import math\n",
"from io import BytesIO\n",
"from PIL import Image\n",
"\n",
"bbox = pad_bbox(shapes_bbox(placed_shapes), 40)\n",
"scale_coeff = 1\n",
"\n",
"WIDTH, HEIGHT = int(bbox[2] * scale_coeff), int(bbox[3] * scale_coeff)\n",
"\n",
"surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, WIDTH, HEIGHT)\n",
"ctx = cairo.Context(surface)\n",
"\n",
"ctx.scale(scale_coeff, scale_coeff)\n",
"ctx.translate(-bbox[0], -bbox[1])\n",
"\n",
"# ctx.set_source_rgb(1.0, 1.0, 1.0)\n",
"ctx.set_source_rgb(0.0, 0.0, 0.0)\n",
"ctx.paint()\n",
"\n",
"for user_id, shape in zip(user_id_order, placed_shapes):\n",
" ui = user_info_db[str(user_id)]\n",
" label = ui['name']\n",
" label_ref_width = get_text_width(label, 100)\n",
"\n",
" assert shape[0] == 'rect'\n",
" x, y, w, h = shape[1:]\n",
" ctx.save()\n",
" ctx.new_path()\n",
" ctx.translate(x - w / 2, y - h / 2)\n",
" ctx.rectangle(0, 0, w, h)\n",
" ctx.set_line_width(3.0)\n",
" ctx.set_source_rgb(1.0, 0.0, 0.0)\n",
" # ctx.stroke_preserve()\n",
" \n",
" fpath = f'../aura/avatars/200/{user_id}.jfif'\n",
"\n",
" im = Image.open(fpath)\n",
" buffer = BytesIO()\n",
" im.save(buffer, format=\"PNG\")\n",
" buffer.seek(0)\n",
" im_surface = cairo.ImageSurface.create_from_png(buffer)\n",
"\n",
" ctx.scale(w / im_surface.get_width(), h / im_surface.get_height())\n",
" ctx.set_source_surface(im_surface)\n",
" ctx.fill()\n",
" ctx.restore()\n",
"\n",
" label_width = w * 0.9\n",
" font_size = label_width / label_ref_width * 100\n",
"\n",
" ctx.new_path()\n",
" set_font(ctx)\n",
" ctx.move_to(x - label_width / 2, y + h / 2 - h * 0.05)\n",
" ctx.set_font_size(font_size)\n",
" ctx.text_path(label)\n",
" ctx.set_source_rgb(0.0, 0.0, 0.0)\n",
" ctx.stroke_preserve()\n",
" ctx.set_line_width(5)\n",
" ctx.set_source_rgb(1.0, 1.0, 1.0)\n",
" ctx.fill()\n",
"\n",
"surface.write_to_png(\"top_commenters_visualization.png\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "vis_modules",
"language": "python",
"name": "vis_modules"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment