Skip to content

Instantly share code, notes, and snippets.

@nicktimko
Created March 31, 2019 16:06
Show Gist options
  • Save nicktimko/f535fdcc14e06983454bdaecf053e564 to your computer and use it in GitHub Desktop.
Save nicktimko/f535fdcc14e06983454bdaecf053e564 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import collections\n",
"import itertools\n",
"import time\n",
"\n",
"import requests"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"url = \"https://api.stackexchange.com/2.2/questions\"\n",
"query = {\n",
" \"order\": \"desc\",\n",
" \"sort\": \"activity\",\n",
" \"tagged\": \"filter\",\n",
" \"site\": \"webapps\",\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'page': 1, 'pagesize': 100}\n"
]
}
],
"source": [
"pageinfo = {\n",
" \"page\": 1,\n",
" \"pagesize\": 100,\n",
"}\n",
"questions = []\n",
"while True:\n",
" print(pageinfo)\n",
" response = requests.get(url, params={**query, **pageinfo})\n",
" \n",
" data = response.json()\n",
" items = data.pop(\"items\")\n",
"\n",
" questions.extend(items)\n",
" \n",
" if not data[\"has_more\"]:\n",
" break\n",
" if data[\"quota_remaining\"] < 100:\n",
" break\n",
" \n",
" pageinfo[\"page\"] += 1\n",
" time.sleep(0.5)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[(2, 22), (5, 19), (3, 18), (4, 11)]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"n_tags_per_q = collections.Counter(len(q[\"tags\"]) for q in questions)\n",
"assert 1 not in n_tags_per_q # there are no questions *ONLY* tagged [filter]\n",
"n_tags_per_q.most_common()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('google-sheets', 10),\n",
" ('twitter', 3),\n",
" ('google-search', 2),\n",
" ('yahoo-mail', 2),\n",
" ('netflix', 1),\n",
" ('spam-prevention', 1),\n",
" ('youtube', 1),\n",
" ('outlook.com', 1),\n",
" ('search', 1)]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"the_one_other_tag = collections.Counter(itertools.chain(*(q[\"tags\"] for q in questions if len(q[\"tags\"]) == 2)))\n",
"the_one_other_tag.pop(\"filter\")\n",
"the_one_other_tag.most_common()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('google-sheets', 42),\n",
" ('formulas', 27),\n",
" ('google-sheets-query', 14),\n",
" ('google-sheets-arrayformula', 13),\n",
" ('twitter', 5),\n",
" ('importrange', 4),\n",
" ('gmail', 4),\n",
" ('search', 4),\n",
" ('google-search', 4),\n",
" ('regex', 3)]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tag_counts = collections.Counter(itertools.chain(*(q[\"tags\"] for q in questions)))\n",
"assert tag_counts.pop(\"filter\") == len(questions) # is on all..boring.\n",
"tag_counts.most_common(10)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('google-sheets', 42),\n",
" ('twitter', 5),\n",
" ('gmail', 4),\n",
" ('google-search', 3),\n",
" ('spam-prevention', 2),\n",
" ('yahoo-mail', 2),\n",
" ('search', 2),\n",
" ('google-apps', 1),\n",
" ('google-sheets-query', 1),\n",
" ('regex', 1),\n",
" ('netflix', 1),\n",
" ('youtube', 1),\n",
" ('outlook.com', 1),\n",
" ('amazon', 1),\n",
" ('firefox-extensions', 1),\n",
" ('google-reader', 1),\n",
" ('rss', 1)]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"most_common_other_tags = collections.Counter()\n",
"for question in questions:\n",
" mcot = max(question[\"tags\"], key=lambda x: tag_counts[x])\n",
" most_common_other_tags[mcot] += 1\n",
"most_common_other_tags.most_common()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.1"
},
"spitball": {
"api_url": "",
"desc": "",
"id": "",
"vis": ""
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment