Skip to content

Instantly share code, notes, and snippets.

@senko
Last active November 3, 2022 21:40
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save senko/b031f61f61d89f96e165659d3f022784 to your computer and use it in GitHub Desktop.
Save senko/b031f61f61d89f96e165659d3f022784 to your computer and use it in GitHub Desktop.
Get the most popular languages on Hacker News
#!/usr/bin/env python
#
# Calculate top list of programming languages based on HN stories/comments/points
# More info: https://blog.senko.net/relative-popularity-of-programming-languages-on-hacker-news
from datetime import datetime, timedelta
from json import dump, load
from os.path import join, exists
from tempfile import gettempdir
from typing import Optional
import requests
# Endpoint that every search request in this script is sent to.
HN_SEARCH_URL = "https://hn.algolia.com/api/v1/search_by_date"

# Parameters merged into every search request: restrict to items tagged
# "story", match against the title attribute only, and ask for 1000 hits
# per page.
HN_SEARCH_PARAMS = dict(
    tags="story",
    restrictSearchableAttributes="title",
    hitsPerPage="1000",
)
# Languages to rank. Each name is searched as the quoted phrase "in <name>",
# and each name is also used as the cache file stem and the result key.
LANGUAGES = [
    "C", "C++", "C#", "Clojure", "Dart",
    "Elixir", "Erlang", "F#", "Go", "Haskell",
    "Java", "JavaScript", "Kotlin", "Lisp", "Lua",
    "PHP", "Python", "Racket", "Ruby", "Rust",
    "Scala", "Scheme", "Swift", "TypeScript", "Zig",
]
def get_page(query: str, created_since: datetime, page: int) -> dict:
    """Fetch a single page of search results for ``query``.

    Args:
        query: Search phrase, sent as the ``query`` parameter.
        created_since: Lower bound on item creation time; sent as a
            ``created_at_i > <timestamp>`` numeric filter.
        page: Page index to request. When it is 0 the ``page`` parameter
            is omitted from the request.

    Returns:
        The decoded JSON response body (a JSON object, not a list).
        Callers in this file read its ``hits`` and ``nbPages`` keys.

    Raises:
        requests.HTTPError: If the server responds with an error status.
        requests.Timeout: If the server does not answer within the timeout.
    """
    params = {
        "query": query,
        "numericFilters": f"created_at_i > {created_since.timestamp()}",
    }
    params.update(HN_SEARCH_PARAMS)
    if page:
        params["page"] = page
    # requests has no default timeout; without one a stalled connection
    # would block the whole script indefinitely.
    resp = requests.get(HN_SEARCH_URL, params=params, timeout=30)
    resp.raise_for_status()
    return resp.json()
def get_results(lang: str) -> list[dict]:
    """Collect all search hits from the last 365 days for ``"in <lang>"``.

    The language name is wrapped in a quoted phrase (``"in Python"``) and
    every result page is fetched in turn via ``get_page``.

    Args:
        lang: Language name to search for.

    Returns:
        The concatenated ``hits`` entries of every fetched page.
    """
    query = f'"in {lang}"'
    created_since = datetime.now() - timedelta(days=365)
    results = []
    page = 0
    while True:
        response = get_page(query, created_since, page)
        results.extend(response["hits"])
        page += 1
        # Page indices start at 0, so the last valid index is nbPages - 1.
        # Comparing after the increment avoids requesting one extra page
        # beyond the end of the result set.
        if page >= response.get("nbPages", 0):
            return results
def download_language_results(data_dir: str, lang: str) -> list[dict]:
    """Return the search results for ``lang``, cached as JSON in ``data_dir``.

    If ``<data_dir>/<lang>.json`` exists, its contents are returned without
    any network access. Otherwise the results are fetched with
    ``get_results``, written to that file, and returned.

    Args:
        data_dir: Directory holding the per-language cache files. It is
            created (including parents) if it does not exist yet.
        lang: Language name; also used as the cache file stem.

    Returns:
        The list of results for ``lang``.

    Raises:
        OSError: If the cache file cannot be read or the cache directory
            or file cannot be created.
    """
    from os import makedirs  # local import: not among the module-level imports

    fname = join(data_dir, lang + ".json")
    if exists(fname):
        with open(fname, "r", encoding="utf-8") as fp:
            return load(fp)

    # Create the cache directory before fetching, so a fresh run does not
    # fail on the write below and an unusable path is reported before any
    # network requests are made.
    makedirs(data_dir, exist_ok=True)
    results = get_results(lang)
    with open(fname, "w", encoding="utf-8") as fp:
        dump(results, fp)
    return results
def get_all_results(data_dir: str) -> dict[str,list[dict]]:
    """Load the results for every entry of LANGUAGES.

    Each language is resolved through ``download_language_results``, in the
    order the languages are listed.

    Returns:
        A dict mapping each language name to its list of results.
    """
    return {
        language: download_language_results(data_dir, language)
        for language in LANGUAGES
    }
def postprocess_results(results: dict[str,list[dict]]) -> dict[str,list[dict]]:
    """Clean up overlapping per-language results in place.

    Two adjustments are made:

    * Items that also appear under "C++" or "C#" are dropped from "C".
    * "Racket" items are folded into "Scheme" and the "Racket" key is
      removed. An item already present under "Scheme" (same ``objectID``)
      is not added a second time, so it is not counted twice.

    Args:
        results: Mapping of language name to result items. It must contain
            the keys "C", "C++", "C#", "Scheme" and "Racket", and every
            item must have an ``objectID``.

    Returns:
        The same ``results`` dict, modified in place.

    Raises:
        KeyError: If one of the required languages is missing.
    """
    # All C++/C# results also match C, so we remove them in postproc
    non_c_ids = {
        item["objectID"]
        for lang in ("C++", "C#")
        for item in results[lang]
    }
    results["C"] = [
        item for item in results["C"] if item["objectID"] not in non_c_ids
    ]

    # Add Racket results to Scheme, skipping items that are already there.
    scheme = results["Scheme"]
    racket = results["Racket"]
    seen_ids = {item["objectID"] for item in scheme}
    for item in racket:
        if item["objectID"] not in seen_ids:
            seen_ids.add(item["objectID"])
            scheme.append(item)
    del results["Racket"]
    return results
def calculate_score(results: dict[str,list[dict]]):
    """Summarise each language's stories into three totals.

    Returns:
        A dict mapping each language to a dict with the keys ``stories``
        (number of items), ``comments`` (sum of ``num_comments``) and
        ``points`` (sum of ``points``).
    """
    def tally(items):
        # Totals for one language's list of stories.
        return {
            "stories": len(items),
            "comments": sum(story["num_comments"] for story in items),
            "points": sum(story["points"] for story in items),
        }

    return {language: tally(items) for language, items in results.items()}
def sort_by_stories(score):
    """Return the language names ordered by story count, highest first."""
    def story_count(language):
        return score[language]["stories"]

    return sorted(score, key=story_count, reverse=True)
def sort_by_comments(score):
    """Return the language names ordered by comment total, highest first."""
    def comment_total(language):
        return score[language]["comments"]

    return sorted(score, key=comment_total, reverse=True)
def sort_by_points(score):
    """Return the language names ordered by point total, highest first."""
    def point_total(language):
        return score[language]["points"]

    return sorted(score, key=point_total, reverse=True)
def generate_top_list(data_dir):
    """Build the per-language scores and the three rankings.

    Results are loaded via ``get_all_results`` (using ``data_dir`` as the
    cache directory), post-processed, and scored.

    Returns:
        A dict with the keys ``score`` (per-language totals) and
        ``by_stories``, ``by_comments``, ``by_points`` (language names
        ranked by the respective total).
    """
    raw_results = get_all_results(data_dir)
    cleaned_results = postprocess_results(raw_results)
    score = calculate_score(cleaned_results)

    top_list = {"score": score}
    top_list["by_stories"] = sort_by_stories(score)
    top_list["by_comments"] = sort_by_comments(score)
    top_list["by_points"] = sort_by_points(score)
    return top_list
def print_top_list(top_list):
    """Print the three rankings contained in ``top_list`` to stdout.

    ``top_list`` is expected to have the shape produced by
    ``generate_top_list``: a ``score`` dict plus the ``by_stories``,
    ``by_comments`` and ``by_points`` rankings.
    """
    scores = top_list["score"]
    # (heading, metric) pairs; the metric names both the score field and
    # the unit printed after each value, and "by_<metric>" is the ranking.
    sections = (
        ("By number of stories:", "stories"),
        ("\nBy number of comments:", "comments"),
        ("\nBy points:", "points"),
    )
    for heading, metric in sections:
        print(heading)
        for rank, lang in enumerate(top_list["by_" + metric], start=1):
            print(f" {rank:>2}. {lang} ({scores[lang][metric]} {metric})")
if __name__ == "__main__":
    # Cache the downloaded results in an "hn" folder under the system
    # temporary directory, then print the rankings.
    cache_dir = join(gettempdir(), "hn")
    print_top_list(generate_top_list(cache_dir))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment