Skip to content

Instantly share code, notes, and snippets.

@paulmwatson
Created January 26, 2021 07:54
Show Gist options
  • Save paulmwatson/2818ce34313daa13749543d4b3fee524 to your computer and use it in GitHub Desktop.
Save paulmwatson/2818ce34313daa13749543d4b3fee524 to your computer and use it in GitHub Desktop.
# Python code to replicate the Twitter Birdwatch note-ranking algorithm.
# Credit: https://twitter.github.io/birdwatch/about/ranking-notes/
import pandas as pd
# Load the notes and ratings exports; both files are tab-separated.
notes = pd.read_csv("notes-00000.tsv", sep="\t")
ratings = pd.read_csv("ratings-00000.tsv", sep="\t")

# Pair every rating with the note it rates. The inner join keeps only notes
# that have at least one rating. Columns present in both tables get a
# "_note" / "_rating" suffix (plain underscores: the previous "\_" spelling
# was an invalid escape sequence that leaked a backslash into column names).
ratingsWithNotes = notes.set_index("noteId").join(
    ratings.set_index("noteId"), lsuffix="_note", rsuffix="_rating", how="inner"
)

# One row per rating, so a constant 1 sums to the rating count per note.
ratingsWithNotes["numRatings"] = 1
def getCurrentlyRatedHelpfulNotesForTweet(
    tweetId,
    noteScoreSmoothingParameter=5,
    maxCurrentlyRatedHelpfulNotes=3,
    minRatingsNeeded=3,
    minSmoothedHelpfulnessScoreNeeded=0.5,
    ratingsData=None,
):
    """Return the top-scoring notes for one tweet.

    Each note's score is ``sum(helpful) / (numRatings + smoothing)``, so a
    note with only a few ratings is pulled towards zero.

    Args:
        tweetId: Value matched against the ``tweetId`` column.
        noteScoreSmoothingParameter: Constant added to the rating count in
            the score's denominator.
        maxCurrentlyRatedHelpfulNotes: Maximum number of rows returned.
        minRatingsNeeded: Minimum rating count a note needs to qualify.
        minSmoothedHelpfulnessScoreNeeded: Minimum smoothed score a note
            needs to qualify (inclusive).
        ratingsData: Optional DataFrame with one row per rating, ``noteId``
            as its index (or a column), and ``tweetId``, ``helpful`` and
            ``numRatings`` columns. Defaults to the module-level
            ``ratingsWithNotes`` table.

    Returns:
        A DataFrame keyed by ``noteId`` holding the summed numeric columns
        plus ``smoothedHelpfulnessScore``, sorted by that score in
        descending order and truncated to ``maxCurrentlyRatedHelpfulNotes``
        rows.
    """
    if ratingsData is None:
        ratingsData = ratingsWithNotes

    ratingsForTweet = ratingsData[ratingsData["tweetId"] == tweetId]

    # numeric_only=True keeps free-text columns out of the aggregation.
    # pandas >= 2.0 no longer drops them by default, and summing strings
    # would concatenate them or raise.
    scoredNotes = ratingsForTweet.groupby("noteId").sum(numeric_only=True)
    scoredNotes["smoothedHelpfulnessScore"] = scoredNotes["helpful"] / (
        scoredNotes["numRatings"] + noteScoreSmoothingParameter
    )

    hasEnoughRatings = scoredNotes["numRatings"] >= minRatingsNeeded
    scoresHighEnough = (
        scoredNotes["smoothedHelpfulnessScore"] >= minSmoothedHelpfulnessScoreNeeded
    )
    filteredNotes = scoredNotes[hasEnoughRatings & scoresHighEnough]

    rankedNotes = filteredNotes.sort_values(
        by="smoothedHelpfulnessScore", ascending=False
    )
    # Positional slice: keep at most the requested number of top rows.
    return rankedNotes.iloc[:maxCurrentlyRatedHelpfulNotes]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment