// Super Fan Reviews Scraper Methods
// Source: GitHub gist joshhills/03dd0721a04d2ae64a269bd02b72daee (last active November 21, 2019 00:13).
// Tuning constants consumed by gameMeetsCriteria() and reviewMeetsCriteria().
const TWITTER_CHARACTER_LIMIT = 280; // Exclusive upper bound on the estimated post length (review + game name + developer + TEXT_PADDING)
const MIN_REVIEW_SIZE = 10; // A review's text must be longer than this many characters to be considered
const TEXT_PADDING = 25; // Extra characters added to the length estimate to leave room for the post template (template is not defined in this file)
const AVERAGE_PLAYTIME_THRESHOLD = 10; // The reviewer's playtime must exceed the game's average playtime multiplied by this factor
const MIN_AVERAGE_FOREVER = 120; // A game's averagePlaytimeForever must exceed this. NOTE(review): this gates on the all-time average, not on recent activity; unit is presumably minutes — confirm against SteamSpy
const MIN_NEGATIVE_REVIEWS = 100; // A game's negative-review count must exceed this
// Ids (database keys, therefore strings) of the games already stored under `games/`.
// gameMeetsCriteria() consults this list so a game is not scraped twice.
// `val()` yields null when nothing is stored at the path (e.g. on the very first
// run), so fall back to an empty object instead of letting Object.keys(null) throw.
const alreadyScrapedGamesSnapshot = await database.ref('games/').once('value');
const alreadyScrapedGames = Object.keys(alreadyScrapedGamesSnapshot.val() || {});
/**
 * Fetch one game's details from the SteamSpy `appdetails` endpoint and reduce
 * them to the fields the scraper works with.
 *
 * @param {string|number} appId - Steam application id, interpolated into the query string as-is.
 * @returns {Promise<Object>} Resolves to `{ id, name, developer, averagePlaytimeForever, negative }`;
 *     `id` is the `appId` argument unchanged, the rest are copied from the response.
 */
async function getGame(appId) {
  const response = await fetch(`https://steamspy.com/api.php?request=appdetails&appid=${appId}`);
  const details = await response.json();
  const { name, developer, average_forever: averagePlaytimeForever, negative } = details;

  return { id: appId, name, developer, averagePlaytimeForever, negative };
}
/**
 * Fetch the SteamSpy `top100in2weeks` listing and convert it into an array of
 * game records.
 *
 * The response is treated as an object keyed by game id; each key becomes the
 * record's `id` (a string) and the remaining fields are copied from its value.
 *
 * @returns {Promise<Object[]>} Resolves to an array of
 *     `{ id, name, developer, averagePlaytimeForever, negative }` records, in the
 *     response object's key order.
 */
async function getGames() {
  const response = await fetch("https://steamspy.com/api.php?request=top100in2weeks");
  const gamesById = await response.json();

  const games = [];
  for (const [gameId, details] of Object.entries(gamesById)) {
    games.push({
      id: gameId,
      name: details.name,
      developer: details.developer,
      averagePlaytimeForever: details.average_forever,
      negative: details.negative,
    });
  }
  return games;
}
/**
 * Scrape every page of reviews for a game by following the cursor returned
 * from each call to scrapeASinglePageOfReviews().
 *
 * Pagination stops when the returned cursor is null/undefined/empty, or when a
 * cursor that was already followed comes back again. Either case would
 * otherwise make the loop request the same page(s) forever.
 *
 * @param {Object} game - Game record; `game.name` is logged and the record is
 *     passed through to scrapeASinglePageOfReviews().
 * @returns {Promise<void>} Resolves once pagination has finished.
 */
async function scrapeAllReviews(game) {
  console.log(`Scraping reviews for ${game.name}`);

  const followedCursors = new Set();
  let cursor = null; // null requests the first page

  for (;;) {
    const nextCursor = await scrapeASinglePageOfReviews(game, cursor);

    // A missing or empty cursor cannot identify a next page.
    if (nextCursor == null || nextCursor === "") {
      return;
    }
    // A repeated cursor would only re-fetch pages we have already processed.
    if (followedCursors.has(nextCursor)) {
      return;
    }

    followedCursors.add(nextCursor);
    cursor = nextCursor;
  }
}
/**
 * Fetch one page of reviews for a game from the Steam store `appreviews`
 * endpoint and store every review accepted by reviewMeetsCriteria() under
 * `candidates/<gameId>/<reviewId>` in the database.
 *
 * The request asks for recent, English, negative reviews, 100 per page.
 *
 * @param {Object} game - Game record; `id` and `name` are used here and the
 *     whole record is passed to reviewMeetsCriteria().
 * @param {?string} cursor - Cursor returned by the previous page, or a falsy
 *     value to request the first page. It is URI-encoded before use.
 * @returns {Promise<?string>} Resolves to the cursor for the next page, or null
 *     when the body could not be parsed, the request did not succeed, the page
 *     held no reviews, or the response carried no cursor.
 */
async function scrapeASinglePageOfReviews(game, cursor) {
  const encodedCursor = cursor ? encodeURIComponent(cursor) : "";
  const reviewUrl = `https://store.steampowered.com/appreviews/${game.id}?json=1&filter=recent&language=english&review_type=negative&purchase_type=all&num_per_page=100&cursor=${encodedCursor}`;

  const res = await fetch(reviewUrl);

  // res.json() reports a malformed/empty body by rejecting its promise, so the
  // parse has to be awaited inside the try block for the catch to see it.
  let data;
  try {
    data = await res.json();
  } catch (e) {
    data = null;
  }

  if (data == null) {
    console.log(`WARNING: END OF JSON INPUT FOR GAME ${game.id}`);
    return null;
  }

  const hasReviews = Boolean(data.success) &&
    Boolean(data.query_summary) &&
    data.query_summary.num_reviews > 0 &&
    Array.isArray(data.reviews);
  if (!hasReviews) {
    return null;
  }

  const pendingWrites = [];
  for (const review of data.reviews) {
    if (!reviewMeetsCriteria(game, review)) {
      continue;
    }

    const reviewTruncated = {
      "id": review.recommendationid,
      "gameId": game.id,
      "gameName": game.name,
      // playtime_forever is divided by 60 to report whole hours.
      "hoursPlayed": Math.floor(review.author.playtime_forever / 60),
      // Trim, then collapse every whitespace run (newlines included) to one space.
      "text": review.review.trim().replace(/\s+/g, ' '),
      "freebie": review.received_for_free,
      "early": review.written_during_early_access
    };

    console.log(`"${reviewTruncated.text}", on ${reviewTruncated.gameName} with ${reviewTruncated.hoursPlayed} hours played. ${reviewTruncated.freebie ? "Product received for free. " : ""}${reviewTruncated.early ? "Product has not yet been released." : ""} - author: ${review.author.steamid}\n`);

    pendingWrites.push(
      database.ref(`candidates/${reviewTruncated.gameId}/${reviewTruncated.id}`).set(reviewTruncated)
    );
  }

  // Wait for the writes so a failed one surfaces to the caller instead of
  // becoming an unhandled rejection.
  await Promise.all(pendingWrites);

  // Normalise a missing cursor to null so callers have one "no more pages" value.
  return data.cursor == null ? null : data.cursor;
}
/**
 * Decide whether a game is worth scraping.
 *
 * A game qualifies when its average playtime exceeds MIN_AVERAGE_FOREVER, its
 * negative-review count exceeds MIN_NEGATIVE_REVIEWS, and its id is not already
 * listed in alreadyScrapedGames.
 *
 * @param {Object} game - Record with `id`, `averagePlaytimeForever` and `negative`.
 * @returns {boolean} true when the game passes every check.
 */
function gameMeetsCriteria(game) {
  return game.averagePlaytimeForever > MIN_AVERAGE_FOREVER &&
    game.negative > MIN_NEGATIVE_REVIEWS &&
    // alreadyScrapedGames holds object keys (strings), so a numeric id must be
    // converted first or `includes` would never find it.
    !alreadyScrapedGames.includes(String(game.id));
}
/**
 * Decide whether a review is a candidate worth storing.
 *
 * A review qualifies when it is explicitly not a recommendation
 * (`voted_up === false`), its author's playtime exceeds the game's average
 * playtime times AVERAGE_PLAYTIME_THRESHOLD, its text is longer than
 * MIN_REVIEW_SIZE, and the estimated post length (review text + game name +
 * developer + TEXT_PADDING) stays below TWITTER_CHARACTER_LIMIT.
 *
 * @param {Object} game - Record with `name`, `developer` and `averagePlaytimeForever`.
 * @param {Object} review - Review with `review` (text), `voted_up` and `author.playtime_forever`.
 * @returns {boolean} true when every check passes.
 */
function reviewMeetsCriteria(game, review) {
  const reviewLength = review.review.length;
  const estimatedPostLength = reviewLength + game.name.length + game.developer.length + TEXT_PADDING;

  if (review.voted_up !== false) {
    return false;
  }

  const requiredPlaytime = game.averagePlaytimeForever * AVERAGE_PLAYTIME_THRESHOLD;
  if (!(review.author.playtime_forever > requiredPlaytime)) {
    return false;
  }

  if (!(reviewLength > MIN_REVIEW_SIZE)) {
    return false;
  }

  return estimatedPostLength < TWITTER_CHARACTER_LIMIT;
}
/**
 * Scrape reviews for every game returned by getGames() that passes
 * gameMeetsCriteria().
 *
 * Games are processed one at a time. For each qualifying game the record is
 * first written to `games/<id>` and, once that write has completed, its
 * reviews are scraped with scrapeAllReviews().
 *
 * @returns {Promise<void>} Resolves when every qualifying game has been processed.
 */
async function scrapeTop100Games2Weeks() {
  const games = await getGames();

  for (const game of games) {
    console.log(`Checking if ${game.name} meets criteria...`);
    if (!gameMeetsCriteria(game)) {
      continue;
    }

    // Await the write so a failure surfaces here rather than as an unhandled
    // rejection, and so the game record exists before its reviews are stored.
    await database.ref(`games/${game.id}`).set(game);
    await scrapeAllReviews(game);
  }
}
/**
 * Scrape reviews for a single game identified by its Steam application id.
 *
 * The game is looked up with getGame(). If it passes gameMeetsCriteria(), its
 * record is written to `games/<id>` and, once that write has completed, its
 * reviews are scraped with scrapeAllReviews(). Otherwise nothing is written.
 *
 * @param {string|number} appId - Steam application id, passed to getGame().
 * @returns {Promise<*>} Resolves to whatever scrapeAllReviews() resolves to, or
 *     undefined when the game does not meet the criteria.
 */
async function scrapeGame(appId) {
  const game = await getGame(appId);
  console.log(`Checking if ${game.name} meets criteria...`);

  if (!gameMeetsCriteria(game)) {
    return undefined;
  }

  // Await the write so a failure surfaces here rather than as an unhandled
  // rejection, and so the game record exists before its reviews are stored.
  await database.ref(`games/${game.id}`).set(game);
  return scrapeAllReviews(game);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment