Skip to content

Instantly share code, notes, and snippets.

@joshhills
Last active November 21, 2019 00:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save joshhills/03dd0721a04d2ae64a269bd02b72daee to your computer and use it in GitHub Desktop.
Save joshhills/03dd0721a04d2ae64a269bd02b72daee to your computer and use it in GitHub Desktop.
Super Fan Reviews Scraper Methods
// Tuning constants for the review scraper.

// Upper bound (exclusive) on the computed post length checked in reviewMeetsCriteria:
// review text + game name + developer name + TEXT_PADDING must stay below this.
const TWITTER_CHARACTER_LIMIT = 280;
// A review's text must be strictly longer than this many characters to be kept.
const MIN_REVIEW_SIZE = 10;
// Extra characters added to the computed post length to leave room for the post template.
const TEXT_PADDING = 25;
// A reviewer's total playtime must exceed the game's average playtime multiplied by this factor.
const AVERAGE_PLAYTIME_THRESHOLD = 10;
// A game's all-time average playtime (SteamSpy `average_forever`) must exceed this value.
// NOTE(review): presumably minutes, like the review playtime that is divided by 60 to get
// hours elsewhere in this file - confirm against the SteamSpy API.
const MIN_AVERAGE_FOREVER = 120;
// A game must have strictly more than this many negative reviews to be scraped.
const MIN_NEGATIVE_REVIEWS = 100;
// IDs of every game already stored under `games/`, taken from the keys of that node
// (so they are always strings). gameMeetsCriteria uses this list to skip repeats.
// `val()` yields null when nothing is stored at the path (e.g. a fresh database), so fall
// back to an empty object instead of letting Object.keys(null) throw.
const alreadyScrapedGames = Object.keys(
  (await database.ref('games/').once('value')).val() ?? {}
);
/**
 * Fetches the SteamSpy `appdetails` record for one app and reduces it to the
 * fields the scraper works with.
 *
 * @param {string|number} appId - App ID interpolated into the request URL and
 *   echoed back unchanged as the `id` of the result.
 * @returns {Promise<{id: (string|number), name: *, developer: *, averagePlaytimeForever: *, negative: *}>}
 *   The trimmed-down game record.
 */
async function getGame(appId) {
  const response = await fetch(`https://steamspy.com/api.php?request=appdetails&appid=${appId}`);
  const { name, developer, average_forever, negative } = await response.json();
  return {
    "id": appId,
    "name": name,
    "developer": developer,
    "averagePlaytimeForever": average_forever,
    "negative": negative
  };
}
/**
 * Fetches SteamSpy's `top100in2weeks` listing and converts it into an array of
 * trimmed-down game records, one per key of the response object.
 *
 * @returns {Promise<Array<{id: string, name: *, developer: *, averagePlaytimeForever: *, negative: *}>>}
 *   Game records in the key order of the response; `id` is the response key.
 */
async function getGames() {
  const response = await fetch("https://steamspy.com/api.php?request=top100in2weeks");
  const gamesById = await response.json();
  const games = [];
  for (const gameId of Object.keys(gamesById)) {
    const details = gamesById[gameId];
    games.push({
      "id": gameId,
      "name": details.name,
      "developer": details.developer,
      "averagePlaytimeForever": details.average_forever,
      "negative": details.negative
    });
  }
  return games;
}
/**
 * Scrapes every page of reviews for a game by following the pagination cursor.
 *
 * Starts without a cursor and feeds each cursor returned by
 * scrapeASinglePageOfReviews into the next call until paging ends.
 *
 * @param {{id: (string|number), name: string}} game - Game to scrape; passed
 *   through unchanged to scrapeASinglePageOfReviews.
 * @returns {Promise<void>} Resolves once no further page is available.
 */
async function scrapeAllReviews(game) {
  console.log(`Scraping reviews for ${game.name}`);
  let cursor = null;
  for (;;) {
    const nextCursor = await scrapeASinglePageOfReviews(game, cursor);
    // Stop on any falsy cursor, not just null: scrapeASinglePageOfReviews sends an
    // empty cursor for falsy values, which would restart from the first page forever.
    // Also stop when the same cursor comes back, since requesting it again would
    // just fetch the same page in an endless loop.
    if (!nextCursor || nextCursor === cursor) {
      return;
    }
    cursor = nextCursor;
  }
}
/**
 * Fetches one page (up to 100) of recent, English, negative Steam reviews for a
 * game and stores every review accepted by reviewMeetsCriteria under
 * `candidates/<gameId>/<reviewId>` in the database.
 *
 * @param {{id: (string|number), name: string}} game - Game whose reviews are
 *   fetched; also handed to reviewMeetsCriteria.
 * @param {?string} cursor - Pagination cursor from the previous page, or a falsy
 *   value to request the first page. It is URL-encoded before use.
 * @returns {Promise<?string>} The cursor for the next page, or null when the
 *   response could not be parsed, was unsuccessful, or contained no reviews.
 */
async function scrapeASinglePageOfReviews(game, cursor) {
  const encodedCursor = cursor ? encodeURIComponent(cursor) : "";
  const reviewUrl = `https://store.steampowered.com/appreviews/${game.id}?json=1&filter=recent&language=english&review_type=negative&purchase_type=all&num_per_page=100&cursor=${encodedCursor}`;
  const res = await fetch(reviewUrl);

  let data;
  try {
    // res.json() reports malformed or truncated bodies by rejecting its promise,
    // so it has to be awaited inside the try for the catch to see the failure.
    data = await res.json();
  } catch {
    data = null;
  }
  if (data === null) {
    console.log(`WARNING: END OF JSON INPUT FOR GAME ${game.id}`);
    return null;
  }

  if (!data.success || !(data.query_summary.num_reviews > 0)) {
    return null;
  }

  const pendingWrites = [];
  for (const review of data.reviews) {
    if (!reviewMeetsCriteria(game, review)) {
      continue;
    }
    const reviewTruncated = {
      "id": review.recommendationid,
      "gameId": game.id,
      "gameName": game.name,
      // playtime_forever is divided by 60 and rounded down to get whole hours.
      "hoursPlayed": Math.floor(review.author.playtime_forever / 60),
      // Trim, then collapse every whitespace run (line breaks included) to one space.
      "text": review.review.trim().replace(/\s+/g, ' '),
      "freebie": review.received_for_free,
      "early": review.written_during_early_access
    };
    console.log(`"${reviewTruncated.text}", on ${reviewTruncated.gameName} with ${reviewTruncated.hoursPlayed} hours played. ${reviewTruncated.freebie ? "Product received for free. " : ""}${reviewTruncated.early ? "Product has not yet been released." : ""} - author: ${review.author.steamid}\n`);
    pendingWrites.push(
      database.ref(`candidates/${reviewTruncated.gameId}/${reviewTruncated.id}`).set(reviewTruncated)
    );
  }
  // Wait for the writes so a failed write rejects this call instead of becoming
  // an unhandled rejection somewhere later.
  await Promise.all(pendingWrites);
  return data.cursor;
}
/**
 * Decides whether a game is worth scraping.
 *
 * A game qualifies when its average playtime exceeds MIN_AVERAGE_FOREVER, it has
 * more than MIN_NEGATIVE_REVIEWS negative reviews, and it is not already listed
 * in alreadyScrapedGames.
 *
 * @param {{id: (string|number), averagePlaytimeForever: number, negative: number}} game
 *   Game record as produced by getGame / getGames.
 * @returns {boolean} True when the game should be scraped.
 */
function gameMeetsCriteria(game) {
  const hasEnoughPlaytime = game.averagePlaytimeForever > MIN_AVERAGE_FOREVER;
  const hasEnoughNegativeReviews = game.negative > MIN_NEGATIVE_REVIEWS;
  // alreadyScrapedGames holds object keys (always strings), while getGame echoes
  // the caller's appId, which may be a number. Compare as strings so a numeric id
  // is still recognised as already scraped.
  const isAlreadyScraped = alreadyScrapedGames.includes(String(game.id));
  return hasEnoughPlaytime && hasEnoughNegativeReviews && !isAlreadyScraped;
}
/**
 * Decides whether a single review is a candidate worth storing.
 *
 * A review qualifies when it is a negative vote, its author's total playtime
 * exceeds the game's average playtime times AVERAGE_PLAYTIME_THRESHOLD, its
 * text is longer than MIN_REVIEW_SIZE, and the text plus game name, developer
 * name and TEXT_PADDING stays below TWITTER_CHARACTER_LIMIT.
 *
 * @param {{name: string, developer: string, averagePlaytimeForever: number}} game
 *   Game the review belongs to.
 * @param {{review: string, voted_up: boolean, author: {playtime_forever: number}}} review
 *   Review record from the reviews response.
 * @returns {boolean} True when every criterion is met.
 */
function reviewMeetsCriteria(game, review) {
  const textLength = review.review.length;
  const postLength = textLength + game.name.length + game.developer.length + TEXT_PADDING;

  if (review.voted_up !== false) {
    return false;
  }

  const requiredPlaytime = game.averagePlaytimeForever * AVERAGE_PLAYTIME_THRESHOLD;
  if (!(review.author.playtime_forever > requiredPlaytime)) {
    return false;
  }

  if (!(textLength > MIN_REVIEW_SIZE)) {
    return false;
  }

  return postLength < TWITTER_CHARACTER_LIMIT;
}
/**
 * Scrapes reviews for every game returned by getGames that passes
 * gameMeetsCriteria.
 *
 * Each qualifying game is first recorded under `games/<id>` (the write is not
 * awaited) and then scraped page by page before the next game is considered.
 *
 * @returns {Promise<void>} Resolves after the last game has been processed.
 */
async function scrapeTop100Games2Weeks() {
  const candidates = await getGames();
  for (const candidate of candidates) {
    console.log(`Checking if ${candidate.name} meets criteria...`);
    if (!gameMeetsCriteria(candidate)) {
      continue;
    }
    database.ref(`games/${candidate.id}`).set(candidate);
    await scrapeAllReviews(candidate);
  }
}
/**
 * Looks up a single game with getGame and, if it passes gameMeetsCriteria,
 * records it under `games/<id>` (the write is not awaited) and scrapes all of
 * its reviews.
 *
 * @param {string|number} appId - App ID handed to getGame.
 * @returns {Promise<*>} Whatever scrapeAllReviews resolves to, or undefined
 *   when the game does not meet the criteria.
 */
async function scrapeGame(appId) {
  const fetchedGame = await getGame(appId);
  console.log(`Checking if ${fetchedGame.name} meets criteria...`);
  if (!gameMeetsCriteria(fetchedGame)) {
    return undefined;
  }
  database.ref(`games/${fetchedGame.id}`).set(fetchedGame);
  return scrapeAllReviews(fetchedGame);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment