Skip to content

Instantly share code, notes, and snippets.

@lmandel
Created August 1, 2013 15:11
Show Gist options
  • Save lmandel/6132291 to your computer and use it in GitHub Desktop.
Save lmandel/6132291 to your computer and use it in GitHub Desktop.
Scrape ratings data from Google Play
#!/bin/sh
# Scrape rating data for one Google Play app and store it as a dated JSON file.
#
# Usage: <script> PRODUCT
#   PRODUCT  Google Play package id, e.g. org.mozilla.firefox
#
# The app's details page is downloaded to ./PRODUCT.html, scraped with
# grep/sed, and the result is written to OUTPUT_PATH/PRODUCT-YYYYMMDD.json.

# Set to "true" to echo every scraped value to stdout before the JSON is written.
DEBUG=false
# Date stamp (YYYYMMDD) that becomes part of the JSON file name.
DATE=$(date +%Y%m%d)
# Base URL of the details page; the package id is appended to it.
# Quoted because it contains '?', which the shell treats as a glob character.
PLAY_URL='https://play.google.com/store/apps/details?id='

# Without a package id the script would fetch a bare URL and write files named
# ".html" and "-DATE.json", so refuse to run instead.
if [ -z "${1:-}" ]; then
  printf 'Usage: %s PRODUCT\n' "${0##*/}" >&2
  exit 2
fi
PRODUCT=$1
# Disabled short aliases for well-known packages (kept for reference):
#if [ "$1" = 'firefox' ]
#then
# PRODUCT=org.mozilla.firefox
#fi
#if [ "$1" = 'beta' ]
#then
# PRODUCT=org.mozilla.firefox_beta
#fi
# Directory (with trailing slash) that receives the JSON file.
OUTPUT_PATH=/home/lmandel/public_html/reviews/
# Scratch copy of the downloaded page, created in the current directory.
FILENAME=${PRODUCT}.html
JSON_FILENAME=${PRODUCT}-${DATE}.json

# Stop when the download fails: scraping a missing or partial page would only
# produce a JSON file full of empty values. wget -O may leave a partial file
# behind, so remove it before exiting.
if ! wget -O "$FILENAME" "${PLAY_URL}${PRODUCT}"; then
  printf 'error: could not download %s%s\n' "$PLAY_URL" "$PRODUCT" >&2
  rm -f -- "$FILENAME"
  exit 1
fi
# Print the average rating (e.g. "4.4") taken from the first
# 'title="Rating: N stars' ratings <div> in the saved page.
#   $1 - path of the saved details page
# Prints nothing when the page contains no such element.
scrape_rating() {
  grep -m 1 -o '<div class="ratings goog-inline-block" title="Rating: [0-9.]\{1,3\} stars' "$1" |
    sed -n '1s/<div class="ratings goog-inline-block" title="Rating: \([0-9.]*\) stars/\1/p'
}

# Print the star sprite markup (on / half / off), one sprite per line, for at
# most the first five stars found on the first line of the page that has any.
#   $1 - path of the saved details page
# -m 1 limits grep to the first matching line; sed drops anything past the
# fifth sprite so that only one five-star widget is counted.
first_five_star_sprites() {
  grep -m 1 -o -E '<div class="goog-inline-block star SPRITE_star_(on|half|off)_dark' "$1" |
    sed '6,$d'
}

# Count the sprite lines on stdin that are of the given kind.
#   $1 - sprite kind: on, half or off
count_stars() {
  grep -c "SPRITE_star_${1}_dark"
}

RATING=$(scrape_rating "$FILENAME")
# Scan the page once and reuse the result for all three counters.
STAR_SPRITES=$(first_five_star_sprites "$FILENAME")
ONSTARS=$(printf '%s\n' "$STAR_SPRITES" | count_stars on)
HALFSTARS=$(printf '%s\n' "$STAR_SPRITES" | count_stars half)
OFFSTARS=$(printf '%s\n' "$STAR_SPRITES" | count_stars off)
# Print the markup of every rating-histogram bar found on the first line of
# the page that contains one, one bar per output line.
#   $1 - path of the saved details page
# The pattern uses a plain '&nbsp;': the previous '\;' escape is undefined in a
# basic regular expression, and newer GNU grep releases warn about such stray
# backslashes.
vote_bars() {
  grep -m 1 -o '<span class="bar bar[1-5]" style="width:[0-9]*px">&nbsp;</span>&nbsp;<span>[0-9,]*</span>' "$1"
}

# Read bar markup (as printed by vote_bars) on stdin and print the vote count,
# thousands separators included, of the bar at the given position.
#   $1 - 1-based line number of the bar; must be a plain integer because it is
#        spliced into the sed script
# Prints nothing when there is no bar at that position.
nth_vote_count() {
  sed -n "$1"'s/.*<span>\([0-9,]*\)<\/span>$/\1/p'
}

# Scan the page once. The bars are read in page order, and this script treats
# the first bar as the five-star row and the fifth as the one-star row.
VOTE_BARS=$(vote_bars "$FILENAME")
FIVESTARVOTES=$(printf '%s\n' "$VOTE_BARS" | nth_vote_count 1)
FOURSTARVOTES=$(printf '%s\n' "$VOTE_BARS" | nth_vote_count 2)
THREESTARVOTES=$(printf '%s\n' "$VOTE_BARS" | nth_vote_count 3)
TWOSTARVOTES=$(printf '%s\n' "$VOTE_BARS" | nth_vote_count 4)
ONESTARVOTES=$(printf '%s\n' "$VOTE_BARS" | nth_vote_count 5)
# Echo every scraped value to stdout, one labelled line each, when DEBUG is
# exactly "true"; otherwise print nothing.
# Reads: DEBUG, RATING, ONSTARS, HALFSTARS, OFFSTARS and the five *VOTES values.
print_debug() {
  # '=' is the POSIX string comparison ('==' is not accepted by every /bin/sh),
  # and the quotes keep the test valid when DEBUG is empty or unset.
  [ "$DEBUG" = 'true' ] || return 0
  printf '%s\n' \
    "Rating $RATING" \
    "ON stars $ONSTARS" \
    "HALF stars $HALFSTARS" \
    "OFF stars $OFFSTARS" \
    "5 star votes $FIVESTARVOTES" \
    "4 star votes $FOURSTARVOTES" \
    "3 star votes $THREESTARVOTES" \
    "2 star votes $TWOSTARVOTES" \
    "1 star votes $ONESTARVOTES"
}
print_debug
# Print the scraped values as a single-line JSON object on stdout.
# Reads: RATING, ONSTARS, HALFSTARS, OFFSTARS and the five *VOTES values.
# Keys and string values use double quotes, as JSON requires (the earlier
# single-quoted form is rejected by strict JSON parsers). A numeric field that
# could not be scraped is written as null rather than left empty, which would
# break the document. Vote counts stay strings because they carry thousands
# separators (e.g. "12,345").
render_json() {
  printf '{"rating":%s,"stars":{"on":%s,"half":%s,"off":%s},"votes":{"five":"%s","four":"%s","three":"%s","two":"%s","one":"%s"}}\n' \
    "${RATING:-null}" "${ONSTARS:-null}" "${HALFSTARS:-null}" "${OFFSTARS:-null}" \
    "$FIVESTARVOTES" "$FOURSTARVOTES" "$THREESTARVOTES" "$TWOSTARVOTES" "$ONESTARVOTES"
}

# Store the result as OUTPUT_PATH/PRODUCT-DATE.json, then delete the
# downloaded page, which is only a scratch file.
render_json > "${OUTPUT_PATH}${JSON_FILENAME}"
rm -- "$FILENAME"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment