Last active
September 2, 2017 15:24
-
-
Save zhirzh/7666c2742a2c7dfbd9859b05ee76aef1 to your computer and use it in GitHub Desktop.
google doodles
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const fs = require('fs'); | |
const path = require('path'); | |
// Directory holding the per-month JSON downloads, and the combined output file.
const rawDirPath = path.join(__dirname, 'raw');
const allDoodlesPath = path.join(__dirname, 'doodles.all.json');

/**
 * Read one raw month file and return the doodles it contains.
 * Unreadable, unparsable or non-array files are reported on stderr and
 * contribute no doodles, so one bad file does not abort the whole merge.
 * @param {string} filePath - Path of the JSON file to read.
 * @returns {Array} Doodles found in the file (possibly empty).
 */
function readDoodleFile(filePath) {
  try {
    const parsed = JSON.parse(fs.readFileSync(filePath, 'utf8'));
    if (Array.isArray(parsed)) {
      return parsed;
    }
    console.error('Not a JSON array:', filePath);
  } catch (err) {
    // Keep the underlying reason (syntax error, EISDIR, ...) in the log.
    console.error('Improper JSON:', filePath, `(${err.message})`);
  }
  return [];
}

const allDoodles = [];
fs
  .readdirSync(rawDirPath)
  // Ignore anything that is not a JSON download (editor files, partial downloads, ...).
  .filter(fileName => path.extname(fileName) === '.json')
  // Sort so the combined output does not depend on directory listing order.
  .sort()
  .forEach(fileName => {
    const fileDoodles = readDoodleFile(path.join(rawDirPath, fileName));
    fileDoodles.forEach(doodle => allDoodles.push(doodle));
  });

fs.writeFileSync(allDoodlesPath, JSON.stringify(allDoodles));
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
source "./fetch.sh" | |
# Download the doodle JSON for every month from January 1998 up to and
# including the current month, using fetch() from the sourced fetch.sh.
CURRENT_YEAR=$(date +%Y)
# `date +%m` is zero padded; 10# forces base 10 so "08"/"09" are not
# rejected as invalid octal numbers.
CURRENT_MONTH=$(( 10#$(date +%m) ))

for (( YEAR = 1998; YEAR <= CURRENT_YEAR; YEAR++ )); do
  for (( MONTH = 1; MONTH <= 12; MONTH++ )); do
    # fetch JSON
    fetch "$YEAR" "$MONTH"

    # bail: nothing exists beyond the current month. Compare the loop
    # counters numerically instead of relying on variables set inside fetch,
    # and leave both loops at once.
    if (( YEAR == CURRENT_YEAR && MONTH == CURRENT_MONTH )); then
      echo "BAIL"
      break 2
    fi
  done
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
source "./fetch.sh" | |
# Re-download the current month's doodles. The third argument ("true") is
# fetch's FORCE flag, so an already downloaded file is fetched again.
YEAR="$(date +%Y)"
MONTH="$(date +%m)"

fetch "$YEAR" "$MONTH" true
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Directory that receives one JSON file per month, named YYYY-MM.json.
DATA_PATH="raw"
mkdir -p "$DATA_PATH"

# A cached file of exactly this many bytes is reported as "NULL" below
# (presumably an empty JSON array "[]" -- confirm against the API).
NULL_FILESIZE=2

# fetch YEAR MONTH [FORCE]
#
# Download the doodle JSON for the given year and month into
# $DATA_PATH/YEAR-MM.json.
#   YEAR   four digit year
#   MONTH  month number, with or without a leading zero
#   FORCE  "true" to download even when the file already exists (default: false)
#
# Without FORCE an existing file is left alone and reported as NULL or SKIP.
# Returns 0 on success or skip, and wget's exit status when the download fails.
#
# NOTE: YEAR, MONTH, FORCE, ZERO_MONTH, URL and FILEPATH are deliberately left
# as globals; scripts that source this file may read them (e.g. ZERO_MONTH).
fetch() {
  YEAR=$1
  MONTH=$2
  FORCE=${3:-false}

  # 10# forces base 10 so "08"/"09" are not treated as invalid octal.
  ZERO_MONTH=$(printf %02d "$(( 10#$MONTH ))") # zero padding

  URL="https://www.google.com/doodles/json/$YEAR/$ZERO_MONTH?full=1"
  FILEPATH="$DATA_PATH/$YEAR-$ZERO_MONTH.json"

  if [[ $FORCE != true && -f "$FILEPATH" ]]; then
    FILESIZE=$(wc -c < "$FILEPATH")
    if [[ $FILESIZE -eq $NULL_FILESIZE ]]; then
      echo "NULL: $FILEPATH"
    else
      echo "SKIP: $FILEPATH"
    fi
    return
  fi

  echo "FETCH: $FILEPATH"

  # wget -O creates/truncates its target before any data arrives, so download
  # to a temporary name and only replace the real file on success. A failed
  # download then neither leaves a bogus cache entry (which would be skipped
  # forever) nor destroys a good file during a forced refresh.
  # The URL is quoted because it contains "?", which is a glob character.
  local tmp_path="$FILEPATH.part"
  if wget "$URL" -O "$tmp_path" -q --show-progress; then
    mv -f "$tmp_path" "$FILEPATH"
  else
    local rc=$?
    rm -f "$tmp_path"
    echo "FAILED: $FILEPATH" >&2
    return "$rc"
  fi
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const fs = require('fs'); | |
const path = require('path'); | |
const crypto = require('crypto'); | |
const allDoodles = require('./doodles.all.json'); | |
// Doodle fields that hold links; each one has its URL prefix rewritten
// by the clean-up pipeline below.
const linkTypes = [
  'alternate_url',
  'call_to_action_image_url',
  'hires_url',
  'standalone_html',
  'url',
];
// Known link prefixes, published in meta.json. The position of an entry is
// the numeric code that replaces the prefix in cleaned links (0, 1, 2).
const urlPrefixes = [
  'lh3.googleusercontent.com', // 0
  'www.google.com/logos', // 1
  'www.google.com/logos/doodles', // 2
];
// Fields kept for every cleaned doodle. Each doodle is written as an array of
// values in exactly this order, and the list itself is published in meta.json.
//
// Fields present on the doodles but currently left out of the output:
//   alternate_url, blog_text, call_to_action_image_url, collection_id,
//   countries, doodle_args, doodle_type, height, hires_height, hires_width,
//   history_doodles, id, is_animated_gif, is_dynamic, is_global,
//   is_highlighted, name, persistent_id, query, related_doodles, share_text,
//   standalone_html, tags, translations, width, youtube_id
const schema = [
  'hires_url',
  'next_doodle',
  'prev_doodle',
  'run_date_array',
  'title',
  'url',

  '_id', // unique ID for each doodle
];
/**
 * Generate unique hashes for doodles, deterministically.
 *
 * The hash is the MD5 hex digest of `[name](url)`. The text is hashed as
 * UTF-8: the 'ascii' input encoding keeps only the low byte of every
 * character, so names or URLs containing characters above U+00FF could
 * collide. Pure ASCII inputs produce the same digest under either encoding.
 *
 * @param {object} doodle - Doodle object to generate hash for.
 * @param {string} doodle.name - Doodle name.
 * @param {string} doodle.url - Doodle URL.
 * @returns {string} Unique hash for supplied doodle (32 hex characters).
 */
function generateDoodleHash(doodle) {
  return crypto
    .createHash('md5')
    .update(`[${doodle.name}](${doodle.url})`, 'utf8')
    .digest('hex');
}
/**
 * Serialise a value as JSON and write it to a file, replacing any existing
 * content.
 * @param {string} filepath - Path of the output file.
 * @param {any} json - Value to serialise.
 * @param {boolean} pretty - Indent the output with two spaces when true;
 *   otherwise write it without extra whitespace.
 */
function writeJSON(filepath, json, pretty = false) {
  const indent = pretty ? 2 : 0;
  const serialized = JSON.stringify(json, null, indent);
  fs.writeFileSync(filepath, serialized);
}
/**
 * Normalise a country/tag label so that labels differing only in case or
 * surrounding whitespace are treated as the same label.
 * @param {string} label - Raw label.
 * @returns {string} Trimmed, lower-cased label.
 */
const normalizeLabel = label => label.trim().toLowerCase();

/**
 * Collect the distinct normalised labels stored under `key` on every doodle,
 * in first-seen order.
 * @param {object[]} doodles - Doodles to scan.
 * @param {string} key - Name of an array-valued field ('countries' or 'tags').
 * @returns {string[]} Distinct labels.
 */
function collectLabels(doodles, key) {
  const labels = new Set();
  doodles.forEach(doodle => {
    doodle[key].forEach(label => labels.add(normalizeLabel(label)));
  });
  return Array.from(labels);
}

/**
 * Build a label -> position lookup so each label resolves in constant time
 * instead of scanning the label list once per doodle entry.
 * @param {string[]} labels - Distinct labels.
 * @returns {Map<string, number>} Position of each label in `labels`.
 */
function indexLabels(labels) {
  return new Map(labels.map((label, index) => [label, index]));
}

// Link prefixes and the numeric code (index into urlPrefixes) that replaces
// them. Longer prefixes come first: '//www.google.com/logos/doodles' also
// starts with '//www.google.com/logos', so testing the shorter one first
// would make code 2 unreachable.
const linkPrefixCodes = [
  ['https://lh3.googleusercontent.com', 0],
  ['//www.google.com/logos/doodles', 2],
  ['//www.google.com/logos', 1],
  ['/logos', 1],
];

/**
 * Replace a known leading prefix of a link with its numeric code.
 * Links with no known prefix, and values that are not strings, are returned
 * unchanged.
 * @param {any} link - Link field value.
 * @returns {any} Shortened link, or the original value.
 */
function compressLink(link) {
  if (typeof link !== 'string') {
    return link;
  }
  const match = linkPrefixCodes.find(([prefix]) => link.startsWith(prefix));
  if (match === undefined) {
    return link;
  }
  const [prefix, code] = match;
  return `${code}${link.slice(prefix.length)}`;
}

/**
 * Replace an embedded neighbouring doodle with its hash.
 * A missing neighbour (null or undefined) is passed through untouched.
 * @param {object|null|undefined} linkedDoodle - Embedded doodle, if any.
 * @returns {string|null|undefined} Hash of the embedded doodle.
 */
function hashLinkedDoodle(linkedDoodle) {
  return linkedDoodle == null ? linkedDoodle : generateDoodleHash(linkedDoodle);
}

const allCountries = collectLabels(allDoodles, 'countries');
const allTags = collectLabels(allDoodles, 'tags');
const countryIndex = indexLabels(allCountries);
const tagIndex = indexLabels(allTags);

const cleanDoodles = allDoodles.map(doodle => {
  // Work on a shallow copy so the loaded input objects are not modified.
  // Hashes are computed from the untouched input, before links are shortened.
  const cleaned = Object.assign({}, doodle, {
    _id: generateDoodleHash(doodle),
    // Embedded doodles are replaced by their hashes.
    next_doodle: hashLinkedDoodle(doodle.next_doodle),
    prev_doodle: hashLinkedDoodle(doodle.prev_doodle),
    related_doodles: doodle.related_doodles.map(related =>
      generateDoodleHash(related),
    ),
    history_doodles: doodle.history_doodles.map(history =>
      generateDoodleHash(history),
    ),
    // Labels are replaced by their position in the lists written to meta.json.
    countries: doodle.countries.map(country =>
      countryIndex.get(normalizeLabel(country)),
    ),
    tags: doodle.tags.map(tag => tagIndex.get(normalizeLabel(tag))),
  });

  linkTypes.forEach(linkType => {
    cleaned[linkType] = compressLink(doodle[linkType]);
  });

  // Emit a compact array holding only the schema fields, in schema order.
  return schema.map(key => cleaned[key]);
});

writeJSON('doodles.clean.json', cleanDoodles);
writeJSON('meta.json', {
  countries: allCountries,
  tags: allTags,
  schema,
  urlPrefixes,
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const fs = require('fs'); | |
const path = require('path'); | |
// Print every key that occurs on any doodle together with the constructor
// (String, Number, Array, ...) of a value stored under it.
const allDoodlesPath = path.join(__dirname, 'doodles.all.json');
const allDoodles = require(allDoodlesPath);

const keys = {};
allDoodles.forEach(doodle => {
  Object.keys(doodle).forEach(k => {
    const value = doodle[k];
    if (value != null) {
      keys[k] = value.constructor;
    } else if (!Object.prototype.hasOwnProperty.call(keys, k)) {
      // null/undefined has no constructor; record the key without letting a
      // null overwrite a type already seen on another doodle.
      keys[k] = null;
    }
  });
});

console.log(keys);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment