Skip to content

Instantly share code, notes, and snippets.

@smashercosmo
Created March 9, 2023 22:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save smashercosmo/963a1acc752439d04b4bc696e9ad2f37 to your computer and use it in GitHub Desktop.
Save smashercosmo/963a1acc752439d04b4bc696e9ad2f37 to your computer and use it in GitHub Desktop.
Extract tmdb ids
let got = require('got')
let zlib = require('zlib')
let os = require('os')
/**
 * Streams the gzipped TMDB movie-id export, splits the decompressed text into
 * newline-delimited JSON records and extracts the ids of every record that is
 * neither flagged `adult` nor `video`.
 *
 * The (event, context, callback) signature is presumably an AWS Lambda-style
 * handler; `event` and `context` are not read.
 *
 * @param {*} event - Invocation event (unused).
 * @param {*} context - Invocation context (unused).
 * @param {Function} callback - Node-style completion callback. Called exactly
 *   once: with `null` when the whole export was processed, or with the error
 *   (download, gunzip or JSON parse failure) that aborted processing.
 */
module.exports.getIds = function getIds(event, context, callback) {
  const unzip = zlib.createGunzip()
  /* TODO url should be constructed based on the current date */
  const stream = got.stream('https://files.tmdb.org/p/exports/movie_ids_01_22_2020.json.gz')

  let finished = false
  /* Single exit point so the callback can never fire twice. */
  const done = error => {
    if (finished) {
      return
    }
    finished = true
    if (error) {
      /* Stop downloading/decompressing once the run has failed. */
      stream.destroy()
      unzip.destroy()
    }
    callback(error || null)
  }

  /* Parses complete lines and keeps the ids of non-adult, non-video entries. */
  const extractIds = lines =>
    lines
      .filter(line => line.trim() !== '')
      .map(line => JSON.parse(line))
      .filter(obj => !obj.adult && !obj.video)
      .map(obj => obj.id)

  /* Let the stream decode UTF-8 so multi-byte characters that straddle a
     chunk boundary are not corrupted. */
  unzip.setEncoding('utf8')

  let incompleteLine = ''
  unzip.on('data', chunk => {
    if (finished) {
      return
    }
    /* The export is "\n"-delimited regardless of the host OS, so os.EOL must
       not be used here; /\r?\n/ also tolerates CRLF input. */
    const lines = (incompleteLine + chunk).split(/\r?\n/)
    /* The last element is either '' (chunk ended on a newline) or the start
       of a record that continues in the next chunk. */
    incompleteLine = lines.pop()
    try {
      /* TODO this chunk of ids should be sent further to (probably) SQS */
      let ids = extractIds(lines)
    } catch (error) {
      done(error)
    }
  })
  unzip.on('end', () => {
    try {
      /* Flush a final record that was not terminated by a newline. */
      /* TODO this chunk of ids should be sent further to (probably) SQS */
      let ids = extractIds([incompleteLine])
      incompleteLine = ''
    } catch (error) {
      done(error)
      return
    }
    done(null)
  })
  unzip.on('error', done)
  /* pipe() does not forward errors, so download failures need their own handler. */
  stream.on('error', done)
  stream.pipe(unzip)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment