Skip to content

Instantly share code, notes, and snippets.

# Assign identifier and collection to variables for use in final output.
.metadata.identifier as $i |
.metadata.collection as $c |
# Filter out items that do not have file metadata.
select(.files != null) |
.files |
# Filter out non EXE files.
map(
# Assign identifier and collection to variables for use in final output.
.metadata.identifier as $i |
.metadata.collection as $c |
# Filter out any items that do not have files metadata.
select(.files != null) |
# Get all non-derivative files that have a file size, and slim down the metadata.
.files |
map(
#!/usr/bin/env python
import sys
import csv
from datetime import datetime
from dateutil import tz
def get_est(date):
from_zone = tz.gettz('UTC')
to_zone = tz.gettz('EST')
#!/usr/bin/env python
import sys
import csv
from datetime import datetime
from dateutil import tz
def get_est(date):
from_zone = tz.gettz('UTC')
to_zone = tz.gettz('EST')
import aiohttp
import asyncio
@asyncio.coroutine
def get(*args, **kwargs):
response = yield from aiohttp.request('GET', *args, **kwargs)
return (yield from response.json())
@asyncio.coroutine
import aiohttp
import asyncio
@asyncio.coroutine
def get(*args, **kwargs):
response = yield from aiohttp.request('GET', *args, **kwargs)
return (yield from response.json())
@asyncio.coroutine
from functools import partial
import trollius as asyncio
from trollius import From
from internetarchive import get_item
from internetarchive.session import ArchiveSession
class Mine(object):
#!/usr/bin/env python
import logging
import sys
from clint.textui import progress
from internetarchive import get_data_miner, get_item
# Logging.
log = logging.getLogger(__name__)
#!/bin/bash
#
# Assert that a web item is fully derived by comparing the number of
# WARCs to the number of CDXs. An item that is not fully derived will
# have fewer CDXs than WARCs.
#
# Requires: https://github.com/jjjake/ia-wrapper
#
# Usage:
# $ ./assert_web_item_is_fully_derived $IDENTIFIER
#!/bin/bash
#
# Assert that a web item is fully derived by comparing the number of
# WARCs to the number of CDXs. An item that is not fully derived will
# have fewer CDXs than WARCs.
#
# Requires: https://github.com/jjjake/ia-wrapper
#
# Usage:
# $ ./assert_web_item_is_fully_derived $IDENTIFIER