simonw/fetch_metadata_for_doc_ids.py

## fetch_metadata_for_doc_ids.py
def fetch_metadata_for_doc_ids(doc_ids, oauth_token, session=None):
    """Fetch Drive v3 file metadata for several documents in one batch request.

    Builds a ``multipart/mixed`` body holding one
    ``GET /drive/v3/files/<id>?fields=*`` sub-request per id, POSTs it to the
    Drive batch endpoint and decodes the JSON payload of every part of the
    reply.

    Args:
        doc_ids: Iterable of Drive file ids. It is copied into a list, so
            one-shot iterables such as generators are accepted.
        oauth_token: OAuth access token, sent as a ``Bearer`` credential.
        session: Optional object exposing a ``requests``-style
            ``post(url, data=..., headers=...)`` method (for example a
            ``requests.Session``). When omitted, ``requests.post`` is used.

    Returns:
        dict mapping each doc id to the decoded JSON payload of its
        sub-response. The status line of a sub-response is not inspected, so
        a failed sub-request contributes whatever JSON payload came back for
        it. Ids with no matching part in the reply are absent. An empty
        ``doc_ids`` returns ``{}`` without any network call.

    Raises:
        ValueError: if the reply is not multipart, a part is malformed, or a
            payload is not valid JSON.
        Exception: whatever ``response.raise_for_status()`` raises when the
            batch call itself fails (``requests.HTTPError`` for real
            ``requests`` responses).

    NOTE(review): all ids are sent in a single batch; the batch endpoint is
    documented to cap the number of calls per request -- confirm callers stay
    under that limit.
    """
    doc_ids = list(doc_ids)  # used for both building and matching
    if not doc_ids:
        return {}

    if session is None:
        # Imported lazily so a caller-supplied session needs no global import.
        import requests
        http = requests
    else:
        http = session

    boundary = 'batch_boundary'
    headers = {
        'Authorization': 'Bearer {}'.format(oauth_token),
        'Content-Type': 'multipart/mixed; boundary=%s' % boundary,
    }
    response = http.post(
        'https://www.googleapis.com/batch/drive/v3',
        data=_build_batch_body(doc_ids, boundary).encode('utf-8'),
        headers=headers,
    )
    # Fail loudly on an HTTP error instead of tripping over a non-multipart
    # error document further down.
    response.raise_for_status()
    return _parse_batch_response(
        doc_ids, response.headers['Content-Type'], response.content
    )


def _build_batch_body(doc_ids, boundary):
    """Return the multipart request text: one GET sub-request per doc id."""
    from urllib.parse import quote

    parts = []
    for index, doc_id in enumerate(doc_ids):
        # Escape the id so it cannot inject extra path, query or header text.
        url = 'https://www.googleapis.com/drive/v3/files/{}?fields=*'.format(
            quote(str(doc_id), safe='')
        )
        parts.append(
            '--{boundary}\n'
            'Content-Type: application/http\n'
            'Content-ID: <item-{index}>\n\n'
            'GET {url}\n\n'.format(boundary=boundary, index=index, url=url)
        )
    parts.append('--%s--' % boundary)
    return ''.join(parts)


def _parse_batch_response(doc_ids, content_type, content):
    """Map doc ids to the JSON payloads found in a multipart batch reply."""
    import json
    import re
    from email.message import Message

    # Let the stdlib parse the header so quoting/spacing variations work.
    header = Message()
    header['Content-Type'] = content_type
    boundary = header.get_boundary()
    if boundary is None:
        raise ValueError(
            'Batch response is not multipart: Content-Type=%r' % (content_type,)
        )
    delimiter = b'--' + boundary.encode('utf8')
    content_id_re = re.compile(
        br'^content-id:\s*<?response-item-(\d+)>?\s*$',
        re.IGNORECASE | re.MULTILINE,
    )

    metadata_by_id = {}
    # Element 0 of the split is the preamble before the first delimiter.
    for position, piece in enumerate(content.split(delimiter)[1:]):
        if piece.startswith(b'--'):
            break  # closing "--boundary--" marker
        # Part headers, embedded HTTP status + headers, payload. Limiting the
        # split to 2 keeps blank lines inside the payload intact.
        sections = re.split(br'\r?\n\r?\n', piece.strip(), maxsplit=2)
        if len(sections) != 3:
            raise ValueError('Malformed batch response part %d' % position)
        part_headers, _http_headers, payload = sections

        # Prefer the echoed Content-ID; fall back to positional order when
        # the part carries none.
        match = content_id_re.search(part_headers)
        index = int(match.group(1)) if match else position
        if not 0 <= index < len(doc_ids):
            continue
        metadata_by_id[doc_ids[index]] = json.loads(payload.decode('utf8'))
    return metadata_by_id
	def fetch_metadata_for_doc_ids(doc_ids, oauth_token, session=None):
		"""Fetch Drive v3 file metadata for several documents in one batch request.

		Builds a ``multipart/mixed`` body holding one
		``GET /drive/v3/files/<id>?fields=*`` sub-request per id, POSTs it to the
		Drive batch endpoint and decodes the JSON payload of every part of the
		reply.

		Args:
			doc_ids: Iterable of Drive file ids. It is copied into a list, so
				one-shot iterables such as generators are accepted.
			oauth_token: OAuth access token, sent as a ``Bearer`` credential.
			session: Optional object exposing a ``requests``-style
				``post(url, data=..., headers=...)`` method (for example a
				``requests.Session``). When omitted, ``requests.post`` is used.

		Returns:
			dict mapping each doc id to the decoded JSON payload of its
			sub-response. The status line of a sub-response is not inspected, so
			a failed sub-request contributes whatever JSON payload came back for
			it. Ids with no matching part in the reply are absent. An empty
			``doc_ids`` returns ``{}`` without any network call.

		Raises:
			ValueError: if the reply is not multipart, a part is malformed, or a
				payload is not valid JSON.
			Exception: whatever ``response.raise_for_status()`` raises when the
				batch call itself fails (``requests.HTTPError`` for real
				``requests`` responses).

		NOTE(review): all ids are sent in a single batch; the batch endpoint is
		documented to cap the number of calls per request -- confirm callers stay
		under that limit.
		"""
		doc_ids = list(doc_ids)  # used for both building and matching
		if not doc_ids:
			return {}

		if session is None:
			# Imported lazily so a caller-supplied session needs no global import.
			import requests
			http = requests
		else:
			http = session

		boundary = 'batch_boundary'
		headers = {
			'Authorization': 'Bearer {}'.format(oauth_token),
			'Content-Type': 'multipart/mixed; boundary=%s' % boundary,
		}
		response = http.post(
			'https://www.googleapis.com/batch/drive/v3',
			data=_build_batch_body(doc_ids, boundary).encode('utf-8'),
			headers=headers,
		)
		# Fail loudly on an HTTP error instead of tripping over a non-multipart
		# error document further down.
		response.raise_for_status()
		return _parse_batch_response(
			doc_ids, response.headers['Content-Type'], response.content
		)


	def _build_batch_body(doc_ids, boundary):
		"""Return the multipart request text: one GET sub-request per doc id."""
		from urllib.parse import quote

		parts = []
		for index, doc_id in enumerate(doc_ids):
			# Escape the id so it cannot inject extra path, query or header text.
			url = 'https://www.googleapis.com/drive/v3/files/{}?fields=*'.format(
				quote(str(doc_id), safe='')
			)
			parts.append(
				'--{boundary}\n'
				'Content-Type: application/http\n'
				'Content-ID: <item-{index}>\n\n'
				'GET {url}\n\n'.format(boundary=boundary, index=index, url=url)
			)
		parts.append('--%s--' % boundary)
		return ''.join(parts)


	def _parse_batch_response(doc_ids, content_type, content):
		"""Map doc ids to the JSON payloads found in a multipart batch reply."""
		import json
		import re
		from email.message import Message

		# Let the stdlib parse the header so quoting/spacing variations work.
		header = Message()
		header['Content-Type'] = content_type
		boundary = header.get_boundary()
		if boundary is None:
			raise ValueError(
				'Batch response is not multipart: Content-Type=%r' % (content_type,)
			)
		delimiter = b'--' + boundary.encode('utf8')
		content_id_re = re.compile(
			br'^content-id:\s*<?response-item-(\d+)>?\s*$',
			re.IGNORECASE | re.MULTILINE,
		)

		metadata_by_id = {}
		# Element 0 of the split is the preamble before the first delimiter.
		for position, piece in enumerate(content.split(delimiter)[1:]):
			if piece.startswith(b'--'):
				break  # closing "--boundary--" marker
			# Part headers, embedded HTTP status + headers, payload. Limiting the
			# split to 2 keeps blank lines inside the payload intact.
			sections = re.split(br'\r?\n\r?\n', piece.strip(), maxsplit=2)
			if len(sections) != 3:
				raise ValueError('Malformed batch response part %d' % position)
			part_headers, _http_headers, payload = sections

			# Prefer the echoed Content-ID; fall back to positional order when
			# the part carries none.
			match = content_id_re.search(part_headers)
			index = int(match.group(1)) if match else position
			if not 0 <= index < len(doc_ids):
				continue
			metadata_by_id[doc_ids[index]] = json.loads(payload.decode('utf8'))
		return metadata_by_id