Skip to content

Instantly share code, notes, and snippets.

@ozkatz
Created February 19, 2021 18:20
Show Gist options
  • Save ozkatz/b106da34ac4c8608ccf5c1190ea69a40 to your computer and use it in GitHub Desktop.
Save ozkatz/b106da34ac4c8608ccf5c1190ea69a40 to your computer and use it in GitHub Desktop.
lakeFS Python resolver (returns S3 addresses for objects under a prefix)
#!/usr/bin/env python
from urllib.parse import urlparse
from typing import Iterator
from bravado.requests_client import RequestsClient
from bravado.client import SwaggerClient
class lakeFSResolver(object):
    """
    An example lakeFS client that allows resolving
    lakeFS paths to their underlying object store locations.

    The API client is generated at construction time from the server's
    ``swagger.json`` and is configured with HTTP basic auth.
    """

    # Number of entries requested from listObjects per call.
    _PAGE_SIZE = 1000

    def __init__(self, access_key: str, secret_key: str, base_url: str = 'http://localhost:8000'):
        """
        Build the underlying Swagger client.

        :param access_key: user name passed to HTTP basic auth.
        :param secret_key: password passed to HTTP basic auth.
        :param base_url: root URL of the server; ``swagger.json`` is fetched
            from directly beneath it. A trailing slash is tolerated.
        """
        url = urlparse(base_url)
        http_client = RequestsClient()
        # Basic-auth credentials are registered against the netloc
        # (host[:port]) of base_url.
        http_client.set_basic_auth(url.netloc, access_key, secret_key)
        self._client = SwaggerClient.from_url(
            self._swagger_url(base_url),
            http_client=http_client,
            config={"validate_swagger_spec": False})

    @staticmethod
    def _swagger_url(base_url: str) -> str:
        """Return the swagger.json URL under *base_url* without doubling the slash."""
        return f"{base_url.rstrip('/')}/swagger.json"

    def resolve_paths(self, repo: str, ref: str, prefix: str) -> Iterator[str]:
        """
        Returns the object store addresses for the given ref and prefix.

        Pages through ``listObjects`` and lazily yields the
        ``physical_address`` of every entry whose ``path_type`` is
        ``'object'``; entries of any other type are skipped.

        :param repo: repository name passed to ``listObjects``.
        :param ref: ref passed to ``listObjects``.
        :param prefix: only entries under this prefix are listed.
        """
        after = ''
        while True:
            result = self._client.objects.listObjects(
                repository=repo,
                ref=ref,
                amount=self._PAGE_SIZE,
                prefix=prefix,
                after=after,
            ).response().result

            # A page without a results list is treated as empty.
            for obj in result.get('results') or ():
                if obj.path_type == 'object':
                    yield obj.physical_address

            pagination = result.get('pagination')
            if pagination is None or not pagination.has_more:
                return  # no more things.

            next_offset = pagination.next_offset
            # A missing or non-advancing cursor would make us request the
            # same page forever, so stop instead of looping.
            if not next_offset or next_offset == after:
                return
            after = next_offset
# Using the client to return S3 locations:
def example_usage():
    """
    Demonstrate the resolver: print the object store address of every
    object under ``collections/`` on ref ``master`` of ``my-repo``.

    Connects to the resolver's default base URL.
    """
    # NOTE(review): the key pair below is hard-coded in this example;
    # confirm it is not a live credential and prefer loading real keys
    # from configuration or the environment.
    resolver = lakeFSResolver(
        'AKIAJ4EZNLHXCH5BCR2Q',
        '6Ntku2+lN9Jbag4HAsyUWJKr7xO5ukE+BcDcofYt',
    )
    addresses = resolver.resolve_paths('my-repo', 'master', prefix='collections/')
    for address in addresses:
        print(address)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment