Skip to content

Instantly share code, notes, and snippets.

@stkbailey
Last active August 14, 2021 19:39
Show Gist options
  • Save stkbailey/8702fb04b079adb22bca89a72b5bdbf7 to your computer and use it in GitHub Desktop.
Save stkbailey/8702fb04b079adb22bca89a72b5bdbf7 to your computer and use it in GitHub Desktop.
Convert Singer Stream Catalog to Meltano SDK PropertiesList
import json
import pathlib
import sys
from typing import List
# One indentation level (four spaces) used when laying out the generated code.
INDENTATION = "    "

# Maps a JSON Schema type name to the name of the `th` helper that is written
# into the generated source. Note that "null" is rendered as th.StringType.
TYPE_DICT = {
    "string": "th.StringType",
    "integer": "th.IntegerType",
    "number": "th.NumberType",
    "array": "th.ArrayType",
    "object": "th.ObjectType",
    "null": "th.StringType",
    "boolean": "th.BooleanType",
}
def parse_attribute(key, value, depth=0) -> str:
    """Render one schema property, choosing the renderer from its type.

    Each renderer returns text that begins with a newline, so every
    attribute starts on its own line of the generated source.

    Args:
        key: Name of the property.
        value: Schema of the property. Its "type" entry selects the
            renderer; a missing "type" is treated as "null".
        depth: Nesting level, passed through to the renderer.

    Returns:
        The text produced by print_array_attribute, print_object_attribute
        or print_normal_attribute.
    """
    # Progress output on stdout while the schema is walked.
    print(f"Parsing {key}: {value}")
    kind = extract_datatype(value.get("type", "null"))
    print(f"..Identified as {kind}")

    if kind == "array":
        return print_array_attribute(key, value, depth)
    if kind == "object":
        return print_object_attribute(key, value, depth)
    return print_normal_attribute(key, value, depth)
def print_normal_attribute(key, value, depth=0) -> str:
    """Render a non-container property as a ``th.Property(...)`` line.

    Args:
        key: Name of the property.
        value: Schema of the property. "type" may be a single name or a
            list of names; a missing "type" is treated as "null".
        depth: Nesting level; the line is indented by ``depth`` copies of
            INDENTATION.

    Returns:
        A newline, the indentation, and ``th.Property("<key>", <type>),``.

    Raises:
        KeyError: If the resolved type name has no entry in TYPE_DICT.
    """
    raw_type = extract_datatype(value.get("type", "null"))
    datatype = TYPE_DICT[raw_type]
    # The name ends up inside generated Python source. json.dumps produces a
    # double-quoted literal with quotes, backslashes and control characters
    # escaped, so unusual names cannot break the output; ordinary names render
    # exactly as before.
    name = json.dumps(str(key), ensure_ascii=False)
    return "\n" + depth * INDENTATION + f"th.Property({name}, {datatype}),"
def print_array_attribute(key, value, depth=0) -> str:
    """Render an array property as ``th.Property("<key>", th.ArrayType(...)),``.

    Args:
        key: Name of the property.
        value: Schema of the array. The element type is read from
            ``value["items"]["type"]``; missing "items" or "type" is treated
            as "null".
        depth: Nesting level; lines are indented by ``depth`` copies of
            INDENTATION.

    Returns:
        The generated text, starting with a newline.

    Raises:
        KeyError: If a non-object element type has no entry in TYPE_DICT.
    """
    items = value.get("items", {})
    item_type = extract_datatype(items.get("type", "null"))
    indent = "\n" + depth * INDENTATION
    array_type = TYPE_DICT["array"]
    # Escape the name so quotes or backslashes in it cannot break the
    # generated source; ordinary names render exactly as before.
    name = json.dumps(str(key), ensure_ascii=False)
    opening = indent + f"th.Property({name}, {array_type}("

    if item_type != "object":
        return opening + f"{TYPE_DICT[item_type]})),"

    if not items.get("properties"):
        # Object elements without declared properties: emit an empty object
        # type and close all three calls. Previously this path produced
        # `th.ArrayType(),`, which left the parentheses unbalanced.
        object_type = TYPE_DICT["object"]
        return opening + f"{object_type}())),"

    # With key=None the nested renderer emits a bare object type whose
    # trailing `)),` also closes the ArrayType( opened above, so only the
    # Property( call is left to close here.
    nested = print_object_attribute(None, items, depth + 1)
    return opening + nested + indent + "),"
def print_object_attribute(key, value, depth=0) -> str:
    """Render an object schema, recursing into its properties.

    Args:
        key: Name of the property, or None to emit a bare
            ``th.ObjectType(`` without the surrounding ``th.Property``.
        value: Schema of the object; nested properties are read from
            ``value["properties"]``.
        depth: Nesting level; lines are indented by ``depth`` copies of
            INDENTATION and nested properties by one level more.

    Returns:
        The generated text, starting with a newline and ending in ``)),``.
        When ``key`` is None only one call is opened here, so the second
        closing parenthesis closes a call opened by the caller
        (print_array_attribute relies on this for its ``th.ArrayType(``).
    """
    indent = "\n" + depth * INDENTATION
    object_type = TYPE_DICT["object"]

    if key is None:
        opening = indent + f"{object_type}("
    else:
        # Escape the name so quotes or backslashes in it cannot break the
        # generated source; ordinary names render exactly as before.
        name = json.dumps(str(key), ensure_ascii=False)
        opening = indent + f"th.Property({name}, {object_type}("

    properties = value.get("properties", {})
    if not properties:
        return opening + ")),"

    nested = parse_object_attributes(properties, depth + 1)
    return opening + nested + indent + ")),"
def extract_datatype(value):
    """Reduce a JSON Schema "type" entry to a single type name.

    Args:
        value: Either a single type name or a list of type names such as
            ``["null", "string"]``.

    Returns:
        For a list, its first entry that is not "null"; if the list is empty
        or contains only "null", the string "null" (previously this raised
        IndexError). Any non-list value is returned unchanged.
    """
    if isinstance(value, list):
        return next((name for name in value if name != "null"), "null")
    return value
def parse_object_attributes(properties, depth=0) -> str:
    """Render every property of an object schema, in mapping order.

    Args:
        properties: Mapping of property name to property schema.
        depth: Nesting level handed to parse_attribute for each property.

    Returns:
        The concatenated output of parse_attribute for each property, or the
        literal string "()" when ``properties`` is empty.
    """
    if not properties:
        # NOTE(review): parse_json_schema inserts this value verbatim, so an
        # empty top-level schema renders as `th.PropertiesList(\n()\n)`.
        # Confirm that is the intended output before changing it.
        return "()"
    return "".join(
        parse_attribute(name, schema, depth) for name, schema in properties.items()
    )
def parse_json_schema(stream_schema: dict):
    """Produce the source of a module that defines ``schema``.

    Args:
        stream_schema: Mapping of top-level property name to property
            schema; rendered at depth 1 so the properties sit one level
            inside the ``th.PropertiesList(...)`` call.

    Returns:
        Source text consisting of the ``singer_sdk`` typing import followed
        by ``schema = th.PropertiesList(...).to_dict()``.
    """
    # Every rendered attribute starts with a newline; trim the outer ones so
    # the call body has no blank first or last line.
    body = parse_object_attributes(stream_schema, depth=1).strip("\n")
    return (
        "from singer_sdk import typing as th\n"
        "schema = th.PropertiesList(\n"
        + body
        + "\n).to_dict()\n"
    )
if __name__ == "__main__":
    # Command line: <script> INPUT_SCHEMA_JSON OUTPUT_PYTHON_FILE
    if len(sys.argv) < 3:
        # Exit with a usage message instead of an IndexError traceback.
        sys.exit(f"usage: {sys.argv[0]} INPUT_SCHEMA_JSON OUTPUT_PYTHON_FILE")
    input_fname = sys.argv[1]
    output_fname = sys.argv[2]

    # JSON is exchanged as UTF-8 and Python source defaults to UTF-8, so do
    # not depend on the platform's locale encoding for either file.
    txt = pathlib.Path(input_fname).read_text(encoding="utf-8")
    catalog = json.loads(txt)

    # Only the top-level "properties" mapping of the input is converted.
    entrypoint = catalog["properties"]
    attributes = parse_json_schema(entrypoint)
    pathlib.Path(output_fname).write_text(attributes, encoding="utf-8")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment