Last active
August 14, 2021 19:39
-
-
Save stkbailey/8702fb04b079adb22bca89a72b5bdbf7 to your computer and use it in GitHub Desktop.
Convert Singer Stream Catalog to Meltano SDK PropertiesList
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import pathlib | |
import sys | |
from typing import List | |
# One indentation level in the generated source (four spaces).
INDENTATION = 4 * " "

# JSON-schema type name -> source text of the `th` typing helper written into
# the generated file.  "null" has no entry of its own and is rendered as a
# string type.
TYPE_DICT = dict(
    string="th.StringType",
    integer="th.IntegerType",
    number="th.NumberType",
    array="th.ArrayType",
    object="th.ObjectType",
    null="th.StringType",
    boolean="th.BooleanType",
)
def parse_attribute(key, value, depth=0) -> str:
    """Render one schema property, dispatching on its JSON-schema type.

    Two progress lines are printed to stdout: the raw input and the type that
    was identified.  Arrays and objects go to their dedicated renderers; every
    other type is rendered as a plain property.

    Args:
        key: Property name.
        value: JSON-schema fragment for the property; a missing ``type`` is
            treated as ``"null"``.
        depth: Nesting level forwarded to the renderer.

    Returns:
        Whatever the selected renderer returns.
    """
    print(f"Parsing {key}: {value}")
    kind = extract_datatype(value.get("type", "null"))
    print(f"..Identified as {kind}")

    if kind == "array":
        render = print_array_attribute
    elif kind == "object":
        render = print_object_attribute
    else:
        render = print_normal_attribute
    return render(key, value, depth)
def print_normal_attribute(key, value, depth=0) -> str:
    """Render a scalar property as a single ``th.Property("<key>", <type>),`` line.

    Args:
        key: Property name.
        value: JSON-schema fragment; a missing ``type`` is treated as ``"null"``.
        depth: Nesting level; the line is indented by ``depth`` indentation units.

    Returns:
        The generated line, preceded by a newline.

    Raises:
        KeyError: If the resolved type name has no entry in ``TYPE_DICT``.
    """
    sdk_type = TYPE_DICT[extract_datatype(value.get("type", "null"))]
    indent = INDENTATION * depth
    return f'\n{indent}th.Property("{key}", {sdk_type}),'
def print_array_attribute(key, value, depth=0) -> str:
    """Render an array property as ``th.Property("<key>", th.ArrayType(...)),``.

    The element type is taken from ``value["items"]``; when ``items`` or its
    ``type`` is missing, the element type falls back to ``"null"``.

    Args:
        key: Property name.
        value: JSON-schema fragment for the array property.
        depth: Nesting level; the entry is indented by ``depth`` indentation units.

    Returns:
        The generated source text, preceded by a newline.

    Raises:
        KeyError: If a non-object element type has no entry in ``TYPE_DICT``.

    NOTE(review): an element type of ``"array"`` is rendered as the bare name
    ``th.ArrayType`` (no element type of its own); nested arrays are not
    expanded here.
    """
    array_type = TYPE_DICT["array"]
    items = value.get("items", {})
    item_type = extract_datatype(items.get("type", "null"))
    prefix = "\n" + (depth * INDENTATION)
    translation = prefix + f'th.Property("{key}", {array_type}('

    if item_type != "object":
        # Scalar elements: close ArrayType and Property on the same line.
        return translation + f"{TYPE_DICT[item_type]})),"

    if items.get("properties", {}):
        nested = print_object_attribute(None, items, depth + 1)
        # With key=None, print_object_attribute ends its text with ")),",
        # which closes both th.ObjectType( and th.ArrayType(; only the
        # th.Property( call is left to close here.
        return translation + nested + prefix + "),"

    # Object elements without declared properties.  This case used to emit
    # `th.ArrayType(),` which left th.Property( unclosed and gave ArrayType no
    # element type; emit an empty object type and close every call instead.
    return translation + f'{TYPE_DICT["object"]}())),'
def print_object_attribute(key, value, depth=0) -> str:
    """Render an object schema as a ``th.ObjectType(...)`` expression.

    With a ``key`` the object is wrapped in ``th.Property("<key>", ...)``;
    with ``key=None`` only the bare ``th.ObjectType(`` opener is written.  In
    both cases the text ends with ``)),``, so a caller passing ``key=None``
    gets one more closing parenthesis than this function opened.

    Args:
        key: Property name, or ``None`` for an anonymous object.
        value: JSON-schema fragment; its ``properties`` mapping is rendered
            one level deeper.
        depth: Nesting level; the entry is indented by ``depth`` indentation units.

    Returns:
        The generated source text, preceded by a newline.
    """
    object_type = TYPE_DICT["object"]
    lead = "\n" + INDENTATION * depth
    if key is None:
        opener = f"{object_type}("
    else:
        opener = f'th.Property("{key}", {object_type}('

    children = value.get("properties", {})
    if not children.keys():
        # No nested properties: open and close on the same line.
        return lead + opener + ")),"

    body = parse_object_attributes(children, depth + 1)
    return "".join((lead, opener, body, lead, ")),"))
def extract_datatype(value):
    """Reduce a JSON-schema ``type`` value to a single type name.

    Args:
        value: Either a single type name or a list of type names such as
            ``["null", "string"]``.

    Returns:
        ``value`` unchanged when it is not a list.  For a list, the first
        entry that is not ``"null"``; if the list is empty or contains only
        ``"null"`` entries, ``"null"`` is returned (previously this raised
        ``IndexError``).
    """
    if not isinstance(value, list):
        return value
    return next((name for name in value if name != "null"), "null")
def parse_object_attributes(properties, depth=0) -> str:
    """Render every property of a mapping and concatenate the results.

    Args:
        properties: Mapping of property name to JSON-schema fragment.
        depth: Nesting level passed to each property's renderer.

    Returns:
        The literal text ``"()"`` when ``properties`` is empty; otherwise the
        rendered properties joined in iteration order with no separator.
    """
    if not properties.keys():
        return "()"
    return "".join(
        parse_attribute(name, schema, depth) for name, schema in properties.items()
    )
def parse_json_schema(stream_schema: dict):
    """Build the text of a Python module that defines ``schema``.

    Args:
        stream_schema: Mapping of property name to JSON-schema fragment; it is
            rendered at depth 1.

    Returns:
        Source text consisting of the ``singer_sdk`` typing import followed by
        a ``th.PropertiesList(...).to_dict()`` assignment wrapping the rendered
        properties.
    """
    template = """from singer_sdk import typing as th
schema = th.PropertiesList(\nCONTENTS\n).to_dict()
"""
    # Rendered properties begin with a newline; trim leading/trailing newlines
    # because the template already supplies the surrounding line breaks.
    body = parse_object_attributes(stream_schema, depth=1).strip("\n")
    head, _, tail = template.partition("CONTENTS")
    return head + body + tail
if __name__ == "__main__":
    # Command line: <script> INPUT_JSON OUTPUT_PY
    # Exit with a usage message instead of an IndexError traceback when an
    # argument is missing; extra arguments are still ignored, as before.
    if len(sys.argv) < 3:
        sys.exit(f"usage: {sys.argv[0]} <input_schema.json> <output.py>")
    input_fname = sys.argv[1]
    output_fname = sys.argv[2]

    # Read and write as UTF-8 explicitly so the result does not depend on the
    # platform's default encoding.
    txt = pathlib.Path(input_fname).read_text(encoding="utf-8")
    catalog = json.loads(txt)

    # Only the top-level "properties" mapping of the input document is converted.
    entrypoint = catalog["properties"]
    attributes = parse_json_schema(entrypoint)
    pathlib.Path(output_fname).write_text(attributes, encoding="utf-8")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment