Skip to content

Instantly share code, notes, and snippets.

@sktse
Created July 9, 2020 14:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sktse/f1b3bb7bceffe47637eb572a5e750f7e to your computer and use it in GitHub Desktop.
Save sktse/f1b3bb7bceffe47637eb572a5e750f7e to your computer and use it in GitHub Desktop.
Code snippet that programmatically generates PySpark `StructField` definitions from RETS table metadata.
def generate_pyspark_structs(client, output_path="./tests/data/test.py"):
    """Write PySpark ``StructField`` source lines for the Property table.

    The table metadata of the "Property" class of the "Property" resource
    is read from ``client``; every column becomes one line of the form
    ``StructField("<SystemName>", <SparkType>, nullable=True),``. The lines
    are joined with newlines (no trailing newline) and written to
    ``output_path``.

    Args:
        client: Object exposing ``get_resource(name)``; the returned
            resource must expose ``get_class(name)``, and the returned class
            a ``table`` attribute that iterates over column mappings with
            the keys ``"SystemName"``, ``"DataType"`` and
            ``"Interpretation"``.
        output_path: File that receives the generated lines. Defaults to
            the location that used to be hard-coded.

    Raises:
        ValueError: If a column has a ``DataType`` with no known Spark
            equivalent. Nothing is written in that case.
    """
    # Metadata data types that map to one Spark type regardless of the
    # column's interpretation.
    scalar_types = {
        "Boolean": "BooleanType()",
        "Decimal": "DecimalType()",
        "Int": "IntegerType()",
        "DateTime": "TimestampType()",
        "Long": "LongType()",
        "Date": "DateType()",
    }

    property_resource = client.get_resource("Property")
    property_class = property_resource.get_class("Property")
    meta = property_class.table

    fields = []
    for col in meta:
        name = col["SystemName"]
        data_type = col["DataType"]
        interpretation = col["Interpretation"]

        if data_type == "Character":
            if interpretation == "LookupMulti":
                # Multi-valued enum: an array of strings.
                spark_type = "ArrayType(StringType(), False)"
            else:
                # Single enum ("Lookup") and free text are both plain strings.
                spark_type = "StringType()"
        elif data_type in scalar_types:
            spark_type = scalar_types[data_type]
        else:
            raise ValueError(f"Unknown type: {data_type}")

        fields.append(f' StructField("{name}", {spark_type}, nullable=True),')

    # Explicit encoding so non-ASCII system names are written the same way on
    # every platform instead of depending on the locale default.
    with open(output_path, "w", encoding="utf-8") as outfile:
        outfile.write("\n".join(fields))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment