Skip to content

Instantly share code, notes, and snippets.

@orellabac
Created June 19, 2024 17:40
Show Gist options
  • Save orellabac/46c87f25fedb3c2c21a3608ed6b98d9a to your computer and use it in GitHub Desktop.
Save orellabac/46c87f25fedb3c2c21a3608ed6b98d9a to your computer and use it in GitHub Desktop.
Snowpark Example Extracting Tables Info from an ERWIN xml file:
COPY INTO @MYSTAGE/erwin_sample.xml
FROM (
SELECT $$<?xml version="1.0" standalone="yes"?>
<erwin
xmlns="http://www.erwin.com/dm"
xmlns:UDP="http://www.erwin.com/dm/metadata"
xmlns:EMX="http://www.erwin.com/dm/data"
xmlns:EM2="http://www.erwin.com/dm/EM2data" FileVersion="9.98.29174" Format="erwin">
<EMX:Model xmlns="http://www.erwin.com/dm/data" id="{700A0000-D99A-CCC5-999A-7CCCE2230F33}+00000001" name="Model_1">
<ModelEnvProps>
</ModelEnvProps>
<Entity_Groups>
<Entity id="{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1DDD}+00000000" name="Employee">
<EntityProps>
<Name>TBBBBB_XXX1</Name>
<Long_Id>{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1DDD}+00000000</Long_Id>
<Owner_Path Tool="Y" ReadOnly="Y" Derived="Y">Model_1</Owner_Path>
<Type>1</Type>
<Comment/>
<Physical_Name Derived="Y">%EntityName()</Physical_Name>
<Dependent_Objects_Ref_Array ReadOnly="Y" Derived="Y">
<Dependent_Objects_Ref index="0">{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000000</Dependent_Objects_Ref>
</Dependent_Objects_Ref_Array>
<Do_Not_Generate Derived="Y">N</Do_Not_Generate>
<Attributes_Order_Ref_Array>
<Attributes_Order_Ref index="0">{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000000</Attributes_Order_Ref>
<Attributes_Order_Ref index="1">{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000001</Attributes_Order_Ref>
</Attributes_Order_Ref_Array>
<Physical_Columns_Order_Ref_Array>
<Physical_Columns_Order_Ref index="0">{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000000</Physical_Columns_Order_Ref>
<Physical_Columns_Order_Ref index="1">{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000001</Physical_Columns_Order_Ref>
</Physical_Columns_Order_Ref_Array>
<Column_Groups_Ref_Array>
<Column_Groups_Ref index="0">{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000000</Column_Groups_Ref>
<Column_Groups_Ref index="1">{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000001</Column_Groups_Ref>
</Column_Groups_Ref_Array>
<User_Formatted_Name ReadOnly="Y" Derived="Y">TBBBBB_XXX1</User_Formatted_Name>
<Schema_Ref>{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000000</Schema_Ref>
<Name_Qualifier Derived="Y">ABC</Name_Qualifier>
<DB2_Type>0</DB2_Type>
<Image_Ref>{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000000</Image_Ref>
<Icon_Image_Ref>{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000000</Icon_Image_Ref>
<History_List_Array>
<History_List index="0">Dummy Description=""</History_List>
</History_List_Array>
</EntityProps>
<Attribute_Groups>
<Attribute id="{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000000" name="EmployeeID">
<AttributeProps>
<Name>EmployeeID</Name>
<Long_Id>{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000000</Long_Id>
<Owner_Path Tool="Y" ReadOnly="Y" Derived="Y">TBBBBB_XXX1</Owner_Path>
<Type>100</Type>
<Physical_Data_Type>INTEGER</Physical_Data_Type>
<Null_Option_Type Derived="Y">0</Null_Option_Type>
<Label Derived="Y">%AttName:</Label>
<Physical_Name Derived="Y">%AttName</Physical_Name>
<Logical_Data_Type Derived="Y">CHAR(18)</Logical_Data_Type>
<Parent_Domain_Ref>{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000000</Parent_Domain_Ref>
<Hide_In_Logical Tool="Y" ReadOnly="Y">N</Hide_In_Logical>
<Hide_In_Physical Tool="Y" ReadOnly="Y">N</Hide_In_Physical>
<Font_Color Derived="Y">888888</Font_Color>
<Master_Attribute_Ref Tool="Y" ReadOnly="Y" Derived="Y">{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000000</Master_Attribute_Ref>
<Font_Size Derived="Y">10</Font_Size>
<Attribute_Order>1</Attribute_Order>
<Column_Order>1</Column_Order>
</AttributeProps>
</Attribute>
<Attribute id="{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000000" name="FIRST_NAME">
<AttributeProps>
<Name>FIRST_NAME</Name>
<Long_Id>{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000000</Long_Id>
<Owner_Path Tool="Y" ReadOnly="Y" Derived="Y">TBBBBB_XXX1</Owner_Path>
<Type>100</Type>
<Physical_Data_Type>VARCHAR(50)</Physical_Data_Type>
<Null_Option_Type Derived="Y">0</Null_Option_Type>
<Label Derived="Y">%AttName:</Label>
<Physical_Name Derived="Y">%AttName</Physical_Name>
<Logical_Data_Type Derived="Y">CHAR(18)</Logical_Data_Type>
<Parent_Domain_Ref>{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000000</Parent_Domain_Ref>
<Hide_In_Logical Tool="Y" ReadOnly="Y">N</Hide_In_Logical>
<Hide_In_Physical Tool="Y" ReadOnly="Y">N</Hide_In_Physical>
<Font_Color Derived="Y">888888</Font_Color>
<Master_Attribute_Ref Tool="Y" ReadOnly="Y" Derived="Y">{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000000</Master_Attribute_Ref>
<Font_Size Derived="Y">10</Font_Size>
<Attribute_Order>2</Attribute_Order>
<Column_Order>2</Column_Order>
</AttributeProps>
</Attribute>
<Attribute id="{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000000" name="LAST_NAME">
<AttributeProps>
<Name>LAST_NAME</Name>
<Long_Id>{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000000</Long_Id>
<Owner_Path Tool="Y" ReadOnly="Y" Derived="Y">TBBBBB_XXX1</Owner_Path>
<Type>100</Type>
<Physical_Data_Type>VARCHAR(50)</Physical_Data_Type>
<Null_Option_Type Derived="Y">0</Null_Option_Type>
<Label Derived="Y">%AttName:</Label>
<Physical_Name Derived="Y">%AttName</Physical_Name>
<Logical_Data_Type Derived="Y">CHAR(18)</Logical_Data_Type>
<Parent_Domain_Ref>{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000000</Parent_Domain_Ref>
<Hide_In_Logical Tool="Y" ReadOnly="Y">N</Hide_In_Logical>
<Hide_In_Physical Tool="Y" ReadOnly="Y">N</Hide_In_Physical>
<Font_Color Derived="Y">888888</Font_Color>
<Master_Attribute_Ref Tool="Y" ReadOnly="Y" Derived="Y">{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000000</Master_Attribute_Ref>
<Font_Size Derived="Y">10</Font_Size>
<Attribute_Order>3</Attribute_Order>
<Column_Order>3</Column_Order>
</AttributeProps>
</Attribute>
<Attribute id="{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000000" name="HIRE_DATE">
<AttributeProps>
<Name>HIRE_DATE</Name>
<Long_Id>{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000000</Long_Id>
<Owner_Path Tool="Y" ReadOnly="Y" Derived="Y">TBBBBB_XXX1</Owner_Path>
<Type>100</Type>
<Physical_Data_Type>DATE</Physical_Data_Type>
<Null_Option_Type Derived="Y">0</Null_Option_Type>
<Label Derived="Y">%AttName:</Label>
<Physical_Name Derived="Y">%AttName</Physical_Name>
<Logical_Data_Type Derived="Y">CHAR(18)</Logical_Data_Type>
<Parent_Domain_Ref>{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000000</Parent_Domain_Ref>
<Hide_In_Logical Tool="Y" ReadOnly="Y">N</Hide_In_Logical>
<Hide_In_Physical Tool="Y" ReadOnly="Y">N</Hide_In_Physical>
<Font_Color Derived="Y">888888</Font_Color>
<Master_Attribute_Ref Tool="Y" ReadOnly="Y" Derived="Y">{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000000</Master_Attribute_Ref>
<Font_Size Derived="Y">10</Font_Size>
<Attribute_Order>4</Attribute_Order>
<Column_Order>4</Column_Order>
</AttributeProps>
</Attribute>
</Attribute_Groups>
</Entity>
<Entity id="{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1FFF}+00000000" name="Department">
<EntityProps>
<Name>Department</Name>
<Long_Id>{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1FFF}+00000000</Long_Id>
<Label>%EntName:</Label>
<Comment>Department</Comment>
<Physical_Name>DEPARTMENT</Physical_Name>
<Entity_Order>1</Entity_Order>
<Name_Qualifier Derived="Y">DEF</Name_Qualifier>
</EntityProps>
<Attribute_Groups>
<Attribute id="{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1FFF}+00000000" name="DepartmentID">
<AttributeProps>
<Name>DepartmentID</Name>
<Long_Id>{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1FFF}+00000000</Long_Id>
<Physical_Data_Type>INTEGER</Physical_Data_Type>
<Null_Option_Type>1</Null_Option_Type>
<Label>%AttName:</Label>
<Comment>Deparment Identification</Comment>
<Physical_Name>DEPT_ID</Physical_Name>
<Logical_Data_Type>INTEGER</Logical_Data_Type>
<Attribute_Order>1</Attribute_Order>
<Column_Order>1</Column_Order>
</AttributeProps>
</Attribute>
<Attribute id="{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1FFF}+00000000" name="DepartmentName">
<AttributeProps>
<Name>DepartmentName</Name>
<Long_Id>{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1CCCC}+00000000</Long_Id>
<Physical_Data_Type>VARCHAR(100)</Physical_Data_Type>
<Null_Option_Type>1</Null_Option_Type>
<Label>%AttName:</Label>
<Comment>Deparment Identification</Comment>
<Physical_Name>DEPT_ID</Physical_Name>
<Logical_Data_Type>INTEGER</Logical_Data_Type>
<Attribute_Order>1</Attribute_Order>
<Column_Order>1</Column_Order>
</AttributeProps>
</Attribute>
</Attribute_Groups>
</Entity>
</Entity_Groups>
</EMX:Model>
</erwin>
$$
)
FILE_FORMAT=(TYPE=CSV COMPRESSION=NONE RECORD_DELIMITER=NONE FIELD_DELIMITER=NONE) OVERWRITE=TRUE SINGLE=TRUE;
CREATE OR REPLACE FUNCTION ERWIN_MODEL_EXTRACTOR(xml_filename STRING)
RETURNS TABLE (filename STRING, schema STRING, table_name STRING, column_name STRING, datatype STRING, column_order integer, comment string )
LANGUAGE PYTHON
RUNTIME_VERSION = 3.11
PACKAGES = ( 'snowflake-snowpark-python','lxml')
HANDLER = 'ErwinModelReader'
AS $$
from snowflake.snowpark.files import SnowflakeFile
import logging
from lxml import etree, objectify
class ErwinModelReader:
def __init__(self):
...
def process(self, xml_filename:str):
with SnowflakeFile.open(xml_filename,"r",require_scoped_url=False) as f:
all_xml_content = f.read()
all_xml_content = all_xml_content.replace('encoding="UTF-8"', '')
root = objectify.fromstring(all_xml_content)
model = root['{http://www.erwin.com/dm/data}Model']
for table in model.Entity_Groups.getchildren():
try:
schema = table.EntityProps.Name_Qualifier
table_name = table.EntityProps.Name
for column in table.Attribute_Groups.getchildren():
col_name = column.AttributeProps.Name
data_type = column.AttributeProps.Physical_Data_Type
column_order = column.AttributeProps.Column_Order
comment = ""
try:
comment = column.AttributeProps.Comment
except:
pass
yield (xml_filename, schema, table_name, col_name, data_type, int(column_order), comment)
except Exception as ex:
logging.error(ex)
$$;
with
xml_files as (select * from directory(@mystage) where startswith(relative_path,'erwin') and endswith(relative_path,'.xml'))
select r.* from xml_files, TABLE(ERWIN_MODEL_EXTRACTOR('@mystage/' || relative_path)) r;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment