Created
June 19, 2024 17:40
-
-
Save orellabac/46c87f25fedb3c2c21a3608ed6b98d9a to your computer and use it in GitHub Desktop.
Snowpark Example Extracting Tables Info from an ERWIN xml file:
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
COPY INTO @MYSTAGE/erwin_sample.xml | |
FROM ( | |
SELECT $$<?xml version="1.0" standalone="yes"?> | |
<erwin | |
xmlns="http://www.erwin.com/dm" | |
xmlns:UDP="http://www.erwin.com/dm/metadata" | |
xmlns:EMX="http://www.erwin.com/dm/data" | |
xmlns:EM2="http://www.erwin.com/dm/EM2data" FileVersion="9.98.29174" Format="erwin"> | |
<EMX:Model xmlns="http://www.erwin.com/dm/data" id="{700A0000-D99A-CCC5-999A-7CCCE2230F33}+00000001" name="Model_1"> | |
<ModelEnvProps> | |
</ModelEnvProps> | |
<Entity_Groups> | |
<Entity id="{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1DDD}+00000000" name="Employee"> | |
<EntityProps> | |
<Name>TBBBBB_XXX1</Name> | |
<Long_Id>{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1DDD}+00000000</Long_Id> | |
<Owner_Path Tool="Y" ReadOnly="Y" Derived="Y">Model_1</Owner_Path> | |
<Type>1</Type> | |
<Comment/> | |
<Physical_Name Derived="Y">%EntityName()</Physical_Name> | |
<Dependent_Objects_Ref_Array ReadOnly="Y" Derived="Y"> | |
<Dependent_Objects_Ref index="0">{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000000</Dependent_Objects_Ref> | |
</Dependent_Objects_Ref_Array> | |
<Do_Not_Generate Derived="Y">N</Do_Not_Generate> | |
<Attributes_Order_Ref_Array> | |
<Attributes_Order_Ref index="0">{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000000</Attributes_Order_Ref> | |
<Attributes_Order_Ref index="1">{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000001</Attributes_Order_Ref> | |
</Attributes_Order_Ref_Array> | |
<Physical_Columns_Order_Ref_Array> | |
<Physical_Columns_Order_Ref index="0">{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000000</Physical_Columns_Order_Ref> | |
<Physical_Columns_Order_Ref index="1">{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000001</Physical_Columns_Order_Ref> | |
</Physical_Columns_Order_Ref_Array> | |
<Column_Groups_Ref_Array> | |
<Column_Groups_Ref index="0">{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000000</Column_Groups_Ref> | |
<Column_Groups_Ref index="1">{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000001</Column_Groups_Ref> | |
</Column_Groups_Ref_Array> | |
<User_Formatted_Name ReadOnly="Y" Derived="Y">TBBBBB_XXX1</User_Formatted_Name> | |
<Schema_Ref>{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000000</Schema_Ref> | |
<Name_Qualifier Derived="Y">ABC</Name_Qualifier> | |
<DB2_Type>0</DB2_Type> | |
<Image_Ref>{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000000</Image_Ref> | |
<Icon_Image_Ref>{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000000</Icon_Image_Ref> | |
<History_List_Array> | |
<History_List index="0">Dummy Description=""</History_List> | |
</History_List_Array> | |
</EntityProps> | |
<Attribute_Groups> | |
<Attribute id="{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000000" name="EmployeeID"> | |
<AttributeProps> | |
<Name>EmployeeID</Name> | |
<Long_Id>{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000000</Long_Id> | |
<Owner_Path Tool="Y" ReadOnly="Y" Derived="Y">TBBBBB_XXX1</Owner_Path> | |
<Type>100</Type> | |
<Physical_Data_Type>INTEGER</Physical_Data_Type> | |
<Null_Option_Type Derived="Y">0</Null_Option_Type> | |
<Label Derived="Y">%AttName:</Label> | |
<Physical_Name Derived="Y">%AttName</Physical_Name> | |
<Logical_Data_Type Derived="Y">CHAR(18)</Logical_Data_Type> | |
<Parent_Domain_Ref>{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000000</Parent_Domain_Ref> | |
<Hide_In_Logical Tool="Y" ReadOnly="Y">N</Hide_In_Logical> | |
<Hide_In_Physical Tool="Y" ReadOnly="Y">N</Hide_In_Physical> | |
<Font_Color Derived="Y">888888</Font_Color> | |
<Master_Attribute_Ref Tool="Y" ReadOnly="Y" Derived="Y">{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000000</Master_Attribute_Ref> | |
<Font_Size Derived="Y">10</Font_Size> | |
<Attribute_Order>1</Attribute_Order> | |
<Column_Order>1</Column_Order> | |
</AttributeProps> | |
</Attribute> | |
<Attribute id="{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000000" name="FIRST_NAME"> | |
<AttributeProps> | |
<Name>FIRST_NAME</Name> | |
<Long_Id>{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000000</Long_Id> | |
<Owner_Path Tool="Y" ReadOnly="Y" Derived="Y">TBBBBB_XXX1</Owner_Path> | |
<Type>100</Type> | |
<Physical_Data_Type>VARCHAR(50)</Physical_Data_Type> | |
<Null_Option_Type Derived="Y">0</Null_Option_Type> | |
<Label Derived="Y">%AttName:</Label> | |
<Physical_Name Derived="Y">%AttName</Physical_Name> | |
<Logical_Data_Type Derived="Y">CHAR(18)</Logical_Data_Type> | |
<Parent_Domain_Ref>{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000000</Parent_Domain_Ref> | |
<Hide_In_Logical Tool="Y" ReadOnly="Y">N</Hide_In_Logical> | |
<Hide_In_Physical Tool="Y" ReadOnly="Y">N</Hide_In_Physical> | |
<Font_Color Derived="Y">888888</Font_Color> | |
<Master_Attribute_Ref Tool="Y" ReadOnly="Y" Derived="Y">{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000000</Master_Attribute_Ref> | |
<Font_Size Derived="Y">10</Font_Size> | |
<Attribute_Order>2</Attribute_Order> | |
<Column_Order>2</Column_Order> | |
</AttributeProps> | |
</Attribute> | |
<Attribute id="{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000000" name="LAST_NAME"> | |
<AttributeProps> | |
<Name>LAST_NAME</Name> | |
<Long_Id>{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000000</Long_Id> | |
<Owner_Path Tool="Y" ReadOnly="Y" Derived="Y">TBBBBB_XXX1</Owner_Path> | |
<Type>100</Type> | |
<Physical_Data_Type>VARCHAR(50)</Physical_Data_Type> | |
<Null_Option_Type Derived="Y">0</Null_Option_Type> | |
<Label Derived="Y">%AttName:</Label> | |
<Physical_Name Derived="Y">%AttName</Physical_Name> | |
<Logical_Data_Type Derived="Y">CHAR(18)</Logical_Data_Type> | |
<Parent_Domain_Ref>{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000000</Parent_Domain_Ref> | |
<Hide_In_Logical Tool="Y" ReadOnly="Y">N</Hide_In_Logical> | |
<Hide_In_Physical Tool="Y" ReadOnly="Y">N</Hide_In_Physical> | |
<Font_Color Derived="Y">888888</Font_Color> | |
<Master_Attribute_Ref Tool="Y" ReadOnly="Y" Derived="Y">{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000000</Master_Attribute_Ref> | |
<Font_Size Derived="Y">10</Font_Size> | |
<Attribute_Order>3</Attribute_Order> | |
<Column_Order>3</Column_Order> | |
</AttributeProps> | |
</Attribute> | |
<Attribute id="{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000000" name="HIRE_DATE"> | |
<AttributeProps> | |
<Name>HIRE_DATE</Name> | |
<Long_Id>{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000000</Long_Id> | |
<Owner_Path Tool="Y" ReadOnly="Y" Derived="Y">TBBBBB_XXX1</Owner_Path> | |
<Type>100</Type> | |
<Physical_Data_Type>DATE</Physical_Data_Type> | |
<Null_Option_Type Derived="Y">0</Null_Option_Type> | |
<Label Derived="Y">%AttName:</Label> | |
<Physical_Name Derived="Y">%AttName</Physical_Name> | |
<Logical_Data_Type Derived="Y">CHAR(18)</Logical_Data_Type> | |
<Parent_Domain_Ref>{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000000</Parent_Domain_Ref> | |
<Hide_In_Logical Tool="Y" ReadOnly="Y">N</Hide_In_Logical> | |
<Hide_In_Physical Tool="Y" ReadOnly="Y">N</Hide_In_Physical> | |
<Font_Color Derived="Y">888888</Font_Color> | |
<Master_Attribute_Ref Tool="Y" ReadOnly="Y" Derived="Y">{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1EEE}+00000000</Master_Attribute_Ref> | |
<Font_Size Derived="Y">10</Font_Size> | |
<Attribute_Order>4</Attribute_Order> | |
<Column_Order>4</Column_Order> | |
</AttributeProps> | |
</Attribute> | |
</Attribute_Groups> | |
</Entity> | |
<Entity id="{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1FFF}+00000000" name="Department"> | |
<EntityProps> | |
<Name>Department</Name> | |
<Long_Id>{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1FFF}+00000000</Long_Id> | |
<Label>%EntName:</Label> | |
<Comment>Department</Comment> | |
<Physical_Name>DEPARTMENT</Physical_Name> | |
<Entity_Order>1</Entity_Order> | |
<Name_Qualifier Derived="Y">DEF</Name_Qualifier> | |
</EntityProps> | |
<Attribute_Groups> | |
<Attribute id="{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1FFF}+00000000" name="DepartmentID"> | |
<AttributeProps> | |
<Name>DepartmentID</Name> | |
<Long_Id>{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1FFF}+00000000</Long_Id> | |
<Physical_Data_Type>INTEGER</Physical_Data_Type> | |
<Null_Option_Type>1</Null_Option_Type> | |
<Label>%AttName:</Label> | |
<Comment>Deparment Identification</Comment> | |
<Physical_Name>DEPT_ID</Physical_Name> | |
<Logical_Data_Type>INTEGER</Logical_Data_Type> | |
<Attribute_Order>1</Attribute_Order> | |
<Column_Order>1</Column_Order> | |
</AttributeProps> | |
</Attribute> | |
<Attribute id="{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1FFF}+00000000" name="DepartmentName"> | |
<AttributeProps> | |
<Name>DepartmentName</Name> | |
<Long_Id>{9FFFDDD4-1BBB-4999-BBB9-ACCBB88BB1CCCC}+00000000</Long_Id> | |
<Physical_Data_Type>VARCHAR(100)</Physical_Data_Type> | |
<Null_Option_Type>1</Null_Option_Type> | |
<Label>%AttName:</Label> | |
<Comment>Deparment Identification</Comment> | |
<Physical_Name>DEPT_ID</Physical_Name> | |
<Logical_Data_Type>INTEGER</Logical_Data_Type> | |
<Attribute_Order>1</Attribute_Order> | |
<Column_Order>1</Column_Order> | |
</AttributeProps> | |
</Attribute> | |
</Attribute_Groups> | |
</Entity> | |
</Entity_Groups> | |
</EMX:Model> | |
</erwin> | |
$$ | |
) | |
FILE_FORMAT=(TYPE=CSV COMPRESSION=NONE RECORD_DELIMITER=NONE FIELD_DELIMITER=NONE) OVERWRITE=TRUE SINGLE=TRUE; | |
CREATE OR REPLACE FUNCTION ERWIN_MODEL_EXTRACTOR(xml_filename STRING) | |
RETURNS TABLE (filename STRING, schema STRING, table_name STRING, column_name STRING, datatype STRING, column_order integer, comment string ) | |
LANGUAGE PYTHON | |
RUNTIME_VERSION = 3.11 | |
PACKAGES = ( 'snowflake-snowpark-python','lxml') | |
HANDLER = 'ErwinModelReader' | |
AS $$ | |
from snowflake.snowpark.files import SnowflakeFile | |
import logging | |
from lxml import etree, objectify | |
class ErwinModelReader: | |
def __init__(self): | |
... | |
def process(self, xml_filename:str): | |
with SnowflakeFile.open(xml_filename,"r",require_scoped_url=False) as f: | |
all_xml_content = f.read() | |
all_xml_content = all_xml_content.replace('encoding="UTF-8"', '') | |
root = objectify.fromstring(all_xml_content) | |
model = root['{http://www.erwin.com/dm/data}Model'] | |
for table in model.Entity_Groups.getchildren(): | |
try: | |
schema = table.EntityProps.Name_Qualifier | |
table_name = table.EntityProps.Name | |
for column in table.Attribute_Groups.getchildren(): | |
col_name = column.AttributeProps.Name | |
data_type = column.AttributeProps.Physical_Data_Type | |
column_order = column.AttributeProps.Column_Order | |
comment = "" | |
try: | |
comment = column.AttributeProps.Comment | |
except: | |
pass | |
yield (xml_filename, schema, table_name, col_name, data_type, int(column_order), comment) | |
except Exception as ex: | |
logging.error(ex) | |
$$; | |
with | |
xml_files as (select * from directory(@mystage) where startswith(relative_path,'erwin') and endswith(relative_path,'.xml')) | |
select r.* from xml_files, TABLE(ERWIN_MODEL_EXTRACTOR('@mystage/' || relative_path)) r; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment