Created
February 4, 2015 22:46
-
-
Save mattyb149/6b399c721a84a61f4d8b to your computer and use it in GitHub Desktop.
PDI transformation for use as an Apache Pig UDF
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8"?>
<transformation>
<info>
  <!-- Transformation-level metadata for "pdi-pig": identity, logging tables,
       row-set tuning values, slave-server definitions and audit stamps. -->
  <name>pdi-pig</name>
  <description/>
  <extended_description/>
  <trans_version/>
  <trans_type>Normal</trans_type>
  <directory>/</directory>
  <parameters>
  </parameters>
  <!-- Logging tables. Every table below has an empty connection, schema and
       table name; presumably this leaves database logging inactive (confirm).
       The field lists are kept exactly as generated, one field per line. -->
  <log>
    <trans-log-table>
      <connection/>
      <schema/>
      <table/>
      <size_limit_lines/>
      <interval/>
      <timeout_days/>
      <field><id>ID_BATCH</id><enabled>Y</enabled><name>ID_BATCH</name></field>
      <field><id>CHANNEL_ID</id><enabled>Y</enabled><name>CHANNEL_ID</name></field>
      <field><id>TRANSNAME</id><enabled>Y</enabled><name>TRANSNAME</name></field>
      <field><id>STATUS</id><enabled>Y</enabled><name>STATUS</name></field>
      <field><id>LINES_READ</id><enabled>Y</enabled><name>LINES_READ</name><subject/></field>
      <field><id>LINES_WRITTEN</id><enabled>Y</enabled><name>LINES_WRITTEN</name><subject/></field>
      <field><id>LINES_UPDATED</id><enabled>Y</enabled><name>LINES_UPDATED</name><subject/></field>
      <field><id>LINES_INPUT</id><enabled>Y</enabled><name>LINES_INPUT</name><subject/></field>
      <field><id>LINES_OUTPUT</id><enabled>Y</enabled><name>LINES_OUTPUT</name><subject/></field>
      <field><id>LINES_REJECTED</id><enabled>Y</enabled><name>LINES_REJECTED</name><subject/></field>
      <field><id>ERRORS</id><enabled>Y</enabled><name>ERRORS</name></field>
      <field><id>STARTDATE</id><enabled>Y</enabled><name>STARTDATE</name></field>
      <field><id>ENDDATE</id><enabled>Y</enabled><name>ENDDATE</name></field>
      <field><id>LOGDATE</id><enabled>Y</enabled><name>LOGDATE</name></field>
      <field><id>DEPDATE</id><enabled>Y</enabled><name>DEPDATE</name></field>
      <field><id>REPLAYDATE</id><enabled>Y</enabled><name>REPLAYDATE</name></field>
      <field><id>LOG_FIELD</id><enabled>Y</enabled><name>LOG_FIELD</name></field>
      <field><id>EXECUTING_SERVER</id><enabled>N</enabled><name>EXECUTING_SERVER</name></field>
      <field><id>EXECUTING_USER</id><enabled>N</enabled><name>EXECUTING_USER</name></field>
      <field><id>CLIENT</id><enabled>N</enabled><name>CLIENT</name></field>
    </trans-log-table>
    <perf-log-table>
      <connection/>
      <schema/>
      <table/>
      <interval/>
      <timeout_days/>
      <field><id>ID_BATCH</id><enabled>Y</enabled><name>ID_BATCH</name></field>
      <field><id>SEQ_NR</id><enabled>Y</enabled><name>SEQ_NR</name></field>
      <field><id>LOGDATE</id><enabled>Y</enabled><name>LOGDATE</name></field>
      <field><id>TRANSNAME</id><enabled>Y</enabled><name>TRANSNAME</name></field>
      <field><id>STEPNAME</id><enabled>Y</enabled><name>STEPNAME</name></field>
      <field><id>STEP_COPY</id><enabled>Y</enabled><name>STEP_COPY</name></field>
      <field><id>LINES_READ</id><enabled>Y</enabled><name>LINES_READ</name></field>
      <field><id>LINES_WRITTEN</id><enabled>Y</enabled><name>LINES_WRITTEN</name></field>
      <field><id>LINES_UPDATED</id><enabled>Y</enabled><name>LINES_UPDATED</name></field>
      <field><id>LINES_INPUT</id><enabled>Y</enabled><name>LINES_INPUT</name></field>
      <field><id>LINES_OUTPUT</id><enabled>Y</enabled><name>LINES_OUTPUT</name></field>
      <field><id>LINES_REJECTED</id><enabled>Y</enabled><name>LINES_REJECTED</name></field>
      <field><id>ERRORS</id><enabled>Y</enabled><name>ERRORS</name></field>
      <field><id>INPUT_BUFFER_ROWS</id><enabled>Y</enabled><name>INPUT_BUFFER_ROWS</name></field>
      <field><id>OUTPUT_BUFFER_ROWS</id><enabled>Y</enabled><name>OUTPUT_BUFFER_ROWS</name></field>
    </perf-log-table>
    <channel-log-table>
      <connection/>
      <schema/>
      <table/>
      <timeout_days/>
      <field><id>ID_BATCH</id><enabled>Y</enabled><name>ID_BATCH</name></field>
      <field><id>CHANNEL_ID</id><enabled>Y</enabled><name>CHANNEL_ID</name></field>
      <field><id>LOG_DATE</id><enabled>Y</enabled><name>LOG_DATE</name></field>
      <field><id>LOGGING_OBJECT_TYPE</id><enabled>Y</enabled><name>LOGGING_OBJECT_TYPE</name></field>
      <field><id>OBJECT_NAME</id><enabled>Y</enabled><name>OBJECT_NAME</name></field>
      <field><id>OBJECT_COPY</id><enabled>Y</enabled><name>OBJECT_COPY</name></field>
      <field><id>REPOSITORY_DIRECTORY</id><enabled>Y</enabled><name>REPOSITORY_DIRECTORY</name></field>
      <field><id>FILENAME</id><enabled>Y</enabled><name>FILENAME</name></field>
      <field><id>OBJECT_ID</id><enabled>Y</enabled><name>OBJECT_ID</name></field>
      <field><id>OBJECT_REVISION</id><enabled>Y</enabled><name>OBJECT_REVISION</name></field>
      <field><id>PARENT_CHANNEL_ID</id><enabled>Y</enabled><name>PARENT_CHANNEL_ID</name></field>
      <field><id>ROOT_CHANNEL_ID</id><enabled>Y</enabled><name>ROOT_CHANNEL_ID</name></field>
    </channel-log-table>
    <step-log-table>
      <connection/>
      <schema/>
      <table/>
      <timeout_days/>
      <field><id>ID_BATCH</id><enabled>Y</enabled><name>ID_BATCH</name></field>
      <field><id>CHANNEL_ID</id><enabled>Y</enabled><name>CHANNEL_ID</name></field>
      <field><id>LOG_DATE</id><enabled>Y</enabled><name>LOG_DATE</name></field>
      <field><id>TRANSNAME</id><enabled>Y</enabled><name>TRANSNAME</name></field>
      <field><id>STEPNAME</id><enabled>Y</enabled><name>STEPNAME</name></field>
      <field><id>STEP_COPY</id><enabled>Y</enabled><name>STEP_COPY</name></field>
      <field><id>LINES_READ</id><enabled>Y</enabled><name>LINES_READ</name></field>
      <field><id>LINES_WRITTEN</id><enabled>Y</enabled><name>LINES_WRITTEN</name></field>
      <field><id>LINES_UPDATED</id><enabled>Y</enabled><name>LINES_UPDATED</name></field>
      <field><id>LINES_INPUT</id><enabled>Y</enabled><name>LINES_INPUT</name></field>
      <field><id>LINES_OUTPUT</id><enabled>Y</enabled><name>LINES_OUTPUT</name></field>
      <field><id>LINES_REJECTED</id><enabled>Y</enabled><name>LINES_REJECTED</name></field>
      <field><id>ERRORS</id><enabled>Y</enabled><name>ERRORS</name></field>
      <field><id>LOG_FIELD</id><enabled>N</enabled><name>LOG_FIELD</name></field>
    </step-log-table>
    <metrics-log-table>
      <connection/>
      <schema/>
      <table/>
      <timeout_days/>
      <field><id>ID_BATCH</id><enabled>Y</enabled><name>ID_BATCH</name></field>
      <field><id>CHANNEL_ID</id><enabled>Y</enabled><name>CHANNEL_ID</name></field>
      <field><id>LOG_DATE</id><enabled>Y</enabled><name>LOG_DATE</name></field>
      <field><id>METRICS_DATE</id><enabled>Y</enabled><name>METRICS_DATE</name></field>
      <field><id>METRICS_CODE</id><enabled>Y</enabled><name>METRICS_CODE</name></field>
      <field><id>METRICS_DESCRIPTION</id><enabled>Y</enabled><name>METRICS_DESCRIPTION</name></field>
      <field><id>METRICS_SUBJECT</id><enabled>Y</enabled><name>METRICS_SUBJECT</name></field>
      <field><id>METRICS_TYPE</id><enabled>Y</enabled><name>METRICS_TYPE</name></field>
      <field><id>METRICS_VALUE</id><enabled>Y</enabled><name>METRICS_VALUE</name></field>
    </metrics-log-table>
  </log>
  <!-- Max-date settings: no connection, table or field is set. -->
  <maxdate>
    <connection/>
    <table/>
    <field/>
    <offset>0.0</offset>
    <maxdiff>0.0</maxdiff>
  </maxdate>
  <size_rowset>10000</size_rowset>
  <sleep_time_empty>50</sleep_time_empty>
  <sleep_time_full>50</sleep_time_full>
  <unique_connections>N</unique_connections>
  <feedback_shown>Y</feedback_shown>
  <feedback_size>50000</feedback_size>
  <using_thread_priorities>Y</using_thread_priorities>
  <shared_objects_file/>
  <capture_step_performance>N</capture_step_performance>
  <step_performance_capturing_delay>1000</step_performance_capturing_delay>
  <step_performance_capturing_size_limit>100</step_performance_capturing_size_limit>
  <dependencies>
  </dependencies>
  <partitionschemas>
  </partitionschemas>
  <!-- Four slave-server definitions, all pointing at localhost / 127.0.0.1.
       NOTE(review): each entry embeds a user name and an "Encrypted ..."
       password. That prefix looks like Kettle's built-in password
       obfuscation rather than strong encryption (confirm against the
       Pentaho documentation), so treat these values as exposed credentials.
       No cluster schema is defined below and every step in this file has an
       empty cluster_schema, so these entries appear to be unused here;
       consider removing them before sharing the file. -->
  <slaveservers>
    <slaveserver>
      <name>di-server</name>
      <hostname>localhost</hostname>
      <port>9080</port>
      <webAppName>pentaho-di</webAppName>
      <username>admin</username>
      <password>Encrypted 2be98afc86aa7f2e4bb18bd63c99dbdde</password>
      <proxy_hostname/>
      <proxy_port/>
      <non_proxy_hosts/>
      <master>Y</master>
    </slaveserver>
    <slaveserver>
      <name>carte8082</name>
      <hostname>127.0.0.1</hostname>
      <port>8082</port>
      <webAppName/>
      <username>cluster</username>
      <password>Encrypted 2be98afc86aa7f2e4cb1aa265cd86aac8</password>
      <proxy_hostname/>
      <proxy_port/>
      <non_proxy_hosts/>
      <master>N</master>
    </slaveserver>
    <slaveserver>
      <name>carte8081</name>
      <hostname>127.0.0.1</hostname>
      <port>8081</port>
      <webAppName/>
      <username>cluster</username>
      <password>Encrypted 2be98afc86aa7f2e4cb1aa265cd86aac8</password>
      <proxy_hostname/>
      <proxy_port/>
      <non_proxy_hosts/>
      <master>N</master>
    </slaveserver>
    <slaveserver>
      <name>carte8083</name>
      <hostname>127.0.0.1</hostname>
      <port>8083</port>
      <webAppName/>
      <username>cluster</username>
      <password>Encrypted 2be98afc86aa7f2e4cb1aa265cd86aac8</password>
      <proxy_hostname/>
      <proxy_port/>
      <non_proxy_hosts/>
      <master>N</master>
    </slaveserver>
  </slaveservers>
  <clusterschemas>
  </clusterschemas>
  <!-- Audit stamps: created and modified timestamps are identical. -->
  <created_user>-</created_user>
  <created_date>2014/12/10 12:52:01.836</created_date>
  <modified_user>-</modified_user>
  <modified_date>2014/12/10 12:52:01.836</modified_date>
</info>
<notepads>
</notepads>
<!-- Hops between steps. The enabled chain is:
       INPUT, then Uppercase First Name, then First + last = fullname, then OUTPUT.
     The hop from "Text file input" is present but disabled (enabled = N). -->
<order>
  <hop>
    <from>INPUT</from>
    <to>Uppercase First Name</to>
    <enabled>Y</enabled>
  </hop>
  <hop>
    <from>Uppercase First Name</from>
    <to>First + last = fullname</to>
    <enabled>Y</enabled>
  </hop>
  <hop>
    <from>First + last = fullname</from>
    <to>OUTPUT</to>
    <enabled>Y</enabled>
  </hop>
  <!-- Disabled alternative input; presumably kept for design-time testing (confirm). -->
  <hop>
    <from>Text file input</from>
    <to>Uppercase First Name</to>
    <enabled>N</enabled>
  </hop>
</order>
<step>
  <!-- ConcatFields step: joins "firstname" and "lastname" (in that order)
       with a single-space separator into the target field "fullname".
       removeSelectedFields is Y, so presumably the two source fields are
       dropped from the outgoing row (confirm). -->
  <name>First + last = fullname</name>
  <type>ConcatFields</type>
  <description/>
  <distribute>Y</distribute>
  <custom_distribution/>
  <copies>1</copies>
  <partitioning>
    <method>none</method>
    <schema_name/>
  </partitioning>
  <!-- The separator value is exactly one space character; do not reformat
       this element, its whitespace is data. -->
  <separator> </separator>
  <!-- NOTE(review): enclosure is a double quote with enclosure_forced = N.
       Verify how values that themselves contain a space are rendered. -->
  <enclosure>"</enclosure>
  <enclosure_forced>N</enclosure_forced>
  <enclosure_fix_disabled>N</enclosure_fix_disabled>
  <header>N</header>
  <footer>N</footer>
  <format>DOS</format>
  <compression>None</compression>
  <encoding/>
  <endedLine/>
  <fileNameInField>N</fileNameInField>
  <fileNameField/>
  <create_parent_folder>Y</create_parent_folder>
  <!-- File-output style settings. They do not look relevant to an in-stream
       concatenation; presumably they are serialized defaults (confirm).
       Values are kept unchanged. -->
  <file>
    <name>file</name>
    <is_command>N</is_command>
    <servlet_output>N</servlet_output>
    <do_not_open_new_file_init>Y</do_not_open_new_file_init>
    <extention>txt</extention>
    <append>N</append>
    <split>N</split>
    <haspartno>N</haspartno>
    <add_date>N</add_date>
    <add_time>N</add_time>
    <SpecifyFormat>N</SpecifyFormat>
    <date_time_format/>
    <add_to_result_filenames>Y</add_to_result_filenames>
    <pad>N</pad>
    <fast_dump>N</fast_dump>
    <splitevery>0</splitevery>
  </file>
  <!-- Fields to concatenate, in output order. -->
  <fields>
    <field>
      <name>firstname</name>
      <type>String</type>
      <format/>
      <currency/>
      <decimal/>
      <group/>
      <nullif/>
      <trim_type>none</trim_type>
      <length>-1</length>
      <precision>-1</precision>
    </field>
    <field>
      <name>lastname</name>
      <type>String</type>
      <format/>
      <currency/>
      <decimal/>
      <group/>
      <nullif/>
      <trim_type>none</trim_type>
      <length>-1</length>
      <precision>-1</precision>
    </field>
  </fields>
  <ConcatFields>
    <targetFieldName>fullname</targetFieldName>
    <targetFieldLength>0</targetFieldLength>
    <removeSelectedFields>Y</removeSelectedFields>
  </ConcatFields>
  <cluster_schema/>
  <remotesteps>
    <input>
    </input>
    <output>
    </output>
  </remotesteps>
  <!-- Canvas position in the graphical designer. -->
  <GUI>
    <xloc>418</xloc>
    <yloc>66</yloc>
    <draw>Y</draw>
  </GUI>
</step>
<step>
  <!-- Injector step named INPUT: declares the incoming row layout as two
       String fields, "lastname" then "firstname". Presumably rows are pushed
       in programmatically by the hosting code (confirm against the caller). -->
  <name>INPUT</name>
  <type>Injector</type>
  <description/>
  <distribute>Y</distribute>
  <custom_distribution/>
  <copies>1</copies>
  <partitioning>
    <method>none</method>
    <schema_name/>
  </partitioning>
  <fields>
    <field>
      <name>lastname</name>
      <type>String</type>
      <length>-1</length>
      <precision>-1</precision>
    </field>
    <field>
      <name>firstname</name>
      <type>String</type>
      <length>-1</length>
      <precision>-1</precision>
    </field>
  </fields>
  <cluster_schema/>
  <remotesteps>
    <input>
    </input>
    <output>
    </output>
  </remotesteps>
  <!-- Canvas position in the graphical designer. -->
  <GUI>
    <xloc>93</xloc>
    <yloc>66</yloc>
    <draw>Y</draw>
  </GUI>
</step>
<step>
  <!-- Dummy step named OUTPUT: the end of the enabled hop chain. It carries
       no step-specific settings; presumably the hosting code reads the
       result rows from this step by name (confirm against the caller). -->
  <name>OUTPUT</name>
  <type>Dummy</type>
  <description/>
  <distribute>Y</distribute>
  <custom_distribution/>
  <copies>1</copies>
  <partitioning>
    <method>none</method>
    <schema_name/>
  </partitioning>
  <cluster_schema/>
  <remotesteps>
    <input>
    </input>
    <output>
    </output>
  </remotesteps>
  <!-- Canvas position in the graphical designer. -->
  <GUI>
    <xloc>555</xloc>
    <yloc>66</yloc>
    <draw>Y</draw>
  </GUI>
</step>
<step>
  <!-- TextFileInput step: reads a semicolon-separated file with one header
       line and ten declared columns. Its only hop (to "Uppercase First Name")
       is disabled in the order section of this file, so this step appears to
       be a design-time test source rather than part of the live flow
       (confirm). -->
  <name>Text file input</name>
  <type>TextFileInput</type>
  <description/>
  <distribute>Y</distribute>
  <custom_distribution/>
  <copies>1</copies>
  <partitioning>
    <method>none</method>
    <schema_name/>
  </partitioning>
  <accept_filenames>N</accept_filenames>
  <passing_through_fields>N</passing_through_fields>
  <accept_field/>
  <accept_stepname/>
  <separator>;</separator>
  <enclosure>"</enclosure>
  <enclosure_breaks>N</enclosure_breaks>
  <escapechar/>
  <header>Y</header>
  <nr_headerlines>1</nr_headerlines>
  <footer>N</footer>
  <nr_footerlines>1</nr_footerlines>
  <line_wrapped>N</line_wrapped>
  <nr_wraps>1</nr_wraps>
  <layout_paged>N</layout_paged>
  <nr_lines_per_page>80</nr_lines_per_page>
  <nr_lines_doc_header>0</nr_lines_doc_header>
  <noempty>Y</noempty>
  <include>N</include>
  <include_field/>
  <rownum>N</rownum>
  <rownumByFile>N</rownumByFile>
  <rownum_field/>
  <format>Unix</format>
  <encoding/>
  <add_to_result_filenames>Y</add_to_result_filenames>
  <!-- NOTE(review): absolute, user-specific path; it will not resolve on
       another machine. Consider a parameter or a path relative to the
       transformation if this step is ever re-enabled. -->
  <file>
    <name>/Users/mburgess/customers-100.txt</name>
    <filemask/>
    <exclude_filemask/>
    <file_required>N</file_required>
    <include_subfolders>N</include_subfolders>
    <type>CSV</type>
    <compression>None</compression>
  </file>
  <filters>
  </filters>
  <!-- Column definitions, in file order. The Integer columns use the format
       mask " #" (leading space is part of the value and is preserved). -->
  <fields>
    <field>
      <name>id</name>
      <type>Integer</type>
      <format> #</format>
      <currency>$</currency>
      <decimal>.</decimal>
      <group>,</group>
      <nullif>-</nullif>
      <ifnull/>
      <position>-1</position>
      <length>15</length>
      <precision>0</precision>
      <trim_type>none</trim_type>
      <repeat>N</repeat>
    </field>
    <field>
      <name>lastname</name>
      <type>String</type>
      <format/>
      <currency>$</currency>
      <decimal>.</decimal>
      <group>,</group>
      <nullif>-</nullif>
      <ifnull/>
      <position>-1</position>
      <length>10</length>
      <precision>-1</precision>
      <trim_type>none</trim_type>
      <repeat>N</repeat>
    </field>
    <field>
      <name>firstname</name>
      <type>String</type>
      <format/>
      <currency>$</currency>
      <decimal>.</decimal>
      <group>,</group>
      <nullif>-</nullif>
      <ifnull/>
      <position>-1</position>
      <length>13</length>
      <precision>-1</precision>
      <trim_type>none</trim_type>
      <repeat>N</repeat>
    </field>
    <field>
      <name>zip</name>
      <type>Integer</type>
      <format> #</format>
      <currency>$</currency>
      <decimal>.</decimal>
      <group>,</group>
      <nullif>-</nullif>
      <ifnull/>
      <position>-1</position>
      <length>15</length>
      <precision>0</precision>
      <trim_type>none</trim_type>
      <repeat>N</repeat>
    </field>
    <field>
      <name>city</name>
      <type>String</type>
      <format/>
      <currency>$</currency>
      <decimal>.</decimal>
      <group>,</group>
      <nullif>-</nullif>
      <ifnull/>
      <position>-1</position>
      <length>8</length>
      <precision>-1</precision>
      <trim_type>none</trim_type>
      <repeat>N</repeat>
    </field>
    <field>
      <name>birthdate</name>
      <type>Date</type>
      <format>yyyy/MM/dd</format>
      <currency>$</currency>
      <decimal>.</decimal>
      <group>,</group>
      <nullif>-</nullif>
      <ifnull/>
      <position>-1</position>
      <length>-1</length>
      <precision>-1</precision>
      <trim_type>none</trim_type>
      <repeat>N</repeat>
    </field>
    <field>
      <name>street</name>
      <type>String</type>
      <format/>
      <currency>$</currency>
      <decimal>.</decimal>
      <group>,</group>
      <nullif>-</nullif>
      <ifnull/>
      <position>-1</position>
      <length>11</length>
      <precision>-1</precision>
      <trim_type>none</trim_type>
      <repeat>N</repeat>
    </field>
    <field>
      <name>housenr</name>
      <type>Integer</type>
      <format> #</format>
      <currency>$</currency>
      <decimal>.</decimal>
      <group>,</group>
      <nullif>-</nullif>
      <ifnull/>
      <position>-1</position>
      <length>15</length>
      <precision>0</precision>
      <trim_type>none</trim_type>
      <repeat>N</repeat>
    </field>
    <field>
      <name>stateCode</name>
      <type>String</type>
      <format/>
      <currency>$</currency>
      <decimal>.</decimal>
      <group>,</group>
      <nullif>-</nullif>
      <ifnull/>
      <position>-1</position>
      <length>9</length>
      <precision>-1</precision>
      <trim_type>none</trim_type>
      <repeat>N</repeat>
    </field>
    <field>
      <name>state</name>
      <type>String</type>
      <format/>
      <currency>$</currency>
      <decimal>.</decimal>
      <group>,</group>
      <nullif>-</nullif>
      <ifnull/>
      <position>-1</position>
      <length>30</length>
      <precision>-1</precision>
      <trim_type>none</trim_type>
      <repeat>N</repeat>
    </field>
  </fields>
  <limit>0</limit>
  <!-- Error handling: error_ignored and skip_bad_files are both N, and no
       error output fields or destination directories are configured. -->
  <error_ignored>N</error_ignored>
  <skip_bad_files>N</skip_bad_files>
  <file_error_field/>
  <file_error_message_field/>
  <error_line_skipped>N</error_line_skipped>
  <error_count_field/>
  <error_fields_field/>
  <error_text_field/>
  <bad_line_files_destination_directory/>
  <bad_line_files_extension>warning</bad_line_files_extension>
  <error_line_files_destination_directory/>
  <error_line_files_extension>error</error_line_files_extension>
  <line_number_files_destination_directory/>
  <line_number_files_extension>line</line_number_files_extension>
  <date_format_lenient>Y</date_format_lenient>
  <date_format_locale>en_US</date_format_locale>
  <!-- Optional file-metadata output fields; none are set. -->
  <shortFileFieldName/>
  <pathFieldName/>
  <hiddenFieldName/>
  <lastModificationTimeFieldName/>
  <uriNameFieldName/>
  <rootUriNameFieldName/>
  <extensionFieldName/>
  <sizeFieldName/>
  <cluster_schema/>
  <remotesteps>
    <input>
    </input>
    <output>
    </output>
  </remotesteps>
  <!-- Canvas position in the graphical designer. -->
  <GUI>
    <xloc>130</xloc>
    <yloc>175</yloc>
    <draw>Y</draw>
  </GUI>
</step>
<step>
  <!-- StringOperations step. Despite its name it configures two fields:
       "firstname" is converted to upper case and "lastname" to lower case.
       The step name is referenced by the hops in this file, so it is kept
       as is. out_stream_name is empty for both fields; presumably the
       conversion then replaces the input field in place (confirm). -->
  <name>Uppercase First Name</name>
  <type>StringOperations</type>
  <description/>
  <distribute>Y</distribute>
  <custom_distribution/>
  <copies>1</copies>
  <partitioning>
    <method>none</method>
    <schema_name/>
  </partitioning>
  <fields>
    <!-- firstname: upper case; every other operation is switched off. -->
    <field>
      <in_stream_name>firstname</in_stream_name>
      <out_stream_name/>
      <trim_type>none</trim_type>
      <lower_upper>upper</lower_upper>
      <padding_type>none</padding_type>
      <pad_char/>
      <pad_len/>
      <init_cap>no</init_cap>
      <mask_xml>none</mask_xml>
      <digits>none</digits>
      <remove_special_characters>none</remove_special_characters>
    </field>
    <!-- lastname: lower case; every other operation is switched off. -->
    <field>
      <in_stream_name>lastname</in_stream_name>
      <out_stream_name/>
      <trim_type>none</trim_type>
      <lower_upper>lower</lower_upper>
      <padding_type>none</padding_type>
      <pad_char/>
      <pad_len/>
      <init_cap>no</init_cap>
      <mask_xml>none</mask_xml>
      <digits>none</digits>
      <remove_special_characters>none</remove_special_characters>
    </field>
  </fields>
  <cluster_schema/>
  <remotesteps>
    <input>
    </input>
    <output>
    </output>
  </remotesteps>
  <!-- Canvas position in the graphical designer. -->
  <GUI>
    <xloc>263</xloc>
    <yloc>66</yloc>
    <draw>Y</draw>
  </GUI>
</step>
<!-- Trailing transformation settings: no step error handling and no
     slave/partition distribution are defined; slave_transformation is N. -->
<step_error_handling>
</step_error_handling>
<slave-step-copy-partition-distribution>
</slave-step-copy-partition-distribution>
<slave_transformation>N</slave_transformation>
</transformation>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment