Created
December 9, 2014 03:03
-
-
Save mattyb149/279b03dbe65efff3612e to your computer and use it in GitHub Desktop.
Determine the sortedness of data in PDI (Pentaho Data Integration) using Spearman's rank correlation coefficient
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8"?> | |
<transformation> | |
<info> | |
<!-- Transformation-level settings for "Spearman_sort_test" (type Normal, directory "/").
     Contains the five logging-table definitions, the maxdate settings, rowset/feedback
     sizes, step-performance capture options and the created/modified timestamps.
     Every logging table below has an empty connection, schema and table; only the
     per-field enabled flags are filled in. -->
<name>Spearman_sort_test</name> | |
<description/> | |
<extended_description/> | |
<trans_version/> | |
<trans_type>Normal</trans_type> | |
<directory>/</directory> | |
<parameters> | |
</parameters> | |
<log> | |
<trans-log-table><connection/> | |
<schema/> | |
<table/> | |
<size_limit_lines/> | |
<interval/> | |
<timeout_days/> | |
<field><id>ID_BATCH</id><enabled>Y</enabled><name>ID_BATCH</name></field><field><id>CHANNEL_ID</id><enabled>Y</enabled><name>CHANNEL_ID</name></field><field><id>TRANSNAME</id><enabled>Y</enabled><name>TRANSNAME</name></field><field><id>STATUS</id><enabled>Y</enabled><name>STATUS</name></field><field><id>LINES_READ</id><enabled>Y</enabled><name>LINES_READ</name><subject/></field><field><id>LINES_WRITTEN</id><enabled>Y</enabled><name>LINES_WRITTEN</name><subject/></field><field><id>LINES_UPDATED</id><enabled>Y</enabled><name>LINES_UPDATED</name><subject/></field><field><id>LINES_INPUT</id><enabled>Y</enabled><name>LINES_INPUT</name><subject/></field><field><id>LINES_OUTPUT</id><enabled>Y</enabled><name>LINES_OUTPUT</name><subject/></field><field><id>LINES_REJECTED</id><enabled>Y</enabled><name>LINES_REJECTED</name><subject/></field><field><id>ERRORS</id><enabled>Y</enabled><name>ERRORS</name></field><field><id>STARTDATE</id><enabled>Y</enabled><name>STARTDATE</name></field><field><id>ENDDATE</id><enabled>Y</enabled><name>ENDDATE</name></field><field><id>LOGDATE</id><enabled>Y</enabled><name>LOGDATE</name></field><field><id>DEPDATE</id><enabled>Y</enabled><name>DEPDATE</name></field><field><id>REPLAYDATE</id><enabled>Y</enabled><name>REPLAYDATE</name></field><field><id>LOG_FIELD</id><enabled>Y</enabled><name>LOG_FIELD</name></field><field><id>EXECUTING_SERVER</id><enabled>N</enabled><name>EXECUTING_SERVER</name></field><field><id>EXECUTING_USER</id><enabled>N</enabled><name>EXECUTING_USER</name></field><field><id>CLIENT</id><enabled>N</enabled><name>CLIENT</name></field></trans-log-table> | |
<perf-log-table><connection/> | |
<schema/> | |
<table/> | |
<interval/> | |
<timeout_days/> | |
<field><id>ID_BATCH</id><enabled>Y</enabled><name>ID_BATCH</name></field><field><id>SEQ_NR</id><enabled>Y</enabled><name>SEQ_NR</name></field><field><id>LOGDATE</id><enabled>Y</enabled><name>LOGDATE</name></field><field><id>TRANSNAME</id><enabled>Y</enabled><name>TRANSNAME</name></field><field><id>STEPNAME</id><enabled>Y</enabled><name>STEPNAME</name></field><field><id>STEP_COPY</id><enabled>Y</enabled><name>STEP_COPY</name></field><field><id>LINES_READ</id><enabled>Y</enabled><name>LINES_READ</name></field><field><id>LINES_WRITTEN</id><enabled>Y</enabled><name>LINES_WRITTEN</name></field><field><id>LINES_UPDATED</id><enabled>Y</enabled><name>LINES_UPDATED</name></field><field><id>LINES_INPUT</id><enabled>Y</enabled><name>LINES_INPUT</name></field><field><id>LINES_OUTPUT</id><enabled>Y</enabled><name>LINES_OUTPUT</name></field><field><id>LINES_REJECTED</id><enabled>Y</enabled><name>LINES_REJECTED</name></field><field><id>ERRORS</id><enabled>Y</enabled><name>ERRORS</name></field><field><id>INPUT_BUFFER_ROWS</id><enabled>Y</enabled><name>INPUT_BUFFER_ROWS</name></field><field><id>OUTPUT_BUFFER_ROWS</id><enabled>Y</enabled><name>OUTPUT_BUFFER_ROWS</name></field></perf-log-table> | |
<channel-log-table><connection/> | |
<schema/> | |
<table/> | |
<timeout_days/> | |
<field><id>ID_BATCH</id><enabled>Y</enabled><name>ID_BATCH</name></field><field><id>CHANNEL_ID</id><enabled>Y</enabled><name>CHANNEL_ID</name></field><field><id>LOG_DATE</id><enabled>Y</enabled><name>LOG_DATE</name></field><field><id>LOGGING_OBJECT_TYPE</id><enabled>Y</enabled><name>LOGGING_OBJECT_TYPE</name></field><field><id>OBJECT_NAME</id><enabled>Y</enabled><name>OBJECT_NAME</name></field><field><id>OBJECT_COPY</id><enabled>Y</enabled><name>OBJECT_COPY</name></field><field><id>REPOSITORY_DIRECTORY</id><enabled>Y</enabled><name>REPOSITORY_DIRECTORY</name></field><field><id>FILENAME</id><enabled>Y</enabled><name>FILENAME</name></field><field><id>OBJECT_ID</id><enabled>Y</enabled><name>OBJECT_ID</name></field><field><id>OBJECT_REVISION</id><enabled>Y</enabled><name>OBJECT_REVISION</name></field><field><id>PARENT_CHANNEL_ID</id><enabled>Y</enabled><name>PARENT_CHANNEL_ID</name></field><field><id>ROOT_CHANNEL_ID</id><enabled>Y</enabled><name>ROOT_CHANNEL_ID</name></field></channel-log-table> | |
<step-log-table><connection/> | |
<schema/> | |
<table/> | |
<timeout_days/> | |
<field><id>ID_BATCH</id><enabled>Y</enabled><name>ID_BATCH</name></field><field><id>CHANNEL_ID</id><enabled>Y</enabled><name>CHANNEL_ID</name></field><field><id>LOG_DATE</id><enabled>Y</enabled><name>LOG_DATE</name></field><field><id>TRANSNAME</id><enabled>Y</enabled><name>TRANSNAME</name></field><field><id>STEPNAME</id><enabled>Y</enabled><name>STEPNAME</name></field><field><id>STEP_COPY</id><enabled>Y</enabled><name>STEP_COPY</name></field><field><id>LINES_READ</id><enabled>Y</enabled><name>LINES_READ</name></field><field><id>LINES_WRITTEN</id><enabled>Y</enabled><name>LINES_WRITTEN</name></field><field><id>LINES_UPDATED</id><enabled>Y</enabled><name>LINES_UPDATED</name></field><field><id>LINES_INPUT</id><enabled>Y</enabled><name>LINES_INPUT</name></field><field><id>LINES_OUTPUT</id><enabled>Y</enabled><name>LINES_OUTPUT</name></field><field><id>LINES_REJECTED</id><enabled>Y</enabled><name>LINES_REJECTED</name></field><field><id>ERRORS</id><enabled>Y</enabled><name>ERRORS</name></field><field><id>LOG_FIELD</id><enabled>N</enabled><name>LOG_FIELD</name></field></step-log-table> | |
<metrics-log-table><connection/> | |
<schema/> | |
<table/> | |
<timeout_days/> | |
<field><id>ID_BATCH</id><enabled>Y</enabled><name>ID_BATCH</name></field><field><id>CHANNEL_ID</id><enabled>Y</enabled><name>CHANNEL_ID</name></field><field><id>LOG_DATE</id><enabled>Y</enabled><name>LOG_DATE</name></field><field><id>METRICS_DATE</id><enabled>Y</enabled><name>METRICS_DATE</name></field><field><id>METRICS_CODE</id><enabled>Y</enabled><name>METRICS_CODE</name></field><field><id>METRICS_DESCRIPTION</id><enabled>Y</enabled><name>METRICS_DESCRIPTION</name></field><field><id>METRICS_SUBJECT</id><enabled>Y</enabled><name>METRICS_SUBJECT</name></field><field><id>METRICS_TYPE</id><enabled>Y</enabled><name>METRICS_TYPE</name></field><field><id>METRICS_VALUE</id><enabled>Y</enabled><name>METRICS_VALUE</name></field></metrics-log-table> | |
</log> | |
<maxdate> | |
<connection/> | |
<table/> | |
<field/> | |
<offset>0.0</offset> | |
<maxdiff>0.0</maxdiff> | |
</maxdate> | |
<size_rowset>10000</size_rowset> | |
<sleep_time_empty>50</sleep_time_empty> | |
<sleep_time_full>50</sleep_time_full> | |
<unique_connections>N</unique_connections> | |
<feedback_shown>Y</feedback_shown> | |
<feedback_size>50000</feedback_size> | |
<using_thread_priorities>Y</using_thread_priorities> | |
<shared_objects_file/> | |
<capture_step_performance>N</capture_step_performance> | |
<step_performance_capturing_delay>1000</step_performance_capturing_delay> | |
<step_performance_capturing_size_limit>100</step_performance_capturing_size_limit> | |
<dependencies> | |
</dependencies> | |
<partitionschemas> | |
</partitionschemas> | |
<slaveservers> | |
</slaveservers> | |
<clusterschemas> | |
</clusterschemas> | |
<created_user>-</created_user> | |
<created_date>2014/12/08 16:14:40.572</created_date> | |
<modified_user>-</modified_user> | |
<modified_date>2014/12/08 16:14:40.572</modified_date> | |
</info> | |
<notepads> | |
<!-- Two canvas annotations with identical font and colour settings:
     one pointing up at the sort step ("Sort on the desired column(s) here.") and
     one labelling the output step.
     NOTE(review): the size tag is spelled "heigth" in this file; presumably that is the
     spelling the loader expects, so do not correct it without confirming. -->
<notepad> | |
<note> ^^^^^^
Sort on the desired
 column(s) here.</note> | |
<xloc>99</xloc> | |
<yloc>220</yloc> | |
<width>131</width> | |
<heigth>58</heigth> | |
<fontname>Lucida Grande</fontname> | |
<fontsize>13</fontsize> | |
<fontbold>N</fontbold> | |
<fontitalic>N</fontitalic> | |
<fontcolorred>0</fontcolorred> | |
<fontcolorgreen>0</fontcolorgreen> | |
<fontcolorblue>0</fontcolorblue> | |
<backgroundcolorred>255</backgroundcolorred> | |
<backgroundcolorgreen>205</backgroundcolorgreen> | |
<backgroundcolorblue>112</backgroundcolorblue> | |
<bordercolorred>100</bordercolorred> | |
<bordercolorgreen>100</bordercolorgreen> | |
<bordercolorblue>100</bordercolorblue> | |
<drawshadow>Y</drawshadow> | |
</notepad> | |
<notepad> | |
<note>Output step --></note> | |
<xloc>2</xloc> | |
<yloc>318</yloc> | |
<width>115</width> | |
<heigth>26</heigth> | |
<fontname>Lucida Grande</fontname> | |
<fontsize>13</fontsize> | |
<fontbold>N</fontbold> | |
<fontitalic>N</fontitalic> | |
<fontcolorred>0</fontcolorred> | |
<fontcolorgreen>0</fontcolorgreen> | |
<fontcolorblue>0</fontcolorblue> | |
<backgroundcolorred>255</backgroundcolorred> | |
<backgroundcolorgreen>205</backgroundcolorgreen> | |
<backgroundcolorblue>112</backgroundcolorblue> | |
<bordercolorred>100</bordercolorred> | |
<bordercolorgreen>100</bordercolorgreen> | |
<bordercolorblue>100</bordercolorblue> | |
<drawshadow>Y</drawshadow> | |
</notepad> | |
</notepads> | |
<order> | |
<!-- Hops (step-to-step connections). Enabled data flow, as wired below:
       CSV file input → Select values → Sort by name → + Sorted rownum → Join on sort columns
       CSV file input → Join on sort columns            (second input of the join)
       Join on sort columns → Get Δ and Δ^2 → Σ(Δ^2) and #Rows → Spearman's ρ
         → ρ, ρ, ρ, your boat... → How sorted is the data?
     The hop "Select values" → "+ Sorted rownum" is present but disabled (enabled=N);
     it would bypass the sort step. -->
<hop> <from>Sort by name</from><to>+ Sorted rownum</to><enabled>Y</enabled> </hop> | |
<hop> <from>CSV file input</from><to>Select values</to><enabled>Y</enabled> </hop> | |
<hop> <from>Select values</from><to>Sort by name</to><enabled>Y</enabled> </hop> | |
<hop> <from>CSV file input</from><to>Join on sort columns</to><enabled>Y</enabled> </hop> | |
<hop> <from>+ Sorted rownum</from><to>Join on sort columns</to><enabled>Y</enabled> </hop> | |
<hop> <from>Join on sort columns</from><to>Get Δ and Δ^2</to><enabled>Y</enabled> </hop> | |
<hop> <from>Get Δ and Δ^2</from><to>Σ(Δ^2) and #Rows</to><enabled>Y</enabled> </hop> | |
<hop> <from>Σ(Δ^2) and #Rows</from><to>Spearman's ρ</to><enabled>Y</enabled> </hop> | |
<hop> <from>Select values</from><to>+ Sorted rownum</to><enabled>N</enabled> </hop> | |
<hop> <from>Spearman's ρ</from><to>ρ, ρ, ρ, your boat...</to><enabled>Y</enabled> </hop> | |
<hop> <from>ρ, ρ, ρ, your boat...</from><to>How sorted is the data?</to><enabled>Y</enabled> </hop> | |
</order> | |
<step> | |
<!-- Sequence step: adds the field "sort_rownum" to each row arriving from "Sort by name",
     starting at 1 and incrementing by 1, i.e. the row's position in sorted order.
     use_database=N and use_counter=Y: presumably an in-transformation counter rather
     than a database sequence (no connection is configured). -->
<name>+ Sorted rownum</name> | |
<type>Sequence</type> | |
<description/> | |
<distribute>N</distribute> | |
<custom_distribution/> | |
<copies>1</copies> | |
<partitioning> | |
<method>none</method> | |
<schema_name/> | |
</partitioning> | |
<valuename>sort_rownum</valuename> | |
<use_database>N</use_database> | |
<connection/> | |
<schema/> | |
<seqname>SEQ_</seqname> | |
<use_counter>Y</use_counter> | |
<counter_name/> | |
<start_at>1</start_at> | |
<increment_by>1</increment_by> | |
<max_value>999999999</max_value> | |
<cluster_schema/> | |
<remotesteps> <input> </input> <output> </output> </remotesteps> <GUI> | |
<xloc>241</xloc> | |
<yloc>125</yloc> | |
<draw>Y</draw> | |
</GUI> | |
</step> | |
<step> | |
<!-- CsvInput step: reads the sample customers file (";" separator, double-quote
     enclosure, header row present) and adds the row-number field "raw_rownum",
     which records each row's original position in the file.
     distribute=N here, and this step has two outgoing hops ("Select values" and
     "Join on sort columns"); presumably N means rows are copied to both rather than
     split between them. TODO confirm.
     NOTE(review): the filename is a relative path; presumably it only resolves when
     PDI is started from its installation directory. Confirm before reusing elsewhere. -->
<name>CSV file input</name> | |
<type>CsvInput</type> | |
<description/> | |
<distribute>N</distribute> | |
<custom_distribution/> | |
<copies>1</copies> | |
<partitioning> | |
<method>none</method> | |
<schema_name/> | |
</partitioning> | |
<filename>samples/transformations/files/customers-100.txt</filename> | |
<filename_field/> | |
<rownum_field>raw_rownum</rownum_field> | |
<include_filename>N</include_filename> | |
<separator>;</separator> | |
<enclosure>"</enclosure> | |
<header>Y</header> | |
<buffer_size>50000</buffer_size> | |
<lazy_conversion>Y</lazy_conversion> | |
<add_filename_result>N</add_filename_result> | |
<parallel>N</parallel> | |
<newline_possible>N</newline_possible> | |
<encoding/> | |
<!-- Ten input columns: id, name, firstname, zip, city, birthdate, street, housenr,
     stateCode, state. Only "name" is used by the downstream sort and join. -->
<fields> | |
<field> | |
<name>id</name> | |
<type>Integer</type> | |
<format> #</format> | |
<currency>$</currency> | |
<decimal>.</decimal> | |
<group>,</group> | |
<length>15</length> | |
<precision>0</precision> | |
<trim_type>none</trim_type> | |
</field> | |
<field> | |
<name>name</name> | |
<type>String</type> | |
<format/> | |
<currency>$</currency> | |
<decimal>.</decimal> | |
<group>,</group> | |
<length>10</length> | |
<precision>-1</precision> | |
<trim_type>none</trim_type> | |
</field> | |
<field> | |
<name>firstname</name> | |
<type>String</type> | |
<format/> | |
<currency>$</currency> | |
<decimal>.</decimal> | |
<group>,</group> | |
<length>13</length> | |
<precision>-1</precision> | |
<trim_type>none</trim_type> | |
</field> | |
<field> | |
<name>zip</name> | |
<type>Integer</type> | |
<format> #</format> | |
<currency>$</currency> | |
<decimal>.</decimal> | |
<group>,</group> | |
<length>15</length> | |
<precision>0</precision> | |
<trim_type>none</trim_type> | |
</field> | |
<field> | |
<name>city</name> | |
<type>String</type> | |
<format/> | |
<currency>$</currency> | |
<decimal>.</decimal> | |
<group>,</group> | |
<length>8</length> | |
<precision>-1</precision> | |
<trim_type>none</trim_type> | |
</field> | |
<field> | |
<name>birthdate</name> | |
<type>Date</type> | |
<format>yyyy/MM/dd</format> | |
<currency>$</currency> | |
<decimal>.</decimal> | |
<group>,</group> | |
<length>-1</length> | |
<precision>-1</precision> | |
<trim_type>none</trim_type> | |
</field> | |
<field> | |
<name>street</name> | |
<type>String</type> | |
<format/> | |
<currency>$</currency> | |
<decimal>.</decimal> | |
<group>,</group> | |
<length>11</length> | |
<precision>-1</precision> | |
<trim_type>none</trim_type> | |
</field> | |
<field> | |
<name>housenr</name> | |
<type>Integer</type> | |
<format> #</format> | |
<currency>$</currency> | |
<decimal>.</decimal> | |
<group>,</group> | |
<length>15</length> | |
<precision>0</precision> | |
<trim_type>none</trim_type> | |
</field> | |
<field> | |
<name>stateCode</name> | |
<type>String</type> | |
<format/> | |
<currency>$</currency> | |
<decimal>.</decimal> | |
<group>,</group> | |
<length>9</length> | |
<precision>-1</precision> | |
<trim_type>none</trim_type> | |
</field> | |
<field> | |
<name>state</name> | |
<type>String</type> | |
<format/> | |
<currency>$</currency> | |
<decimal>.</decimal> | |
<group>,</group> | |
<length>30</length> | |
<precision>-1</precision> | |
<trim_type>none</trim_type> | |
</field> | |
</fields> | |
<cluster_schema/> | |
<remotesteps> <input> </input> <output> </output> </remotesteps> <GUI> | |
<xloc>47</xloc> | |
<yloc>32</yloc> | |
<draw>Y</draw> | |
</GUI> | |
</step> | |
<step> | |
<!-- SortRows step: orders the incoming rows by "sort_name", ascending and
     case-sensitive, without de-duplicating (unique_rows=N). The directory, prefix
     and sort_size settings presumably govern temporary files used for large
     sorts. This is the step to edit when testing sortedness on other column(s). -->
<name>Sort by name</name> | |
<type>SortRows</type> | |
<description/> | |
<distribute>Y</distribute> | |
<custom_distribution/> | |
<copies>1</copies> | |
<partitioning> | |
<method>none</method> | |
<schema_name/> | |
</partitioning> | |
<directory>%%java.io.tmpdir%%</directory> | |
<prefix>out</prefix> | |
<sort_size>1000000</sort_size> | |
<free_memory/> | |
<compress>N</compress> | |
<compress_variable/> | |
<unique_rows>N</unique_rows> | |
<fields> | |
<field> | |
<name>sort_name</name> | |
<ascending>Y</ascending> | |
<case_sensitive>Y</case_sensitive> | |
<presorted>N</presorted> | |
</field> | |
</fields> | |
<cluster_schema/> | |
<remotesteps> <input> </input> <output> </output> </remotesteps> <GUI> | |
<xloc>144</xloc> | |
<yloc>158</yloc> | |
<draw>Y</draw> | |
</GUI> | |
</step> | |
<step> | |
<!-- SelectValues step: selects only the "name" column and renames it to "sort_name"
     (select_unspecified=N), so the sorted stream carries a differently named copy
     of the key that can later be joined back against the original "name". -->
<name>Select values</name> | |
<type>SelectValues</type> | |
<description/> | |
<distribute>Y</distribute> | |
<custom_distribution/> | |
<copies>1</copies> | |
<partitioning> | |
<method>none</method> | |
<schema_name/> | |
</partitioning> | |
<fields> <field> <name>name</name> | |
<rename>sort_name</rename> | |
<length>-2</length> | |
<precision>-2</precision> | |
</field> <select_unspecified>N</select_unspecified> | |
</fields> <cluster_schema/> | |
<remotesteps> <input> </input> <output> </output> </remotesteps> <GUI> | |
<xloc>46</xloc> | |
<yloc>125</yloc> | |
<draw>Y</draw> | |
</GUI> | |
</step> | |
<step> | |
<!-- JoinRows step: combines the sorted stream (main step "+ Sorted rownum") with the
     raw CSV stream, keeping row pairs where sort_name = name. Each surviving row then
     carries both sort_rownum and raw_rownum for the same key.
     NOTE(review): if "name" values are not unique, each duplicate presumably matches
     more than one row, which would inflate the row count and Σ(Δ^2) downstream.
     Confirm the key is unique in the input. -->
<name>Join on sort columns</name> | |
<type>JoinRows</type> | |
<description/> | |
<distribute>Y</distribute> | |
<custom_distribution/> | |
<copies>1</copies> | |
<partitioning> | |
<method>none</method> | |
<schema_name/> | |
</partitioning> | |
<directory>%%java.io.tmpdir%%</directory> | |
<prefix>out</prefix> | |
<cache_size>5000</cache_size> | |
<main>+ Sorted rownum</main> | |
<compare> | |
<condition> | |
<negated>N</negated> | |
<leftvalue>sort_name</leftvalue> | |
<function>=</function> | |
<rightvalue>name</rightvalue> | |
</condition> | |
</compare> | |
<cluster_schema/> | |
<remotesteps> <input> </input> <output> </output> </remotesteps> <GUI> | |
<xloc>241</xloc> | |
<yloc>32</yloc> | |
<draw>Y</draw> | |
</GUI> | |
</step> | |
<step> | |
<!-- Calculator step producing the per-row rank difference and its square:
       Δ   = SUBTRACT(sort_rownum, raw_rownum)   (presumably field_a minus field_b)
       Δ^2 = SQUARE(Δ)
     Both fields are kept (remove=N); value_type is None for both, so the result type
     is presumably derived from the inputs. -->
<name>Get Δ and Δ^2</name> | |
<type>Calculator</type> | |
<description/> | |
<distribute>Y</distribute> | |
<custom_distribution/> | |
<copies>1</copies> | |
<partitioning> | |
<method>none</method> | |
<schema_name/> | |
</partitioning> | |
<calculation><field_name>Δ</field_name> | |
<calc_type>SUBTRACT</calc_type> | |
<field_a>sort_rownum</field_a> | |
<field_b>raw_rownum</field_b> | |
<field_c/> | |
<value_type>None</value_type> | |
<value_length>-1</value_length> | |
<value_precision>-1</value_precision> | |
<remove>N</remove> | |
<conversion_mask/> | |
<decimal_symbol/> | |
<grouping_symbol/> | |
<currency_symbol/> | |
</calculation> | |
<calculation><field_name>Δ^2</field_name> | |
<calc_type>SQUARE</calc_type> | |
<field_a>Δ</field_a> | |
<field_b/> | |
<field_c/> | |
<value_type>None</value_type> | |
<value_length>-1</value_length> | |
<value_precision>-1</value_precision> | |
<remove>N</remove> | |
<conversion_mask/> | |
<decimal_symbol/> | |
<grouping_symbol/> | |
<currency_symbol/> | |
</calculation> | |
<cluster_schema/> | |
<remotesteps> <input> </input> <output> </output> </remotesteps> <GUI> | |
<xloc>399</xloc> | |
<yloc>32</yloc> | |
<draw>Y</draw> | |
</GUI> | |
</step> | |
<step> | |
<!-- GroupBy step with an empty <group>, so the aggregates run over the whole stream:
       "Σ(Δ^2)"         = SUM of the Δ^2 field
       "Number of Rows" = COUNT_ANY (no subject field)
     give_back_row=N: presumably no output row is produced when the input is empty. -->
<name>Σ(Δ^2) and #Rows</name> | |
<type>GroupBy</type> | |
<description/> | |
<distribute>Y</distribute> | |
<custom_distribution/> | |
<copies>1</copies> | |
<partitioning> | |
<method>none</method> | |
<schema_name/> | |
</partitioning> | |
<all_rows>N</all_rows> | |
<ignore_aggregate>N</ignore_aggregate> | |
<field_ignore/> | |
<directory>%%java.io.tmpdir%%</directory> | |
<prefix>grp</prefix> | |
<add_linenr>N</add_linenr> | |
<linenr_fieldname/> | |
<give_back_row>N</give_back_row> | |
<group> | |
</group> | |
<fields> | |
<field> | |
<aggregate>Σ(Δ^2)</aggregate> | |
<subject>Δ^2</subject> | |
<type>SUM</type> | |
<valuefield>sum-diff-squared</valuefield> | |
</field> | |
<field> | |
<aggregate>Number of Rows</aggregate> | |
<subject/> | |
<type>COUNT_ANY</type> | |
<valuefield/> | |
</field> | |
</fields> | |
<cluster_schema/> | |
<remotesteps> <input> </input> <output> </output> </remotesteps> <GUI> | |
<xloc>402</xloc> | |
<yloc>122</yloc> | |
<draw>Y</draw> | |
</GUI> | |
</step> | |
<step> | |
<!-- Calculator step that evaluates Spearman's rank correlation from the two upstream
     aggregates, one calculation per arithmetic operation:
       ρ = ONE - (SIX * Σ(Δ^2)) / (Number of Rows * (Number of Rows^2 - ONE))
     and then ABS(ρ). Every intermediate field, including ρ itself, has remove=Y;
     only ABS(ρ) has remove=N and is left in the output row.
     NOTE(review): when Number of Rows is 1 the divisor N(N^2-1) is 0; how DIVIDE
     behaves in that case is not visible here. -->
<name>Spearman's ρ</name> | |
<type>Calculator</type> | |
<description/> | |
<distribute>Y</distribute> | |
<custom_distribution/> | |
<copies>1</copies> | |
<partitioning> | |
<method>none</method> | |
<schema_name/> | |
</partitioning> | |
<!-- Constants 1 and 6 used by the formula. -->
<calculation><field_name>ONE</field_name> | |
<calc_type>CONSTANT</calc_type> | |
<field_a>1</field_a> | |
<field_b/> | |
<field_c/> | |
<value_type>Number</value_type> | |
<value_length>-1</value_length> | |
<value_precision>-1</value_precision> | |
<remove>Y</remove> | |
<conversion_mask/> | |
<decimal_symbol/> | |
<grouping_symbol/> | |
<currency_symbol/> | |
</calculation> | |
<calculation><field_name>SIX</field_name> | |
<calc_type>CONSTANT</calc_type> | |
<field_a>6</field_a> | |
<field_b/> | |
<field_c/> | |
<value_type>Number</value_type> | |
<value_length>-1</value_length> | |
<value_precision>-1</value_precision> | |
<remove>Y</remove> | |
<conversion_mask/> | |
<decimal_symbol/> | |
<grouping_symbol/> | |
<currency_symbol/> | |
</calculation> | |
<!-- Denominator, built in three steps: N^2, then N^2-1, then N(N^2-1). -->
<calculation><field_name>N^2</field_name> | |
<calc_type>SQUARE</calc_type> | |
<field_a>Number of Rows</field_a> | |
<field_b/> | |
<field_c/> | |
<value_type>Number</value_type> | |
<value_length>-1</value_length> | |
<value_precision>-1</value_precision> | |
<remove>Y</remove> | |
<conversion_mask/> | |
<decimal_symbol/> | |
<grouping_symbol/> | |
<currency_symbol/> | |
</calculation> | |
<calculation><field_name>N^2-1</field_name> | |
<calc_type>SUBTRACT</calc_type> | |
<field_a>N^2</field_a> | |
<field_b>ONE</field_b> | |
<field_c/> | |
<value_type>Number</value_type> | |
<value_length>-1</value_length> | |
<value_precision>-1</value_precision> | |
<remove>Y</remove> | |
<conversion_mask/> | |
<decimal_symbol/> | |
<grouping_symbol/> | |
<currency_symbol/> | |
</calculation> | |
<calculation><field_name>N(N^2-1)</field_name> | |
<calc_type>MULTIPLY</calc_type> | |
<field_a>Number of Rows</field_a> | |
<field_b>N^2-1</field_b> | |
<field_c/> | |
<value_type>Number</value_type> | |
<value_length>-1</value_length> | |
<value_precision>-1</value_precision> | |
<remove>Y</remove> | |
<conversion_mask/> | |
<decimal_symbol/> | |
<grouping_symbol/> | |
<currency_symbol/> | |
</calculation> | |
<!-- Numerator 6Σ(Δ^2), then the quotient numerator / denominator. -->
<calculation><field_name>6Σ(Δ^2)</field_name> | |
<calc_type>MULTIPLY</calc_type> | |
<field_a>SIX</field_a> | |
<field_b>Σ(Δ^2)</field_b> | |
<field_c/> | |
<value_type>Number</value_type> | |
<value_length>-1</value_length> | |
<value_precision>-1</value_precision> | |
<remove>Y</remove> | |
<conversion_mask/> | |
<decimal_symbol/> | |
<grouping_symbol/> | |
<currency_symbol/> | |
</calculation> | |
<calculation><field_name>6Σ(Δ^2)/N(N^2-1)</field_name> | |
<calc_type>DIVIDE</calc_type> | |
<field_a>6Σ(Δ^2)</field_a> | |
<field_b>N(N^2-1)</field_b> | |
<field_c/> | |
<value_type>Number</value_type> | |
<value_length>-1</value_length> | |
<value_precision>-1</value_precision> | |
<remove>Y</remove> | |
<conversion_mask/> | |
<decimal_symbol/> | |
<grouping_symbol/> | |
<currency_symbol/> | |
</calculation> | |
<!-- ρ = ONE minus the quotient; ABS(ρ) is the only field this step keeps. -->
<calculation><field_name>ρ</field_name> | |
<calc_type>SUBTRACT</calc_type> | |
<field_a>ONE</field_a> | |
<field_b>6Σ(Δ^2)/N(N^2-1)</field_b> | |
<field_c/> | |
<value_type>Number</value_type> | |
<value_length>-1</value_length> | |
<value_precision>-1</value_precision> | |
<remove>Y</remove> | |
<conversion_mask/> | |
<decimal_symbol/> | |
<grouping_symbol/> | |
<currency_symbol/> | |
</calculation> | |
<calculation><field_name>ABS(ρ)</field_name> | |
<calc_type>ABS</calc_type> | |
<field_a>ρ</field_a> | |
<field_b/> | |
<field_c/> | |
<value_type>Number</value_type> | |
<value_length>-1</value_length> | |
<value_precision>-1</value_precision> | |
<remove>N</remove> | |
<conversion_mask/> | |
<decimal_symbol/> | |
<grouping_symbol/> | |
<currency_symbol/> | |
</calculation> | |
<cluster_schema/> | |
<remotesteps> <input> </input> <output> </output> </remotesteps> <GUI> | |
<xloc>403</xloc> | |
<yloc>218</yloc> | |
<draw>Y</draw> | |
</GUI> | |
</step> | |
<step> | |
<!-- NumberRange step: maps ABS(ρ) to a human-readable label in the new field
     "Sortedness"; values matching no rule get the fallback "unknown".
     The Number Range step treats the lower bound as inclusive and the upper bound as
     exclusive (lower <= value < upper), so adjacent rules share a boundary value
     without overlapping. -->
<name>How sorted is the data?</name> | |
<type>NumberRange</type> | |
<description/> | |
<distribute>Y</distribute> | |
<custom_distribution/> | |
<copies>1</copies> | |
<partitioning> | |
<method>none</method> | |
<schema_name/> | |
</partitioning> | |
<inputField>ABS(ρ)</inputField> | |
<outputField>Sortedness</outputField> | |
<fallBackValue>unknown</fallBackValue> | |
<rules> | |
<rule> | |
<lower_bound>0.0</lower_bound> | |
<upper_bound>0.33</upper_bound> | |
<value>Not very sorted</value> | |
</rule> | |
<rule> | |
<lower_bound>0.33</lower_bound> | |
<upper_bound>0.66</upper_bound> | |
<value>Somewhat sorted</value> | |
</rule> | |
<rule> | |
<lower_bound>0.66</lower_bound> | |
<!-- Upper bound is deliberately above 1.0: the bound is exclusive, and perfectly
     sorted or perfectly reverse-sorted input yields ABS(ρ) = 1.0 exactly, which an
     upper bound of 1.0 would push to the "unknown" fallback. -->
<upper_bound>1.01</upper_bound> | |
<value>Sorted (or reverse sorted)</value> | |
</rule> | |
</rules> | |
<cluster_schema/> | |
<remotesteps> <input> </input> <output> </output> </remotesteps> <GUI> | |
<xloc>147</xloc> | |
<yloc>321</yloc> | |
<draw>Y</draw> | |
</GUI> | |
</step> | |
<step> | |
<!-- SelectValues step: selects only the ABS(ρ) field, without renaming
     (select_unspecified=N), so the row passed to the final classification step
     carries just the correlation magnitude. -->
<name>ρ, ρ, ρ, your boat...</name> | |
<type>SelectValues</type> | |
<description/> | |
<distribute>Y</distribute> | |
<custom_distribution/> | |
<copies>1</copies> | |
<partitioning> | |
<method>none</method> | |
<schema_name/> | |
</partitioning> | |
<fields> <field> <name>ABS(ρ)</name> | |
<rename/> | |
<length>-2</length> | |
<precision>-2</precision> | |
</field> <select_unspecified>N</select_unspecified> | |
</fields> <cluster_schema/> | |
<remotesteps> <input> </input> <output> </output> </remotesteps> <GUI> | |
<xloc>400</xloc> | |
<yloc>321</yloc> | |
<draw>Y</draw> | |
</GUI> | |
</step> | |
<step_error_handling> | |
</step_error_handling> | |
<slave-step-copy-partition-distribution> | |
</slave-step-copy-partition-distribution> | |
<slave_transformation>N</slave_transformation> | |
</transformation> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment