Skip to content

Instantly share code, notes, and snippets.

@raleighlinda
Created March 21, 2013 14:34
Show Gist options
  • Save raleighlinda/5213490 to your computer and use it in GitHub Desktop.
Save raleighlinda/5213490 to your computer and use it in GitHub Desktop.
Titanic Enterprise Miner
<?xml version="1.0" encoding="UTF-8"?><_ROOT_ EMVERSION="12.1" ORIENTATION="HORIZONTAL">
<Workspace>
<NODES>
<NODE CLASS="SASHELP.EMCORE.EMCODETOOL.CLASS" COMPONENT="SASCode" ID="EMCODE" X="426" Y="252" DESCRIPTION="SAS Code" DIAGRAMID="_ROOT_" PARENTID="" TOOLTYPE="UTILITY" TOOLPREFIX="EMCODE">
<PROPERTIES UsePriors="Y" ToolType="UTILITY" DataNeeded="N" PublishCode="PUBLISH" ScoreCodeFormat="DATASTEP" MetaAdvisor="BASIC" ForceRun="N" RunAction="Train" Component="SASCode">
<VariableProperties/>
<EmVariableAttributes/>
<EmTargetAttributes/>
<EmFileProperties>
<USERTRAINCODE Type="EXTERNAL" Extension="sas">
<Data><![CDATA[
/* Purpose: write the predicted class of every scored row to a CSV file. */
/* Step 1: copy only the EM_CLASSIFICATION column from the data set      */
/* referenced by the EM_IMPORT_SCORE macro variable into WORK.TEMP.      */
data TEMP;
set &amp;EM_IMPORT_SCORE(keep = EM_CLASSIFICATION);
run;
/* Step 2: export TEMP as comma separated values.                        */
/* Edit the OUTFILE path below to a folder that exists on your machine.  */
/* DBMS = CSV states the output format explicitly instead of relying on  */
/* the file extension. REPLACE lets the node be run again without the    */
/* export being cancelled because the output file already exists.        */
/* The header row is still written; a PUTNAMES = NO statement could be   */
/* added here to omit it (not enabled, to keep the original output).     */
PROC EXPORT DATA = TEMP OUTFILE ='C:\Users\Linda\Documents\TitanicScored.CSV' DBMS = CSV REPLACE;
RUN;
]]>
</Data>
</USERTRAINCODE>
<EMNOTES Type="EXTERNAL" Extension="txt">
<Data/>
</EMNOTES>
</EmFileProperties>
</PROPERTIES>
</NODE>
<!-- File Import node "File Import Train" (ID FIMPORT): reads the comma-delimited file C:\train.csv with Role="TRAIN" and marks the variable "survived" as the binary target. -->
<NODE CLASS="SASHELP.EMCORE.EXTENSION.CLASS" COMPONENT="FileImport" ID="FIMPORT" X="46" Y="166" DESCRIPTION="File Import Train" DIAGRAMID="_ROOT_" PARENTID="" TOOLTYPE="SAMPLE" TOOLPREFIX="FIMPORT">
<!-- IFileName is a hard-coded local path; change it to wherever train.csv is stored. NOTE(review): NameRow="Y" presumably means the first row holds column names; confirm against the tool documentation. -->
<PROPERTIES Location="CATALOG" Catalog="SASHELP.EMSAMP.Fimport.SOURCE" ImportType="Local" GuessRows="500" Delimiter="," NameRow="Y" SkipRows="0" MaxRows="1000000" MaxCols="10000" FileType="csv" Role="TRAIN" ForceRun="N" Summarize="N" AdvancedAdvisor="N" RunAction="Train" Component="FileImport" IFileName="C:\train.csv" AccessTable="NoTableName" UserID="NoUserID" Password="NoPassword">
<VariableProperties/>
<!-- Per-variable overrides: "survived" gets ROLE=TARGET and LEVEL=BINARY. -->
<EmVariableAttributes>
<DELTA Variable="survived" AttributeName="ROLE" AttributeValue="TARGET"/>
<DELTA Variable="survived" AttributeName="LEVEL" AttributeValue="BINARY"/>
</EmVariableAttributes>
<EmTargetAttributes/>
<EmFileProperties>
<USERTRAINCODE Type="EXTERNAL" Extension="sas">
<Data/>
</USERTRAINCODE>
<!-- The two CDATA payloads below restate the "survived" override as data-step style statements. NOTE(review): they contain literal quot entity text inside CDATA; presumably the importing tool unescapes it, so confirm before editing by hand. -->
<VARIABLESETDELTA Type="EXTERNAL" Extension="txt">
<Data><![CDATA[
if NAME=&quot;survived&quot; then do;
ROLE=&quot;TARGET&quot;;
LEVEL=&quot;BINARY&quot;;
ORDER=&quot;&quot;;
DROP=&quot;N&quot;;
end;
]]>
</Data>
</VARIABLESETDELTA>
<DELTACODE Type="EXTERNAL" Extension="txt">
<Data><![CDATA[
if NAME=&quot;survived&quot; then do;
ROLE=&quot;TARGET&quot;;
LEVEL=&quot;BINARY&quot;;
ORDER=&quot;&quot;;
end;
drop DROP;
]]>
</Data>
</DELTACODE>
<EMNOTES Type="EXTERNAL" Extension="txt">
<Data/>
</EMNOTES>
</EmFileProperties>
</PROPERTIES>
</NODE>
<!-- File Import node "File Import Test" (ID FIMPORT2): reads the comma-delimited file C:\test.csv with Role="SCORE". It carries no variable overrides and no user code. -->
<NODE CLASS="SASHELP.EMCORE.EXTENSION.CLASS" COMPONENT="FileImport" ID="FIMPORT2" X="428" Y="74" DESCRIPTION="File Import Test" DIAGRAMID="_ROOT_" PARENTID="" TOOLTYPE="SAMPLE" TOOLPREFIX="FIMPORT">
<!-- IFileName is a hard-coded local path; change it to wherever test.csv is stored. The other import settings match the FIMPORT node except for Role. -->
<PROPERTIES Location="CATALOG" Catalog="SASHELP.EMSAMP.Fimport.SOURCE" ImportType="Local" GuessRows="500" Delimiter="," NameRow="Y" SkipRows="0" MaxRows="1000000" MaxCols="10000" FileType="csv" Role="SCORE" ForceRun="N" Summarize="N" AdvancedAdvisor="N" RunAction="Train" Component="FileImport" IFileName="C:\test.csv" AccessTable="NoTableName" UserID="NoUserID" Password="NoPassword">
<VariableProperties/>
<EmVariableAttributes/>
<EmTargetAttributes/>
<EmFileProperties>
<USERTRAINCODE Type="EXTERNAL" Extension="sas">
<Data/>
</USERTRAINCODE>
<VARIABLESETDELTA Type="EXTERNAL" Extension="txt">
<Data/>
</VARIABLESETDELTA>
<DELTACODE Type="EXTERNAL" Extension="txt">
<Data/>
</DELTACODE>
<EMNOTES Type="EXTERNAL" Extension="txt">
<Data/>
</EMNOTES>
</EmFileProperties>
</PROPERTIES>
</NODE>
<!-- Regression node (ID Reg): configured with Error="LOGISTIC", LinkFunction="LOGIT", main effects only (MainEffect="Y", TwoFactor="N", Polynomial="N") and ModelSelection="NONE". The target is "survived". -->
<NODE CLASS="SASHELP.EMMODL.REGRESSION.CLASS" COMPONENT="Regression" ID="Reg" X="232" Y="166" DESCRIPTION="Regression" DIAGRAMID="_ROOT_" PARENTID="" TOOLTYPE="MODEL" TOOLPREFIX="Reg">
<PROPERTIES MainEffect="Y" TwoFactor="N" Polynomial="N" PolynomialDegree="2" Terms="N" Error="LOGISTIC" LinkFunction="LOGIT" SuppressIntercept="N" InputCoding="DEVIATION" MinResourceUse="N" ModelSelection="NONE" SelectionCriterion="DEFAULT" SelectionDefault="Y" Sequential="N" SlEntry="0.05" SlStay="0.05" Start="0" Stop="0" Force="0" Hierarchy="CLASS" Rule="NONE" MaxStep="." StepOutput="N" OptimizationTechnique="DEFAULT" ModelDefaults="Y" MaxIterations="." MaxFunctionCalls="." MaxCPUTime="1 HOUR" ConvDefaults="Y" AbsConvValue="-1.34078E154" AbsFValue="0" AbsFTime="1" AbsGValue="0.00001" AbsGTime="1" AbsXValue="1E-8" AbsXTime="1" FConvValue="0" FConvTimes="1" GConvValue="1E-6" GConvTimes="1" ClParm="N" Covout="N" CovB="N" CorB="N" Simple="N" SuppressOutput="N" Details="N" PrintDesignMatrix="N" SASSPDS="N" Performance="N" ExcludedVariable="REJECT" ForceRun="N" RunAction="Train" Component="Regression">
<VariableProperties/>
<!-- Input selection: only pclass and sex have USE="Y"; the other seven listed variables are switched off with USE="N". -->
<EmVariableAttributes>
<DELTA Variable="age" AttributeName="USE" AttributeValue="N"/>
<DELTA Variable="cabin" AttributeName="USE" AttributeValue="N"/>
<DELTA Variable="embarked" AttributeName="USE" AttributeValue="N"/>
<DELTA Variable="fare" AttributeName="USE" AttributeValue="N"/>
<DELTA Variable="parch" AttributeName="USE" AttributeValue="N"/>
<DELTA Variable="pclass" AttributeName="USE" AttributeValue="Y"/>
<DELTA Variable="sex" AttributeName="USE" AttributeValue="Y"/>
<DELTA Variable="sibsp" AttributeName="USE" AttributeValue="N"/>
<DELTA Variable="ticket" AttributeName="USE" AttributeValue="N"/>
</EmVariableAttributes>
<EmTargetAttributes>
<survived>
<!-- Decision data for "survived": level counts are 342 (survived=1) and 549 (survived=0), so the data and training proportions are 342/891 and 549/891. DECISION1 and DECISION2 form an identity matrix over the two levels; DECPRIOR is missing ("."). -->
<DecData Table="EMWS4.Reg_survived_DD">
<Data>
<Obs SURVIVED="1" COUNT="342" DATAPRIOR="0.3838383838" TRAINPRIOR="0.3838383838" DECPRIOR="." DECISION1="1" DECISION2="0"/>
<Obs SURVIVED="0" COUNT="549" DATAPRIOR="0.6161616162" TRAINPRIOR="0.6161616162" DECPRIOR="." DECISION1="0" DECISION2="1"/>
</Data>
<Metadata>
<Variable NAME="survived" LABEL="" LENGTH="32" FORMAT="" TYPE="C"/>
<Variable NAME="COUNT" LABEL="Level Counts" LENGTH="8" FORMAT="10." TYPE="N"/>
<Variable NAME="DATAPRIOR" LABEL="Data Proportions" LENGTH="8" FORMAT="" TYPE="N"/>
<Variable NAME="TRAINPRIOR" LABEL="Training Proportions" LENGTH="8" FORMAT="" TYPE="N"/>
<Variable NAME="DECPRIOR" LABEL="Decision Priors" LENGTH="8" FORMAT="" TYPE="N"/>
<Variable NAME="DECISION1" LABEL="1" LENGTH="8" FORMAT="" TYPE="N"/>
<Variable NAME="DECISION2" LABEL="0" LENGTH="8" FORMAT="" TYPE="N"/>
</Metadata>
</DecData>
<!-- Decision metadata for "survived": the TARGET row records LEVEL="BINARY" with EVENT="1"; the MATRIX row has LEVEL="PROFIT" and USE="N"; both DECISION rows have USE="Y"; all three prior rows have USE="N". The remaining rows name the predicted, residual, from and into output variables. -->
<DecMeta Table="EMWS4.Reg_survived_DM">
<Data>
<Obs _TYPE_="MATRIX" VARIABLE="" LABEL="" LEVEL="PROFIT" EVENT="" ORDER="" FORMAT="" TYPE="" COST="" USE="N"/>
<Obs _TYPE_="TARGET" VARIABLE="survived" LABEL="" LEVEL="BINARY" EVENT="1" ORDER="" FORMAT="BEST12.0" TYPE="N" COST="" USE=""/>
<Obs _TYPE_="DECISION" VARIABLE="DECISION1" LABEL="1" LEVEL="" EVENT="" ORDER="" FORMAT="" TYPE="N" COST="" USE="Y"/>
<Obs _TYPE_="DECISION" VARIABLE="DECISION2" LABEL="0" LEVEL="" EVENT="" ORDER="" FORMAT="" TYPE="N" COST="" USE="Y"/>
<Obs _TYPE_="DATAPRIOR" VARIABLE="DATAPRIOR" LABEL="Data Prior" LEVEL="" EVENT="" ORDER="" FORMAT="" TYPE="N" COST="" USE="N"/>
<Obs _TYPE_="TRAINPRIOR" VARIABLE="TRAINPRIOR" LABEL="Training Prior" LEVEL="" EVENT="" ORDER="" FORMAT="" TYPE="N" COST="" USE="N"/>
<Obs _TYPE_="DECPRIOR" VARIABLE="DECPRIOR" LABEL="Decision Prior" LEVEL="" EVENT="" ORDER="" FORMAT="" TYPE="N" COST="" USE="N"/>
<Obs _TYPE_="PREDICTED" VARIABLE="P_survived1" LABEL="Predicted: survived=1" LEVEL="1" EVENT="" ORDER="" FORMAT="" TYPE="N" COST="" USE=""/>
<Obs _TYPE_="RESIDUAL" VARIABLE="R_survived1" LABEL="Residual: survived=1" LEVEL="1" EVENT="" ORDER="" FORMAT="" TYPE="N" COST="" USE=""/>
<Obs _TYPE_="PREDICTED" VARIABLE="P_survived0" LABEL="Predicted: survived=0" LEVEL="0" EVENT="" ORDER="" FORMAT="" TYPE="N" COST="" USE=""/>
<Obs _TYPE_="RESIDUAL" VARIABLE="R_survived0" LABEL="Residual: survived=0" LEVEL="0" EVENT="" ORDER="" FORMAT="" TYPE="N" COST="" USE=""/>
<Obs _TYPE_="FROM" VARIABLE="F_survived" LABEL="From: survived" LEVEL="" EVENT="" ORDER="" FORMAT="" TYPE="C" COST="" USE=""/>
<Obs _TYPE_="INTO" VARIABLE="I_survived" LABEL="Into: survived" LEVEL="" EVENT="" ORDER="" FORMAT="" TYPE="C" COST="" USE=""/>
</Data>
<Metadata>
<Variable NAME="_TYPE_" LABEL="Type" LENGTH="32" FORMAT="" TYPE="C"/>
<Variable NAME="VARIABLE" LABEL="Variable" LENGTH="32" FORMAT="" TYPE="C"/>
<Variable NAME="LABEL" LABEL="Label" LENGTH="40" FORMAT="" TYPE="C"/>
<Variable NAME="LEVEL" LABEL="Measurement Level" LENGTH="32" FORMAT="" TYPE="C"/>
<Variable NAME="EVENT" LABEL="Target Event" LENGTH="32" FORMAT="" TYPE="C"/>
<Variable NAME="ORDER" LABEL="Order" LENGTH="10" FORMAT="" TYPE="C"/>
<Variable NAME="FORMAT" LABEL="Format" LENGTH="32" FORMAT="" TYPE="C"/>
<Variable NAME="TYPE" LABEL="Type" LENGTH="1" FORMAT="" TYPE="C"/>
<Variable NAME="COST" LABEL="Cost" LENGTH="32" FORMAT="" TYPE="C"/>
<Variable NAME="USE" LABEL="Use" LENGTH="1" FORMAT="" TYPE="C"/>
</Metadata>
</DecMeta>
</survived>
</EmTargetAttributes>
<EmFileProperties>
<EMNOTES Type="EXTERNAL" Extension="txt">
<Data/>
</EMNOTES>
</EmFileProperties>
</PROPERTIES>
</NODE>
<!-- Score node (ID Score, TOOLTYPE="ASSESS"): per the CONNECTIONS list in this file it receives input from the Reg and FIMPORT2 nodes and feeds the EMCODE node. OutputType="VIEW"; ScoreValidate and ScoreTest are both "N". -->
<NODE CLASS="SASHELP.EMUTIL.EMSCORE.CLASS" COMPONENT="Score" ID="Score" X="426" Y="168" DESCRIPTION="Score" DIAGRAMID="_ROOT_" PARENTID="" TOOLTYPE="ASSESS" TOOLPREFIX="Score">
<!-- NOTE(review): the Hide* flags presumably control which variable groups are hidden in the scored output; the downstream SAS code keeps EM_CLASSIFICATION, so verify that column stays available if these flags are changed. -->
<PROPERTIES OutputType="VIEW" FixedOutputNames="Y" HideVariables="N" HideInput="Y" HideTarget="Y" HideRejected="Y" HideAssess="Y" HidePredict="Y" HideFreq="Y" HideClassification="Y" HideResidual="Y" HideOther="Y" ScoreValidate="N" ScoreTest="N" CScore="Y" JScore="Y" PackageName="DEFAULT" UserPackageName="" PreferenceName="" OptimizedCode="Y" GraphReports="Y" ForceRun="N" RunAction="Train" Component="Score">
<VariableProperties/>
<EmVariableAttributes/>
<EmTargetAttributes/>
<EmFileProperties>
<EMNOTES Type="EXTERNAL" Extension="txt">
<Data/>
</EMNOTES>
</EmFileProperties>
</PROPERTIES>
</NODE>
</NODES>
<!-- Diagram edges, each from a node ID to a node ID. Read as a flow: FIMPORT (train data) feeds Reg, Reg feeds Score, FIMPORT2 (score data) also feeds Score, and Score feeds EMCODE (the CSV export code). -->
<CONNECTIONS>
<CONNECTION FROM="Score" TO="EMCODE"/>
<CONNECTION FROM="Reg" TO="Score"/>
<CONNECTION FROM="FIMPORT" TO="Reg"/>
<CONNECTION FROM="FIMPORT2" TO="Score"/>
</CONNECTIONS>
<!-- Diagram-level notes; the CDATA payload holds only a line break, i.e. no notes were entered. -->
<NOTES>
<DATA><![CDATA[
]]>
</DATA>
</NOTES>
</Workspace>
</_ROOT_>
@raleighlinda
Copy link
Author

Please edit the SAS code node to set the path/filename.csv of the exported scored dataset. See Properties Panel - Train Code Editor ...
Before you submit the resulting CSV file to Kaggle, please delete the column-name header row.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment