Created
March 21, 2013 14:34
-
-
Save raleighlinda/5213490 to your computer and use it in GitHub Desktop.
Titanic Enterprise Miner
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8"?><_ROOT_ EMVERSION="12.1" ORIENTATION="HORIZONTAL"> | |
<Workspace> | |
<NODES> | |
<!-- SAS Code node (ID EMCODE): its train code writes the EM_CLASSIFICATION column of the imported score data to a CSV file. -->
<NODE CLASS="SASHELP.EMCORE.EMCODETOOL.CLASS" COMPONENT="SASCode" ID="EMCODE" X="426" Y="252" DESCRIPTION="SAS Code" DIAGRAMID="_ROOT_" PARENTID="" TOOLTYPE="UTILITY" TOOLPREFIX="EMCODE">
<PROPERTIES UsePriors="Y" ToolType="UTILITY" DataNeeded="N" PublishCode="PUBLISH" ScoreCodeFormat="DATASTEP" MetaAdvisor="BASIC" ForceRun="N" RunAction="Train" Component="SASCode">
<VariableProperties/>
<EmVariableAttributes/>
<EmTargetAttributes/>
<EmFileProperties>
<!-- User-written SAS executed by this node. -->
<USERTRAINCODE Type="EXTERNAL" Extension="sas">
<Data><![CDATA[
/* Copy only the EM_CLASSIFICATION column out of the score data set     */
/* referenced by the EM_IMPORT_SCORE macro variable (presumably the     */
/* output of the upstream Score node; confirm in your diagram).         */
data TEMP;
set &EM_IMPORT_SCORE(keep = EM_CLASSIFICATION);
run;
/* Write TEMP as a CSV file. Edit OUTFILE to a path that exists on your */
/* machine. DBMS=CSV states the format explicitly; REPLACE lets the     */
/* node be re-run without the export being cancelled because the file   */
/* already exists. The output keeps its column-name header row; a       */
/* PUTNAMES=NO; statement before RUN should suppress it (verify against */
/* your SAS release).                                                   */
PROC EXPORT DATA = TEMP OUTFILE ='C:\Users\Linda\Documents\TitanicScored.CSV' DBMS = CSV REPLACE;
RUN;
]]>
</Data>
</USERTRAINCODE>
<EMNOTES Type="EXTERNAL" Extension="txt">
<Data/>
</EMNOTES>
</EmFileProperties>
</PROPERTIES>
</NODE>
<!-- File Import node (ID FIMPORT, Role="TRAIN"): reads the comma-delimited file named in IFileName and marks the variable survived as a binary target. -->
<NODE CLASS="SASHELP.EMCORE.EXTENSION.CLASS" COMPONENT="FileImport" ID="FIMPORT" X="46" Y="166" DESCRIPTION="File Import Train" DIAGRAMID="_ROOT_" PARENTID="" TOOLTYPE="SAMPLE" TOOLPREFIX="FIMPORT">
<!-- IFileName is a machine-specific path; point it at your own train.csv before running. -->
<PROPERTIES Location="CATALOG" Catalog="SASHELP.EMSAMP.Fimport.SOURCE" ImportType="Local" GuessRows="500" Delimiter="," NameRow="Y" SkipRows="0" MaxRows="1000000" MaxCols="10000" FileType="csv" Role="TRAIN" ForceRun="N" Summarize="N" AdvancedAdvisor="N" RunAction="Train" Component="FileImport" IFileName="C:\train.csv" AccessTable="NoTableName" UserID="NoUserID" Password="NoPassword">
<VariableProperties/>
<!-- Attribute overrides for the variable survived: role TARGET, level BINARY. -->
<EmVariableAttributes>
<DELTA Variable="survived" AttributeName="ROLE" AttributeValue="TARGET"/>
<DELTA Variable="survived" AttributeName="LEVEL" AttributeValue="BINARY"/>
</EmVariableAttributes>
<EmTargetAttributes/>
<EmFileProperties>
<USERTRAINCODE Type="EXTERNAL" Extension="sas">
<Data/>
</USERTRAINCODE>
<!-- Statement form of the same overrides; this variant also sets DROP="N". -->
<VARIABLESETDELTA Type="EXTERNAL" Extension="txt">
<Data><![CDATA[
if NAME="survived" then do;
ROLE="TARGET";
LEVEL="BINARY";
ORDER="";
DROP="N";
end;
]]>
</Data>
</VARIABLESETDELTA>
<!-- Same overrides again; this variant ends with a drop of the DROP column instead of assigning it. -->
<DELTACODE Type="EXTERNAL" Extension="txt">
<Data><![CDATA[
if NAME="survived" then do;
ROLE="TARGET";
LEVEL="BINARY";
ORDER="";
end;
drop DROP;
]]>
</Data>
</DELTACODE>
<EMNOTES Type="EXTERNAL" Extension="txt">
<Data/>
</EMNOTES>
</EmFileProperties>
</PROPERTIES>
</NODE>
<!-- File Import node (ID FIMPORT2, Role="SCORE"): reads the comma-delimited file named in IFileName; no variable overrides or user code are defined. -->
<NODE CLASS="SASHELP.EMCORE.EXTENSION.CLASS" COMPONENT="FileImport" ID="FIMPORT2" X="428" Y="74" DESCRIPTION="File Import Test" DIAGRAMID="_ROOT_" PARENTID="" TOOLTYPE="SAMPLE" TOOLPREFIX="FIMPORT"> | |
<!-- IFileName is a machine-specific path; point it at your own test.csv before running. -->
<PROPERTIES Location="CATALOG" Catalog="SASHELP.EMSAMP.Fimport.SOURCE" ImportType="Local" GuessRows="500" Delimiter="," NameRow="Y" SkipRows="0" MaxRows="1000000" MaxCols="10000" FileType="csv" Role="SCORE" ForceRun="N" Summarize="N" AdvancedAdvisor="N" RunAction="Train" Component="FileImport" IFileName="C:\test.csv" AccessTable="NoTableName" UserID="NoUserID" Password="NoPassword"> | |
<VariableProperties/> | |
<EmVariableAttributes/> | |
<EmTargetAttributes/> | |
<EmFileProperties> | |
<USERTRAINCODE Type="EXTERNAL" Extension="sas"> | |
<Data/> | |
</USERTRAINCODE> | |
<VARIABLESETDELTA Type="EXTERNAL" Extension="txt"> | |
<Data/> | |
</VARIABLESETDELTA> | |
<DELTACODE Type="EXTERNAL" Extension="txt"> | |
<Data/> | |
</DELTACODE> | |
<EMNOTES Type="EXTERNAL" Extension="txt"> | |
<Data/> | |
</EMNOTES> | |
</EmFileProperties> | |
</PROPERTIES> | |
</NODE> | |
<!-- Regression node (ID Reg): configured with Error="LOGISTIC", LinkFunction="LOGIT" and ModelSelection="NONE"; the target metadata below describes the binary target survived. -->
<NODE CLASS="SASHELP.EMMODL.REGRESSION.CLASS" COMPONENT="Regression" ID="Reg" X="232" Y="166" DESCRIPTION="Regression" DIAGRAMID="_ROOT_" PARENTID="" TOOLTYPE="MODEL" TOOLPREFIX="Reg"> | |
<PROPERTIES MainEffect="Y" TwoFactor="N" Polynomial="N" PolynomialDegree="2" Terms="N" Error="LOGISTIC" LinkFunction="LOGIT" SuppressIntercept="N" InputCoding="DEVIATION" MinResourceUse="N" ModelSelection="NONE" SelectionCriterion="DEFAULT" SelectionDefault="Y" Sequential="N" SlEntry="0.05" SlStay="0.05" Start="0" Stop="0" Force="0" Hierarchy="CLASS" Rule="NONE" MaxStep="." StepOutput="N" OptimizationTechnique="DEFAULT" ModelDefaults="Y" MaxIterations="." MaxFunctionCalls="." MaxCPUTime="1 HOUR" ConvDefaults="Y" AbsConvValue="-1.34078E154" AbsFValue="0" AbsFTime="1" AbsGValue="0.00001" AbsGTime="1" AbsXValue="1E-8" AbsXTime="1" FConvValue="0" FConvTimes="1" GConvValue="1E-6" GConvTimes="1" ClParm="N" Covout="N" CovB="N" CorB="N" Simple="N" SuppressOutput="N" Details="N" PrintDesignMatrix="N" SASSPDS="N" Performance="N" ExcludedVariable="REJECT" ForceRun="N" RunAction="Train" Component="Regression"> | |
<VariableProperties/> | |
<!-- Per-variable USE flags: only pclass and sex are set to Y; every other listed variable is set to N. -->
<EmVariableAttributes> | |
<DELTA Variable="age" AttributeName="USE" AttributeValue="N"/> | |
<DELTA Variable="cabin" AttributeName="USE" AttributeValue="N"/> | |
<DELTA Variable="embarked" AttributeName="USE" AttributeValue="N"/> | |
<DELTA Variable="fare" AttributeName="USE" AttributeValue="N"/> | |
<DELTA Variable="parch" AttributeName="USE" AttributeValue="N"/> | |
<DELTA Variable="pclass" AttributeName="USE" AttributeValue="Y"/> | |
<DELTA Variable="sex" AttributeName="USE" AttributeValue="Y"/> | |
<DELTA Variable="sibsp" AttributeName="USE" AttributeValue="N"/> | |
<DELTA Variable="ticket" AttributeName="USE" AttributeValue="N"/> | |
</EmVariableAttributes> | |
<EmTargetAttributes> | |
<survived> | |
<!-- Stored decision data for the target: 342 rows with survived=1 and 549 with survived=0, with matching data and training proportions and two decision columns. -->
<DecData Table="EMWS4.Reg_survived_DD"> | |
<Data> | |
<Obs SURVIVED="1" COUNT="342" DATAPRIOR="0.3838383838" TRAINPRIOR="0.3838383838" DECPRIOR="." DECISION1="1" DECISION2="0"/> | |
<Obs SURVIVED="0" COUNT="549" DATAPRIOR="0.6161616162" TRAINPRIOR="0.6161616162" DECPRIOR="." DECISION1="0" DECISION2="1"/> | |
</Data> | |
<!-- Column descriptions (name, label, length, format, type) for the rows above. -->
<Metadata> | |
<Variable NAME="survived" LABEL="" LENGTH="32" FORMAT="" TYPE="C"/> | |
<Variable NAME="COUNT" LABEL="Level Counts" LENGTH="8" FORMAT="10." TYPE="N"/> | |
<Variable NAME="DATAPRIOR" LABEL="Data Proportions" LENGTH="8" FORMAT="" TYPE="N"/> | |
<Variable NAME="TRAINPRIOR" LABEL="Training Proportions" LENGTH="8" FORMAT="" TYPE="N"/> | |
<Variable NAME="DECPRIOR" LABEL="Decision Priors" LENGTH="8" FORMAT="" TYPE="N"/> | |
<Variable NAME="DECISION1" LABEL="1" LENGTH="8" FORMAT="" TYPE="N"/> | |
<Variable NAME="DECISION2" LABEL="0" LENGTH="8" FORMAT="" TYPE="N"/> | |
</Metadata> | |
</DecData> | |
<!-- Stored decision metadata: the TARGET row names survived as BINARY with EVENT="1"; the remaining rows name the decision, prior, predicted, residual, from and into variables. -->
<DecMeta Table="EMWS4.Reg_survived_DM"> | |
<Data> | |
<Obs _TYPE_="MATRIX" VARIABLE="" LABEL="" LEVEL="PROFIT" EVENT="" ORDER="" FORMAT="" TYPE="" COST="" USE="N"/> | |
<Obs _TYPE_="TARGET" VARIABLE="survived" LABEL="" LEVEL="BINARY" EVENT="1" ORDER="" FORMAT="BEST12.0" TYPE="N" COST="" USE=""/> | |
<Obs _TYPE_="DECISION" VARIABLE="DECISION1" LABEL="1" LEVEL="" EVENT="" ORDER="" FORMAT="" TYPE="N" COST="" USE="Y"/> | |
<Obs _TYPE_="DECISION" VARIABLE="DECISION2" LABEL="0" LEVEL="" EVENT="" ORDER="" FORMAT="" TYPE="N" COST="" USE="Y"/> | |
<Obs _TYPE_="DATAPRIOR" VARIABLE="DATAPRIOR" LABEL="Data Prior" LEVEL="" EVENT="" ORDER="" FORMAT="" TYPE="N" COST="" USE="N"/> | |
<Obs _TYPE_="TRAINPRIOR" VARIABLE="TRAINPRIOR" LABEL="Training Prior" LEVEL="" EVENT="" ORDER="" FORMAT="" TYPE="N" COST="" USE="N"/> | |
<Obs _TYPE_="DECPRIOR" VARIABLE="DECPRIOR" LABEL="Decision Prior" LEVEL="" EVENT="" ORDER="" FORMAT="" TYPE="N" COST="" USE="N"/> | |
<Obs _TYPE_="PREDICTED" VARIABLE="P_survived1" LABEL="Predicted: survived=1" LEVEL="1" EVENT="" ORDER="" FORMAT="" TYPE="N" COST="" USE=""/> | |
<Obs _TYPE_="RESIDUAL" VARIABLE="R_survived1" LABEL="Residual: survived=1" LEVEL="1" EVENT="" ORDER="" FORMAT="" TYPE="N" COST="" USE=""/> | |
<Obs _TYPE_="PREDICTED" VARIABLE="P_survived0" LABEL="Predicted: survived=0" LEVEL="0" EVENT="" ORDER="" FORMAT="" TYPE="N" COST="" USE=""/> | |
<Obs _TYPE_="RESIDUAL" VARIABLE="R_survived0" LABEL="Residual: survived=0" LEVEL="0" EVENT="" ORDER="" FORMAT="" TYPE="N" COST="" USE=""/> | |
<Obs _TYPE_="FROM" VARIABLE="F_survived" LABEL="From: survived" LEVEL="" EVENT="" ORDER="" FORMAT="" TYPE="C" COST="" USE=""/> | |
<Obs _TYPE_="INTO" VARIABLE="I_survived" LABEL="Into: survived" LEVEL="" EVENT="" ORDER="" FORMAT="" TYPE="C" COST="" USE=""/> | |
</Data> | |
<!-- Column descriptions (name, label, length, format, type) for the rows above. -->
<Metadata> | |
<Variable NAME="_TYPE_" LABEL="Type" LENGTH="32" FORMAT="" TYPE="C"/> | |
<Variable NAME="VARIABLE" LABEL="Variable" LENGTH="32" FORMAT="" TYPE="C"/> | |
<Variable NAME="LABEL" LABEL="Label" LENGTH="40" FORMAT="" TYPE="C"/> | |
<Variable NAME="LEVEL" LABEL="Measurement Level" LENGTH="32" FORMAT="" TYPE="C"/> | |
<Variable NAME="EVENT" LABEL="Target Event" LENGTH="32" FORMAT="" TYPE="C"/> | |
<Variable NAME="ORDER" LABEL="Order" LENGTH="10" FORMAT="" TYPE="C"/> | |
<Variable NAME="FORMAT" LABEL="Format" LENGTH="32" FORMAT="" TYPE="C"/> | |
<Variable NAME="TYPE" LABEL="Type" LENGTH="1" FORMAT="" TYPE="C"/> | |
<Variable NAME="COST" LABEL="Cost" LENGTH="32" FORMAT="" TYPE="C"/> | |
<Variable NAME="USE" LABEL="Use" LENGTH="1" FORMAT="" TYPE="C"/> | |
</Metadata> | |
</DecMeta> | |
</survived> | |
</EmTargetAttributes> | |
<EmFileProperties> | |
<EMNOTES Type="EXTERNAL" Extension="txt"> | |
<Data/> | |
</EMNOTES> | |
</EmFileProperties> | |
</PROPERTIES> | |
</NODE> | |
<!-- Score node (ID Score): per the CONNECTIONS section it takes input from Reg and FIMPORT2 and feeds EMCODE. OutputType is VIEW and FixedOutputNames is Y. -->
<NODE CLASS="SASHELP.EMUTIL.EMSCORE.CLASS" COMPONENT="Score" ID="Score" X="426" Y="168" DESCRIPTION="Score" DIAGRAMID="_ROOT_" PARENTID="" TOOLTYPE="ASSESS" TOOLPREFIX="Score"> | |
<PROPERTIES OutputType="VIEW" FixedOutputNames="Y" HideVariables="N" HideInput="Y" HideTarget="Y" HideRejected="Y" HideAssess="Y" HidePredict="Y" HideFreq="Y" HideClassification="Y" HideResidual="Y" HideOther="Y" ScoreValidate="N" ScoreTest="N" CScore="Y" JScore="Y" PackageName="DEFAULT" UserPackageName="" PreferenceName="" OptimizedCode="Y" GraphReports="Y" ForceRun="N" RunAction="Train" Component="Score"> | |
<VariableProperties/> | |
<EmVariableAttributes/> | |
<EmTargetAttributes/> | |
<EmFileProperties> | |
<EMNOTES Type="EXTERNAL" Extension="txt"> | |
<Data/> | |
</EMNOTES> | |
</EmFileProperties> | |
</PROPERTIES> | |
</NODE> | |
</NODES> | |
<!-- Diagram edges by node ID. Read as a flow: FIMPORT to Reg, Reg to Score, FIMPORT2 to Score, Score to EMCODE. -->
<CONNECTIONS> | |
<CONNECTION FROM="Score" TO="EMCODE"/> | |
<CONNECTION FROM="Reg" TO="Score"/> | |
<CONNECTION FROM="FIMPORT" TO="Reg"/> | |
<CONNECTION FROM="FIMPORT2" TO="Score"/> | |
</CONNECTIONS> | |
<!-- Diagram-level notes; empty in this export. -->
<NOTES>
<DATA><![CDATA[
]]>
</DATA>
</NOTES>
</Workspace> | |
</_ROOT_> |
Please edit the SAS code node to set the path/filename.csv of the exported scored dataset. See Properties Panel - Train Code Editor ...
Before you submit the resulting CSV file to Kaggle, please delete the column-name header row.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
For the train File Import node, please set the path of your file, train.csv, by clicking Properties Panel - Train - Import File ...
Repeat the procedure for the test File Import node, using your test.csv.