Skip to content

Instantly share code, notes, and snippets.

@drewlanenga
Created January 7, 2014 23:48
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save drewlanenga/8309069 to your computer and use it in GitHub Desktop.
Save drewlanenga/8309069 to your computer and use it in GitHub Desktop.
Exploring support for [transformations in PMML](http://www.dmg.org/v4-1/Transformations.html) with Pattern. (Environment notes: Running Vagrant with Cascading SDK 2.2 -- https://github.com/Cascading/vagrant-cascading-hadoop-cluster)
<?xml version="1.0"?>
<!-- PMML 4.1 regression model that predicts sepal_width. The Header below names Rattle/PMML 1.4 as the generating application. -->
<PMML version="4.1" xmlns="http://www.dmg.org/PMML-4_1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.dmg.org/PMML-4_1 http://www.dmg.org/v4-1/pmml-4-1.xsd">
<Header copyright="Copyright (c) 2014 lanenga" description="Linear Regression Model">
<Extension name="user" value="lanenga" extender="Rattle/PMML"/>
<Application name="Rattle/PMML" version="1.4"/>
<Timestamp>2014-01-07 15:33:34</Timestamp>
</Header>
<!-- Raw input fields: four continuous doubles. Derived fields are not declared here; they appear only under LocalTransformations. -->
<DataDictionary numberOfFields="4">
<DataField name="sepal_width" optype="continuous" dataType="double"/>
<DataField name="sepal_length" optype="continuous" dataType="double"/>
<DataField name="petal_length" optype="continuous" dataType="double"/>
<DataField name="petal_width" optype="continuous" dataType="double"/>
</DataDictionary>
<RegressionModel modelName="Linear_Regression_Model" functionName="regression" algorithmName="least squares">
<!-- sepal_width is the predicted target; the remaining three fields are active inputs. -->
<MiningSchema>
<MiningField name="sepal_width" usageType="predicted"/>
<MiningField name="sepal_length" usageType="active"/>
<MiningField name="petal_length" usageType="active"/>
<MiningField name="petal_width" usageType="active"/>
</MiningSchema>
<Output>
<OutputField name="Predicted_sepal_width" feature="predictedValue"/>
</Output>
<LocalTransformations>
<!-- derived_sepal_length: linear normalization of sepal_length through two anchor points, 5.84333333333333 maps to 0 and 6.6713994613112 maps to 1. NOTE(review): presumably mean and mean + 1 sd of the training column (a z-score); confirm against the data. -->
<DerivedField name="derived_sepal_length" dataType="double" optype="continuous">
<NormContinuous field="sepal_length">
<LinearNorm orig="5.84333333333333" norm="0"/>
<LinearNorm orig="6.6713994613112" norm="1"/>
</NormContinuous>
</DerivedField>
<!-- derived_sepal_length1 repeats the identical normalization under a second name. No NumericPredictor in the RegressionTable below references it. -->
<DerivedField name="derived_sepal_length1" dataType="double" optype="continuous">
<NormContinuous field="sepal_length">
<LinearNorm orig="5.84333333333333" norm="0"/>
<LinearNorm orig="6.6713994613112" norm="1"/>
</NormContinuous>
</DerivedField>
</LocalTransformations>
<!-- The first predictor is the derived field, not raw sepal_length, so a scoring engine has to evaluate LocalTransformations before it can bind this table. The Hadoop job log on this page reports a FieldsResolverException for the name derived_sepal_length. -->
<RegressionTable intercept="4.5903781341142">
<NumericPredictor name="derived_sepal_length" exponent="1" coefficient="0.502690800263729"/>
<NumericPredictor name="petal_length" exponent="1" coefficient="-0.586032249036085"/>
<NumericPredictor name="petal_width" exponent="1" coefficient="0.558030342771049"/>
</RegressionTable>
</RegressionModel>
</PMML>
library(pmml)
library(pmmlTransformations)
## Fit a linear model on z-scored iris data and export it, together with the
## transformation, as PMML. Reads  <dat_folder>/iris.glm.tsv  and writes
## <dat_folder>/lm.pmml.tsv (data + predictions) and <dat_folder>/lm.pmml.xml.

## input/output locations; file.path() supplies the separator, so the folder
## name carries no trailing slash
dat_folder <- "data"
iris_path <- file.path(dat_folder, "iris.glm.tsv")
predictions_path <- file.path(dat_folder, "lm.pmml.tsv")
pmml_path <- file.path(dat_folder, "lm.pmml.xml")

## load the iris data (from pattern-example)
## "UTF-8" is the spelling read.table documents; "UTF8" is not a documented
## value
iris <- read.table(
  file = iris_path,
  sep = "\t",
  quote = "",
  na.strings = "NULL",
  header = TRUE,
  encoding = "UTF-8"
)

## do the transformations
## xformInfo must be an argument of ZScoreXform(), not trail after a stray ")".
## The model formula below relies on the resulting column being named
## derived_sepal_length.
irisBox <- ZScoreXform(WrapData(iris), xformInfo = "sepal_length")

## build the model
mod <- lm(
  sepal_width ~ derived_sepal_length + petal_length + petal_width,
  data = irisBox$data
)

## predict the outcome
predicted <- predict(mod, newdata = irisBox$data)

## write predicted values out for comparison with pattern output later
irisBox$data$predict <- predicted
write.table(
  irisBox$data,
  file = predictions_path,
  quote = FALSE,
  sep = "\t",
  row.names = FALSE
)

## export model to PMML
## passing the transform box as `transforms` is what puts the derived-field
## definitions into the exported document
saveXML(pmml(mod, transforms = irisBox), file = pmml_path)
Meta VERSION="1" .
Job JOBID="job_201401062145_0004" JOBNAME="[B71FBB9C4B154515A86546AEEA00E965/C0823765103444A2B0CF7F6E44AC0FDF] classify/(1/1) out/classify_zscore" USER="vagrant" SUBMIT_TIME="1389137802713" JOBCONF="hdfs://master\.local:9000/tmp/hadoop-root/mapred/staging/vagrant/\.staging/job_201401062145_0004/job\.xml" VIEW_JOB="*" MODIFY_JOB="*" JOB_QUEUE="default" WORKFLOW_ID="" WORKFLOW_NAME="" WORKFLOW_NODE_NAME="" WORKFLOW_ADJACENCIES="" WORKFLOW_TAGS="" .
Job JOBID="job_201401062145_0004" JOB_PRIORITY="NORMAL" .
Job JOBID="job_201401062145_0004" LAUNCH_TIME="1389137802860" TOTAL_MAPS="2" TOTAL_REDUCES="0" JOB_STATUS="PREP" .
Task TASKID="task_201401062145_0004_m_000003" TASK_TYPE="SETUP" START_TIME="1389137803114" SPLITS="" .
MapAttempt TASK_TYPE="SETUP" TASKID="task_201401062145_0004_m_000003" TASK_ATTEMPT_ID="attempt_201401062145_0004_m_000003_0" START_TIME="1389137803332" TRACKER_NAME="tracker_hadoop2\.local:localhost/127\.0\.0\.1:41596" HTTP_PORT="50060" LOCALITY="OFF_SWITCH" AVATAAR="VIRGIN" .
MapAttempt TASK_TYPE="SETUP" TASKID="task_201401062145_0004_m_000003" TASK_ATTEMPT_ID="attempt_201401062145_0004_m_000003_0" TASK_STATUS="SUCCESS" FINISH_TIME="1389137804848" HOSTNAME="/default-rack/hadoop2\.local" STATE_STRING="setup" COUNTERS="{(FileSystemCounters)(FileSystemCounters)[(FILE_BYTES_WRITTEN)(FILE_BYTES_WRITTEN)(63454)]}{(org\.apache\.hadoop\.mapred\.Task$Counter)(Map-Reduce Framework)[(PHYSICAL_MEMORY_BYTES)(Physical memory \\(bytes\\) snapshot)(83607552)][(SPILLED_RECORDS)(Spilled Records)(0)][(CPU_MILLISECONDS)(CPU time spent \\(ms\\))(50)][(COMMITTED_HEAP_BYTES)(Total committed heap usage \\(bytes\\))(7798784)][(VIRTUAL_MEMORY_BYTES)(Virtual memory \\(bytes\\) snapshot)(1059917824)]}" .
Task TASKID="task_201401062145_0004_m_000003" TASK_TYPE="SETUP" TASK_STATUS="SUCCESS" FINISH_TIME="1389137804948" COUNTERS="{(FileSystemCounters)(FileSystemCounters)[(FILE_BYTES_WRITTEN)(FILE_BYTES_WRITTEN)(63454)]}{(org\.apache\.hadoop\.mapred\.Task$Counter)(Map-Reduce Framework)[(PHYSICAL_MEMORY_BYTES)(Physical memory \\(bytes\\) snapshot)(83607552)][(SPILLED_RECORDS)(Spilled Records)(0)][(CPU_MILLISECONDS)(CPU time spent \\(ms\\))(50)][(COMMITTED_HEAP_BYTES)(Total committed heap usage \\(bytes\\))(7798784)][(VIRTUAL_MEMORY_BYTES)(Virtual memory \\(bytes\\) snapshot)(1059917824)]}" .
Job JOBID="job_201401062145_0004" JOB_STATUS="RUNNING" .
Task TASKID="task_201401062145_0004_m_000000" TASK_TYPE="MAP" START_TIME="1389137804950" SPLITS="/default-rack/hadoop2\.local,/default-rack/hadoop1\.local,/default-rack/hadoop3\.local" .
Task TASKID="task_201401062145_0004_m_000001" TASK_TYPE="MAP" START_TIME="1389137804952" SPLITS="/default-rack/hadoop2\.local,/default-rack/hadoop1\.local,/default-rack/hadoop3\.local" .
MapAttempt TASK_TYPE="MAP" TASKID="task_201401062145_0004_m_000000" TASK_ATTEMPT_ID="attempt_201401062145_0004_m_000000_0" START_TIME="1389137804984" TRACKER_NAME="tracker_hadoop2\.local:localhost/127\.0\.0\.1:41596" HTTP_PORT="50060" LOCALITY="NODE_LOCAL" AVATAAR="VIRGIN" .
MapAttempt TASK_TYPE="MAP" TASKID="task_201401062145_0004_m_000000" TASK_ATTEMPT_ID="attempt_201401062145_0004_m_000000_0" TASK_STATUS="FAILED" FINISH_TIME="1389137810739" HOSTNAME="hadoop2\.local" ERROR="cascading\.tuple\.FieldsResolverException: could not select fields: [{1}:'derived_sepal_length'], from: [{3}:'sepal_length', 'petal_length', 'petal_width']
at cascading\.tuple\.Fields\.indexOf(Fields\.java:1008)
at cascading\.tuple\.Fields\.getPos(Fields\.java:986)
at cascading\.pattern\.model\.generalregression\.expression\.ParameterExpression\.<init>(ParameterExpression\.java:98)
at cascading\.pattern\.model\.generalregression\.Parameter\.createExpression(Parameter\.java:119)
at cascading\.pattern\.model\.generalregression\.RegressionTable\.bind(RegressionTable\.java:97)
at cascading\.pattern\.model\.generalregression\.GeneralRegressionSpec\.getRegressionTableEvaluators(GeneralRegressionSpec\.java:123)
at cascading\.pattern\.model\.generalregression\.BaseRegressionFunction\.prepare(BaseRegressionFunction\.java:53)
at cascading\.pattern\.model\.generalregression\.PredictionRegressionFunction\.prepare(PredictionRegressionFunction\.java:33)
at cascading\.flow\.stream\.OperatorStage\.prepare(OperatorStage\.java:284)
at cascading\.flow\.stream\.StreamGraph\.prepare(StreamGraph\.java:167)
at cascading\.flow\.hadoop\.FlowMapper\.run(FlowMapper\.java:110)
at org\.apache\.hadoop\.mapred\.MapTask\.runOldMapper(MapTask\.java:430)
at org\.apache\.hadoop\.mapred\.MapTask\.run(MapTask\.java:366)
at org\.apache\.hadoop\.mapred\.Child$4\.run(Child\.java:255)
at java\.security\.AccessController\.doPrivileged(Native Method)
at javax\.security\.auth\.Subject\.doAs(Subject\.java:416)
at org\.apache\.hadoop\.security\.UserGroupInformation\.doAs(UserGroupInformation\.java:1190)
at org\.apache\.hadoop\.mapred\.Child\.main(Child\.java:249)
" .
MapAttempt TASK_TYPE="MAP" TASKID="task_201401062145_0004_m_000001" TASK_ATTEMPT_ID="attempt_201401062145_0004_m_000001_0" START_TIME="1389137805266" TRACKER_NAME="tracker_hadoop1\.local:localhost/127\.0\.0\.1:54014" HTTP_PORT="50060" LOCALITY="NODE_LOCAL" AVATAAR="VIRGIN" .
MapAttempt TASK_TYPE="MAP" TASKID="task_201401062145_0004_m_000001" TASK_ATTEMPT_ID="attempt_201401062145_0004_m_000001_0" TASK_STATUS="FAILED" FINISH_TIME="1389137811063" HOSTNAME="hadoop1\.local" ERROR="cascading\.tuple\.FieldsResolverException: could not select fields: [{1}:'derived_sepal_length'], from: [{3}:'sepal_length', 'petal_length', 'petal_width']
at cascading\.tuple\.Fields\.indexOf(Fields\.java:1008)
at cascading\.tuple\.Fields\.getPos(Fields\.java:986)
at cascading\.pattern\.model\.generalregression\.expression\.ParameterExpression\.<init>(ParameterExpression\.java:98)
at cascading\.pattern\.model\.generalregression\.Parameter\.createExpression(Parameter\.java:119)
at cascading\.pattern\.model\.generalregression\.RegressionTable\.bind(RegressionTable\.java:97)
at cascading\.pattern\.model\.generalregression\.GeneralRegressionSpec\.getRegressionTableEvaluators(GeneralRegressionSpec\.java:123)
at cascading\.pattern\.model\.generalregression\.BaseRegressionFunction\.prepare(BaseRegressionFunction\.java:53)
at cascading\.pattern\.model\.generalregression\.PredictionRegressionFunction\.prepare(PredictionRegressionFunction\.java:33)
at cascading\.flow\.stream\.OperatorStage\.prepare(OperatorStage\.java:284)
at cascading\.flow\.stream\.StreamGraph\.prepare(StreamGraph\.java:167)
at cascading\.flow\.hadoop\.FlowMapper\.run(FlowMapper\.java:110)
at org\.apache\.hadoop\.mapred\.MapTask\.runOldMapper(MapTask\.java:430)
at org\.apache\.hadoop\.mapred\.MapTask\.run(MapTask\.java:366)
at org\.apache\.hadoop\.mapred\.Child$4\.run(Child\.java:255)
at java\.security\.AccessController\.doPrivileged(Native Method)
at javax\.security\.auth\.Subject\.doAs(Subject\.java:416)
at org\.apache\.hadoop\.security\.UserGroupInformation\.doAs(UserGroupInformation\.java:1190)
at org\.apache\.hadoop\.mapred\.Child\.main(Child\.java:249)
" .
MapAttempt TASK_TYPE="MAP" TASKID="task_201401062145_0004_m_000000" TASK_ATTEMPT_ID="attempt_201401062145_0004_m_000000_1" START_TIME="1389137811201" TRACKER_NAME="tracker_hadoop3\.local:localhost/127\.0\.0\.1:36494" HTTP_PORT="50060" LOCALITY="NODE_LOCAL" AVATAAR="VIRGIN" .
MapAttempt TASK_TYPE="MAP" TASKID="task_201401062145_0004_m_000000" TASK_ATTEMPT_ID="attempt_201401062145_0004_m_000000_1" TASK_STATUS="FAILED" FINISH_TIME="1389137817127" HOSTNAME="hadoop3\.local" ERROR="cascading\.tuple\.FieldsResolverException: could not select fields: [{1}:'derived_sepal_length'], from: [{3}:'sepal_length', 'petal_length', 'petal_width']
at cascading\.tuple\.Fields\.indexOf(Fields\.java:1008)
at cascading\.tuple\.Fields\.getPos(Fields\.java:986)
at cascading\.pattern\.model\.generalregression\.expression\.ParameterExpression\.<init>(ParameterExpression\.java:98)
at cascading\.pattern\.model\.generalregression\.Parameter\.createExpression(Parameter\.java:119)
at cascading\.pattern\.model\.generalregression\.RegressionTable\.bind(RegressionTable\.java:97)
at cascading\.pattern\.model\.generalregression\.GeneralRegressionSpec\.getRegressionTableEvaluators(GeneralRegressionSpec\.java:123)
at cascading\.pattern\.model\.generalregression\.BaseRegressionFunction\.prepare(BaseRegressionFunction\.java:53)
at cascading\.pattern\.model\.generalregression\.PredictionRegressionFunction\.prepare(PredictionRegressionFunction\.java:33)
at cascading\.flow\.stream\.OperatorStage\.prepare(OperatorStage\.java:284)
at cascading\.flow\.stream\.StreamGraph\.prepare(StreamGraph\.java:167)
at cascading\.flow\.hadoop\.FlowMapper\.run(FlowMapper\.java:110)
at org\.apache\.hadoop\.mapred\.MapTask\.runOldMapper(MapTask\.java:430)
at org\.apache\.hadoop\.mapred\.MapTask\.run(MapTask\.java:366)
at org\.apache\.hadoop\.mapred\.Child$4\.run(Child\.java:255)
at java\.security\.AccessController\.doPrivileged(Native Method)
at javax\.security\.auth\.Subject\.doAs(Subject\.java:416)
at org\.apache\.hadoop\.security\.UserGroupInformation\.doAs(UserGroupInformation\.java:1190)
at org\.apache\.hadoop\.mapred\.Child\.main(Child\.java:249)
" .
MapAttempt TASK_TYPE="MAP" TASKID="task_201401062145_0004_m_000001" TASK_ATTEMPT_ID="attempt_201401062145_0004_m_000001_1" START_TIME="1389137811381" TRACKER_NAME="tracker_hadoop2\.local:localhost/127\.0\.0\.1:41596" HTTP_PORT="50060" LOCALITY="NODE_LOCAL" AVATAAR="VIRGIN" .
MapAttempt TASK_TYPE="MAP" TASKID="task_201401062145_0004_m_000001" TASK_ATTEMPT_ID="attempt_201401062145_0004_m_000001_1" TASK_STATUS="FAILED" FINISH_TIME="1389137817235" HOSTNAME="hadoop2\.local" ERROR="cascading\.tuple\.FieldsResolverException: could not select fields: [{1}:'derived_sepal_length'], from: [{3}:'sepal_length', 'petal_length', 'petal_width']
at cascading\.tuple\.Fields\.indexOf(Fields\.java:1008)
at cascading\.tuple\.Fields\.getPos(Fields\.java:986)
at cascading\.pattern\.model\.generalregression\.expression\.ParameterExpression\.<init>(ParameterExpression\.java:98)
at cascading\.pattern\.model\.generalregression\.Parameter\.createExpression(Parameter\.java:119)
at cascading\.pattern\.model\.generalregression\.RegressionTable\.bind(RegressionTable\.java:97)
at cascading\.pattern\.model\.generalregression\.GeneralRegressionSpec\.getRegressionTableEvaluators(GeneralRegressionSpec\.java:123)
at cascading\.pattern\.model\.generalregression\.BaseRegressionFunction\.prepare(BaseRegressionFunction\.java:53)
at cascading\.pattern\.model\.generalregression\.PredictionRegressionFunction\.prepare(PredictionRegressionFunction\.java:33)
at cascading\.flow\.stream\.OperatorStage\.prepare(OperatorStage\.java:284)
at cascading\.flow\.stream\.StreamGraph\.prepare(StreamGraph\.java:167)
at cascading\.flow\.hadoop\.FlowMapper\.run(FlowMapper\.java:110)
at org\.apache\.hadoop\.mapred\.MapTask\.runOldMapper(MapTask\.java:430)
at org\.apache\.hadoop\.mapred\.MapTask\.run(MapTask\.java:366)
at org\.apache\.hadoop\.mapred\.Child$4\.run(Child\.java:255)
at java\.security\.AccessController\.doPrivileged(Native Method)
at javax\.security\.auth\.Subject\.doAs(Subject\.java:416)
at org\.apache\.hadoop\.security\.UserGroupInformation\.doAs(UserGroupInformation\.java:1190)
at org\.apache\.hadoop\.mapred\.Child\.main(Child\.java:249)
" .
MapAttempt TASK_TYPE="MAP" TASKID="task_201401062145_0004_m_000000" TASK_ATTEMPT_ID="attempt_201401062145_0004_m_000000_2" START_TIME="1389137817488" TRACKER_NAME="tracker_hadoop1\.local:localhost/127\.0\.0\.1:54014" HTTP_PORT="50060" LOCALITY="NODE_LOCAL" AVATAAR="VIRGIN" .
MapAttempt TASK_TYPE="MAP" TASKID="task_201401062145_0004_m_000000" TASK_ATTEMPT_ID="attempt_201401062145_0004_m_000000_2" TASK_STATUS="FAILED" FINISH_TIME="1389137823252" HOSTNAME="hadoop1\.local" ERROR="cascading\.tuple\.FieldsResolverException: could not select fields: [{1}:'derived_sepal_length'], from: [{3}:'sepal_length', 'petal_length', 'petal_width']
at cascading\.tuple\.Fields\.indexOf(Fields\.java:1008)
at cascading\.tuple\.Fields\.getPos(Fields\.java:986)
at cascading\.pattern\.model\.generalregression\.expression\.ParameterExpression\.<init>(ParameterExpression\.java:98)
at cascading\.pattern\.model\.generalregression\.Parameter\.createExpression(Parameter\.java:119)
at cascading\.pattern\.model\.generalregression\.RegressionTable\.bind(RegressionTable\.java:97)
at cascading\.pattern\.model\.generalregression\.GeneralRegressionSpec\.getRegressionTableEvaluators(GeneralRegressionSpec\.java:123)
at cascading\.pattern\.model\.generalregression\.BaseRegressionFunction\.prepare(BaseRegressionFunction\.java:53)
at cascading\.pattern\.model\.generalregression\.PredictionRegressionFunction\.prepare(PredictionRegressionFunction\.java:33)
at cascading\.flow\.stream\.OperatorStage\.prepare(OperatorStage\.java:284)
at cascading\.flow\.stream\.StreamGraph\.prepare(StreamGraph\.java:167)
at cascading\.flow\.hadoop\.FlowMapper\.run(FlowMapper\.java:110)
at org\.apache\.hadoop\.mapred\.MapTask\.runOldMapper(MapTask\.java:430)
at org\.apache\.hadoop\.mapred\.MapTask\.run(MapTask\.java:366)
at org\.apache\.hadoop\.mapred\.Child$4\.run(Child\.java:255)
at java\.security\.AccessController\.doPrivileged(Native Method)
at javax\.security\.auth\.Subject\.doAs(Subject\.java:416)
at org\.apache\.hadoop\.security\.UserGroupInformation\.doAs(UserGroupInformation\.java:1190)
at org\.apache\.hadoop\.mapred\.Child\.main(Child\.java:249)
" .
MapAttempt TASK_TYPE="MAP" TASKID="task_201401062145_0004_m_000001" TASK_ATTEMPT_ID="attempt_201401062145_0004_m_000001_2" START_TIME="1389137817524" TRACKER_NAME="tracker_hadoop3\.local:localhost/127\.0\.0\.1:36494" HTTP_PORT="50060" LOCALITY="NODE_LOCAL" AVATAAR="VIRGIN" .
MapAttempt TASK_TYPE="MAP" TASKID="task_201401062145_0004_m_000001" TASK_ATTEMPT_ID="attempt_201401062145_0004_m_000001_2" TASK_STATUS="FAILED" FINISH_TIME="1389137823285" HOSTNAME="hadoop3\.local" ERROR="cascading\.tuple\.FieldsResolverException: could not select fields: [{1}:'derived_sepal_length'], from: [{3}:'sepal_length', 'petal_length', 'petal_width']
at cascading\.tuple\.Fields\.indexOf(Fields\.java:1008)
at cascading\.tuple\.Fields\.getPos(Fields\.java:986)
at cascading\.pattern\.model\.generalregression\.expression\.ParameterExpression\.<init>(ParameterExpression\.java:98)
at cascading\.pattern\.model\.generalregression\.Parameter\.createExpression(Parameter\.java:119)
at cascading\.pattern\.model\.generalregression\.RegressionTable\.bind(RegressionTable\.java:97)
at cascading\.pattern\.model\.generalregression\.GeneralRegressionSpec\.getRegressionTableEvaluators(GeneralRegressionSpec\.java:123)
at cascading\.pattern\.model\.generalregression\.BaseRegressionFunction\.prepare(BaseRegressionFunction\.java:53)
at cascading\.pattern\.model\.generalregression\.PredictionRegressionFunction\.prepare(PredictionRegressionFunction\.java:33)
at cascading\.flow\.stream\.OperatorStage\.prepare(OperatorStage\.java:284)
at cascading\.flow\.stream\.StreamGraph\.prepare(StreamGraph\.java:167)
at cascading\.flow\.hadoop\.FlowMapper\.run(FlowMapper\.java:110)
at org\.apache\.hadoop\.mapred\.MapTask\.runOldMapper(MapTask\.java:430)
at org\.apache\.hadoop\.mapred\.MapTask\.run(MapTask\.java:366)
at org\.apache\.hadoop\.mapred\.Child$4\.run(Child\.java:255)
at java\.security\.AccessController\.doPrivileged(Native Method)
at javax\.security\.auth\.Subject\.doAs(Subject\.java:416)
at org\.apache\.hadoop\.security\.UserGroupInformation\.doAs(UserGroupInformation\.java:1190)
at org\.apache\.hadoop\.mapred\.Child\.main(Child\.java:249)
" .
MapAttempt TASK_TYPE="MAP" TASKID="task_201401062145_0004_m_000000" TASK_ATTEMPT_ID="attempt_201401062145_0004_m_000000_3" START_TIME="1389137823275" TRACKER_NAME="tracker_hadoop1\.local:localhost/127\.0\.0\.1:54014" HTTP_PORT="50060" LOCALITY="NODE_LOCAL" AVATAAR="VIRGIN" .
MapAttempt TASK_TYPE="MAP" TASKID="task_201401062145_0004_m_000000" TASK_ATTEMPT_ID="attempt_201401062145_0004_m_000000_3" TASK_STATUS="FAILED" FINISH_TIME="1389137828904" HOSTNAME="hadoop1\.local" ERROR="cascading\.tuple\.FieldsResolverException: could not select fields: [{1}:'derived_sepal_length'], from: [{3}:'sepal_length', 'petal_length', 'petal_width']
at cascading\.tuple\.Fields\.indexOf(Fields\.java:1008)
at cascading\.tuple\.Fields\.getPos(Fields\.java:986)
at cascading\.pattern\.model\.generalregression\.expression\.ParameterExpression\.<init>(ParameterExpression\.java:98)
at cascading\.pattern\.model\.generalregression\.Parameter\.createExpression(Parameter\.java:119)
at cascading\.pattern\.model\.generalregression\.RegressionTable\.bind(RegressionTable\.java:97)
at cascading\.pattern\.model\.generalregression\.GeneralRegressionSpec\.getRegressionTableEvaluators(GeneralRegressionSpec\.java:123)
at cascading\.pattern\.model\.generalregression\.BaseRegressionFunction\.prepare(BaseRegressionFunction\.java:53)
at cascading\.pattern\.model\.generalregression\.PredictionRegressionFunction\.prepare(PredictionRegressionFunction\.java:33)
at cascading\.flow\.stream\.OperatorStage\.prepare(OperatorStage\.java:284)
at cascading\.flow\.stream\.StreamGraph\.prepare(StreamGraph\.java:167)
at cascading\.flow\.hadoop\.FlowMapper\.run(FlowMapper\.java:110)
at org\.apache\.hadoop\.mapred\.MapTask\.runOldMapper(MapTask\.java:430)
at org\.apache\.hadoop\.mapred\.MapTask\.run(MapTask\.java:366)
at org\.apache\.hadoop\.mapred\.Child$4\.run(Child\.java:255)
at java\.security\.AccessController\.doPrivileged(Native Method)
at javax\.security\.auth\.Subject\.doAs(Subject\.java:416)
at org\.apache\.hadoop\.security\.UserGroupInformation\.doAs(UserGroupInformation\.java:1190)
at org\.apache\.hadoop\.mapred\.Child\.main(Child\.java:249)
" .
Task TASKID="task_201401062145_0004_m_000000" TASK_TYPE="MAP" TASK_STATUS="FAILED" FINISH_TIME="1389137828904" ERROR="cascading\.tuple\.FieldsResolverException: could not select fields: [{1}:'derived_sepal_length'], from: [{3}:'sepal_length', 'petal_length', 'petal_width']
at cascading\.tuple\.Fields\.indexOf(Fields\.java:1008)
at cascading\.tuple\.Fields\.getPos(Fields\.java:986)
at cascading\.pattern\.model\.generalregression\.expression\.ParameterExpression\.<init>(ParameterExpression\.java:98)
at cascading\.pattern\.model\.generalregression\.Parameter\.createExpression(Parameter\.java:119)
at cascading\.pattern\.model\.generalregression\.RegressionTable\.bind(RegressionTable\.java:97)
at cascading\.pattern\.model\.generalregression\.GeneralRegressionSpec\.getRegressionTableEvaluators(GeneralRegressionSpec\.java:123)
at cascading\.pattern\.model\.generalregression\.BaseRegressionFunction\.prepare(BaseRegressionFunction\.java:53)
at cascading\.pattern\.model\.generalregression\.PredictionRegressionFunction\.prepare(PredictionRegressionFunction\.java:33)
at cascading\.flow\.stream\.OperatorStage\.prepare(OperatorStage\.java:284)
at cascading\.flow\.stream\.StreamGraph\.prepare(StreamGraph\.java:167)
at cascading\.flow\.hadoop\.FlowMapper\.run(FlowMapper\.java:110)
at org\.apache\.hadoop\.mapred\.MapTask\.runOldMapper(MapTask\.java:430)
at org\.apache\.hadoop\.mapred\.MapTask\.run(MapTask\.java:366)
at org\.apache\.hadoop\.mapred\.Child$4\.run(Child\.java:255)
at java\.security\.AccessController\.doPrivileged(Native Method)
at javax\.security\.auth\.Subject\.doAs(Subject\.java:416)
at org\.apache\.hadoop\.security\.UserGroupInformation\.doAs(UserGroupInformation\.java:1190)
at org\.apache\.hadoop\.mapred\.Child\.main(Child\.java:249)
" TASK_ATTEMPT_ID="" .
Task TASKID="task_201401062145_0004_m_000002" TASK_TYPE="CLEANUP" START_TIME="1389137829026" SPLITS="" .
MapAttempt TASK_TYPE="MAP" TASKID="task_201401062145_0004_m_000001" TASK_ATTEMPT_ID="attempt_201401062145_0004_m_000001_3" START_TIME="1389137823297" TRACKER_NAME="tracker_hadoop3\.local:localhost/127\.0\.0\.1:36494" HTTP_PORT="50060" LOCALITY="NODE_LOCAL" AVATAAR="VIRGIN" .
MapAttempt TASK_TYPE="MAP" TASKID="task_201401062145_0004_m_000001" TASK_ATTEMPT_ID="attempt_201401062145_0004_m_000001_3" TASK_STATUS="FAILED" FINISH_TIME="1389137828935" HOSTNAME="hadoop3\.local" ERROR="cascading\.tuple\.FieldsResolverException: could not select fields: [{1}:'derived_sepal_length'], from: [{3}:'sepal_length', 'petal_length', 'petal_width']
at cascading\.tuple\.Fields\.indexOf(Fields\.java:1008)
at cascading\.tuple\.Fields\.getPos(Fields\.java:986)
at cascading\.pattern\.model\.generalregression\.expression\.ParameterExpression\.<init>(ParameterExpression\.java:98)
at cascading\.pattern\.model\.generalregression\.Parameter\.createExpression(Parameter\.java:119)
at cascading\.pattern\.model\.generalregression\.RegressionTable\.bind(RegressionTable\.java:97)
at cascading\.pattern\.model\.generalregression\.GeneralRegressionSpec\.getRegressionTableEvaluators(GeneralRegressionSpec\.java:123)
at cascading\.pattern\.model\.generalregression\.BaseRegressionFunction\.prepare(BaseRegressionFunction\.java:53)
at cascading\.pattern\.model\.generalregression\.PredictionRegressionFunction\.prepare(PredictionRegressionFunction\.java:33)
at cascading\.flow\.stream\.OperatorStage\.prepare(OperatorStage\.java:284)
at cascading\.flow\.stream\.StreamGraph\.prepare(StreamGraph\.java:167)
at cascading\.flow\.hadoop\.FlowMapper\.run(FlowMapper\.java:110)
at org\.apache\.hadoop\.mapred\.MapTask\.runOldMapper(MapTask\.java:430)
at org\.apache\.hadoop\.mapred\.MapTask\.run(MapTask\.java:366)
at org\.apache\.hadoop\.mapred\.Child$4\.run(Child\.java:255)
at java\.security\.AccessController\.doPrivileged(Native Method)
at javax\.security\.auth\.Subject\.doAs(Subject\.java:416)
at org\.apache\.hadoop\.security\.UserGroupInformation\.doAs(UserGroupInformation\.java:1190)
at org\.apache\.hadoop\.mapred\.Child\.main(Child\.java:249)
" .
MapAttempt TASK_TYPE="CLEANUP" TASKID="task_201401062145_0004_m_000002" TASK_ATTEMPT_ID="attempt_201401062145_0004_m_000002_0" START_TIME="1389137829061" TRACKER_NAME="tracker_hadoop1\.local:localhost/127\.0\.0\.1:54014" HTTP_PORT="50060" LOCALITY="OFF_SWITCH" AVATAAR="VIRGIN" .
MapAttempt TASK_TYPE="CLEANUP" TASKID="task_201401062145_0004_m_000002" TASK_ATTEMPT_ID="attempt_201401062145_0004_m_000002_0" TASK_STATUS="SUCCESS" FINISH_TIME="1389137830569" HOSTNAME="/default-rack/hadoop1\.local" STATE_STRING="cleanup" COUNTERS="{(FileSystemCounters)(FileSystemCounters)[(FILE_BYTES_WRITTEN)(FILE_BYTES_WRITTEN)(63454)]}{(org\.apache\.hadoop\.mapred\.Task$Counter)(Map-Reduce Framework)[(PHYSICAL_MEMORY_BYTES)(Physical memory \\(bytes\\) snapshot)(88141824)][(SPILLED_RECORDS)(Spilled Records)(0)][(CPU_MILLISECONDS)(CPU time spent \\(ms\\))(60)][(COMMITTED_HEAP_BYTES)(Total committed heap usage \\(bytes\\))(7798784)][(VIRTUAL_MEMORY_BYTES)(Virtual memory \\(bytes\\) snapshot)(1059917824)]}" .
Task TASKID="task_201401062145_0004_m_000002" TASK_TYPE="CLEANUP" TASK_STATUS="SUCCESS" FINISH_TIME="1389137830550" COUNTERS="{(FileSystemCounters)(FileSystemCounters)[(FILE_BYTES_WRITTEN)(FILE_BYTES_WRITTEN)(63454)]}{(org\.apache\.hadoop\.mapred\.Task$Counter)(Map-Reduce Framework)[(PHYSICAL_MEMORY_BYTES)(Physical memory \\(bytes\\) snapshot)(88141824)][(SPILLED_RECORDS)(Spilled Records)(0)][(CPU_MILLISECONDS)(CPU time spent \\(ms\\))(60)][(COMMITTED_HEAP_BYTES)(Total committed heap usage \\(bytes\\))(7798784)][(VIRTUAL_MEMORY_BYTES)(Virtual memory \\(bytes\\) snapshot)(1059917824)]}" .
Job JOBID="job_201401062145_0004" FINISH_TIME="1389137830551" JOB_STATUS="FAILED" FINISHED_MAPS="0" FINISHED_REDUCES="0" FAIL_REASON="# of failed Map Tasks exceeded allowed limit\. FailedCount: 1\. LastFailedTask: task_201401062145_0004_m_000000" .
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment