Last active
January 4, 2016 02:29
-
-
Save RanaivosonHerimanitra/8555071 to your computer and use it in GitHub Desktop.
This code, written with the 'Rcpp' package, replaces the NA values of numeric/integer columns with the column mean. It is designed to work with big data frames that have thousands of columns and rows.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <Rcpp.h> | |
using namespace Rcpp; | |
/// Mean-imputes missing values, column by column, in a data frame.
///
/// For every column that R considers numeric (double, integer or logical,
/// but not factor), entries for which `R_IsNA` is true are replaced by the
/// mean of that column's remaining entries. All other columns (character,
/// factor, list, ...) are passed through unchanged.
///
/// Notes on behaviour:
///  - The mean is computed per column from the non-missing entries only.
///  - A column that contains no missing value is returned as-is, so an
///    integer column without NAs keeps its integer type.
///  - An integer/logical column that does contain NAs is returned as a
///    double column, because the mean is generally not an integer.
///  - A column whose entries are all missing has no defined mean and is
///    returned unchanged.
///  - The input data frame is not modified: a double column is cloned
///    before any value is written into it.
///
/// @param df  The input data frame.
/// @return    A list carrying the `names`, `class` and `row.names`
///            attributes of `df`, with the imputed columns.
// [[Rcpp::export]]
List modiframe(DataFrame& df) {
  const int nrow = df.nrows();
  const int ncol = df.size();

  List output(ncol);

  for (int i = 0; i < ncol; ++i) {
    SEXP column = df[i];

    // Rf_isNumeric() is true for double, integer and logical vectors and
    // false for factors; anything else cannot be mean-imputed.
    if (!Rf_isNumeric(column)) {
      output[i] = column;
      continue;
    }

    // For a double column this shares storage with `df`; for integer or
    // logical columns it is a freshly coerced copy.
    NumericVector values = as<NumericVector>(column);

    // The accumulators are local to the column so that nothing leaks from
    // one column into the next.
    double sum = 0.0;
    int observed = 0;
    for (int j = 0; j < nrow; ++j) {
      if (!R_IsNA(values[j])) {
        sum += values[j];
        ++observed;
      }
    }

    // Nothing to impute (no NA), or nothing to impute with (only NAs):
    // keep the original column untouched.
    if (observed == nrow || observed == 0) {
      output[i] = column;
      continue;
    }

    // Divide by the number of observed values, not by the row count.
    const double mean = sum / observed;

    // Clone double columns so the caller's data frame is left intact.
    const bool shares_input = (TYPEOF(column) == REALSXP);
    NumericVector filled = shares_input ? clone(values) : values;
    for (int j = 0; j < nrow; ++j) {
      if (R_IsNA(filled[j])) {
        filled[j] = mean;
      }
    }
    output[i] = filled;
  }

  // Carry over the attributes that make the list behave as a data frame.
  output.attr("names") = df.attr("names");
  output.attr("class") = df.attr("class");
  output.attr("row.names") = df.attr("row.names");
  return output;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment