Created
August 17, 2020 13:24
-
-
Save risenW/a47a8a61e337214ca1967bb20f5ba1cd to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const dfd = require("danfojs-node") | |
const tf = require("@tensorflow/tfjs-node") | |
/**
 * Loads the Titanic CSV and turns it into training tensors.
 *
 * Steps performed:
 *  1. Replace each "Name" value with the text before its first "." .
 *  2. Label-encode the "Sex" and "Name" columns in place.
 *  3. Use every column from index 1 onward as features and "Survived" as the target.
 *  4. Fit a MinMaxScaler on the features and transform them with it.
 *
 * @returns {Promise<Array>} `[features, labels]`: the `.tensor` of the scaled
 *   feature frame followed by the `.tensor` of the "Survived" column.
 */
async function load_process_data() {
  const df = await dfd.read_csv("https://web.stanford.edu/class/archive/cs/cs109/cs109.1166/stuff/titanic.csv")

  // Feature engineering: keep only the text before the first "." of each name
  // (presumably the honorific, e.g. "Mr" — depends on the CSV's name format).
  const titles = df["Name"].apply((name) => name.split(".")[0]).values
  df.addColumn({ column: "Name", value: titles })

  // Label-encode the categorical columns; the same encoder is re-fitted per column.
  const encoder = new dfd.LabelEncoder()
  for (const col of ["Sex", "Name"]) {
    encoder.fit(df[col])
    // Declared locally: a bare assignment here would leak a global variable
    // and throws a ReferenceError in strict mode / ES modules.
    const encoded = encoder.transform(df[col])
    df.addColumn({ column: col, value: encoded })
  }

  // Features are all columns from index 1 onward; the target is "Survived"
  // (assumes "Survived" is the first column of the CSV — TODO confirm).
  const features = df.iloc({ columns: ["1:"] })
  const labels = df["Survived"]

  // Scale the features with MinMaxScaler, fitted on the same data it transforms.
  const scaler = new dfd.MinMaxScaler()
  scaler.fit(features)
  const scaled = scaler.transform(features)

  return [scaled.tensor, labels.tensor] // return the data as tensors
}
// Run the pipeline; report a failed download/parse instead of leaving the
// returned promise unhandled, and signal failure through the exit code.
load_process_data().catch((err) => {
  console.error(err)
  process.exitCode = 1
})
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment