# pjankiewicz/mercari.py

## mercari.py
import numpy as np
import pandas as pd
from ludwig import LudwigModel
from sklearn.model_selection import train_test_split

# Load the tab-separated Mercari training data and add the regression target:
# log1p(price), which is also defined for a price of 0.
df = pd.read_csv("input/train.tsv", sep="\t")
df["log_price"] = np.log1p(df["price"])

# Ludwig model definition: four text inputs and two categorical inputs feed a
# single numerical output, the log-transformed price.
model = LudwigModel({
    "input_features": [
        {"name": "name", "type": "text"},
        {"name": "category_name", "type": "text"},
        {"name": "brand_name", "type": "text"},
        {"name": "item_description", "type": "text"},
        {"name": "shipping", "type": "category"},
        {"name": "item_condition_id", "type": "category"},
    ],
    "output_features": [
        {"name": "log_price", "type": "numerical"},
    ],
})

# Split with a fixed seed so the partition is reproducible, then fit on the
# training portion only. `test` is held out and not used further here.
train, test = train_test_split(df, random_state=0)
model.train(train)