from pyspark.sql import *
import matplotlib.pyplot as pyplot
import seaborn as sns
import pandas as pd
# Start (or reuse) a Spark session running locally; "local[*]" is the master
# URL and "MADS 2020" is the application name shown in the Spark UI.
spark = (
    SparkSession.builder
    .master("local[*]")
    .appName("MADS 2020")
    .getOrCreate()
)

# Load the machine log: a semicolon-separated CSV whose first row is the
# header. Column types are inferred from the data rather than read as strings.
data = spark.read.csv(
    "data/machine_log.csv",
    inferSchema=True,
    header=True,
    sep=";",
)

# Work on a reproducible random sample (fraction 0.1, fixed seed) so that
# exploratory steps stay fast.
data_sample = data.sample(fraction=0.1, seed=42)
# Some compounds have fewer produced units.
