Skip to content

Instantly share code, notes, and snippets.

function create_fruit_table(conn_master, db_name::String, table_name::String)
create_db(conn_master, db_name)
conn = connect_db(db_name)
df_tables = DBInterface.execute(conn,
"SELECT * FROM information_schema.tables") |> DataFrame
if table_name ∉ df_tables[!, :TABLE_NAME]
function create_db(conn, db_name::String)
df_db = list_db(conn)
if db_name ∉ df_db.name
try
DBInterface.execute(conn, "CREATE DATABASE $(db_name)")
@info "Created a new database $(db_name)"
catch e
error("Unable to create a new database $(db_name)")
function plot_failed_scatter(location::String,
num_files::Int64;
s1::String,
s2::String)
df_all_nok = get_failed_drives(location, num_files)
start_date, end_date = df_all_nok[!, :date][1], df_all_nok[!, :date][end]
# Calculate Pearson's correlation coefficient
df_corr = select(df_all_nok, [Symbol(s1), Symbol(s2)]) |> dropmissing
function get_parameter_split(location::String,
num_files::Int64;
smart_stat::String)
files = read_filepaths(location)
files_to_use = files[1:num_files]
dates = Date[]
nok_match, ok_match = [Float64[] for i = 1:2]
function get_stat_match(df::DataFrame, smart_stat::String)
num_total = nrow(df)
df_stat = select(df, Symbol(smart_stat) => Symbol("to_check"))
num_stat = count(row -> ~ismissing(row.to_check) &&
row.to_check > 0, eachrow(df_stat))
num_match = (num_stat / num_total) * 100
function plot_failed_model_pert(location::String,
num_files::Int64)
files = read_filepaths(location)
# Read first file to get a population sample
df_hdd = csv_to_df(files[1])
df_hdd_model = get_model_count(df_hdd)
# Get total failed drives
function get_model_count(df_hdd::DataFrame)
# Get count of each model
all_models = df_hdd[!, :model] |> unique
all_counts = Int64[]
for model in all_models
num_counts = count(x -> x == model, df_hdd[!, :model])
push!(all_counts, num_counts)
end
function plot_failed_capacity_dist(location::String,
num_files::Int64)
df_all_nok = get_failed_drives(location, num_files)
start_date, end_date = df_all_nok[!, :date][1], df_all_nok[!, :date][end]
df_hdd_capacity = select(df_all_nok,
:model,
:capacity_bytes => (x -> x / 1e12) => :capacity_TB)
function get_failed_drives(location::String,
num_files::Int64)
files = read_filepaths(location)
@assert num_files ≤ length(files) "Reduce the number of files below $(length(files))"
files_to_use = files[1:num_files]
all_nok = DataFrame[]
for file in files_to_use
"""
    csv_to_df(file)

Read the CSV source `file` with `CSV.File` (passing `header = 1`) and
return its contents materialized as a `DataFrame`.
"""
function csv_to_df(file)
    # Parse first, then hand the parsed table to the DataFrame constructor.
    parsed = CSV.File(file; header = 1)
    return DataFrame(parsed)
end