In pandas, it is much faster to fetch a row on its own (independently of the
headers) and then use dict and zip to build the header-to-value mapping than
it is to use the built-in to_dict method.
import pandas as pd
import os, time
# Load the sample data. index_col=False forces pandas not to use the first
# column as the index.
dataframe = pd.read_csv("data.csv", index_col=False)

# Approach 1: select the row labelled 25 as a one-row DataFrame (all columns)
# and let pandas build the header -> value mapping with to_dict("records").
# perf_counter() is used instead of time() because it is the clock meant for
# measuring short durations; time() is a wall-clock timer that can be adjusted
# and may have coarse resolution.
# NOTE: this is a single run, so the figure includes any first-call overhead.
start_time = time.perf_counter()
row = dataframe.loc[[25], list(dataframe)].to_dict("records")[0]
print("TIME: {}".format(time.perf_counter() - start_time))
>> TIME: 0.10403084754943848
# Approach 2: fetch the row labelled 25 as a Series, then pair each column
# header with its value using plain dict/zip (list(dataframe) yields the
# column labels).
# perf_counter() is used instead of time() because it is the clock meant for
# measuring short durations.
# NOTE(review): a row Series has a single dtype, so with mixed-type columns
# the values may be upcast and differ in type from the to_dict("records")
# result - confirm this is acceptable for the caller.
start_time = time.perf_counter()
row = dataframe.loc[25, :]
row_data = dict(zip(list(dataframe), row))
print("TIME: {}".format(time.perf_counter() - start_time))
>> TIME: 0.00045490264892578125