Skip to content

Instantly share code, notes, and snippets.

@farzadhallaji
Created February 14, 2023 22:50
Show Gist options
  • Save farzadhallaji/5c1b9d21216b880a01517a7de99ac717 to your computer and use it in GitHub Desktop.
Save farzadhallaji/5c1b9d21216b880a01517a7de99ac717 to your computer and use it in GitHub Desktop.
how to read .dat dataset files in pandas
import re

import pandas as pd
def read_dot_dat_file(path):
    """Read a two-class ``.dat`` dataset with an ``@``-tagged header.

    The header layout checked by this function is::

        @relation <name>
        ... (any other header lines) ...
        @attribute <output> {<label1>, <label2>}
        @inputs <col>, <col>, ...
        @outputs <output>
        @data
        <comma-separated data rows>

    Commas in the header and in the data rows may be written with or
    without surrounding spaces.

    Args:
        path: Location of the ``.dat`` file.

    Returns:
        A pandas DataFrame with one column per ``@inputs`` name followed by
        the first ``@outputs`` name. In the output column the first declared
        label is mapped to ``1`` and the second to ``-1``; any other value
        is left unchanged.

    Raises:
        ValueError: If no line is exactly ``@data``, if the ``@relation``,
            ``@inputs`` or ``@outputs`` lines are not where expected, or if
            the line before ``@inputs`` does not declare exactly two labels
            inside braces.
    """
    # Tokenise every line once; the context manager closes the file promptly.
    with open(path) as dat_file:
        rows = [line.split() for line in dat_file]

    # list.index raises ValueError by itself when there is no "@data" line.
    data_idx = rows.index(["@data"])

    # Without this guard the negative offsets below would silently wrap
    # around to the end of the file.
    if data_idx < 4:
        raise ValueError(
            "header must contain @relation, a class @attribute, @inputs "
            "and @outputs lines before @data"
        )
    class_row, inputs_row, outputs_row = rows[data_idx - 3:data_idx]

    expected_tags = (
        (rows[0], "@relation"),
        (inputs_row, "@inputs"),
        (outputs_row, "@outputs"),
    )
    for row, tag in expected_tags:
        # Slicing keeps the check safe for blank lines (empty token lists).
        if row[:1] != [tag]:
            raise ValueError(
                "expected a {} line, found: {!r}".format(tag, " ".join(row))
            )

    # Names may be separated by ", ", "," or plain spaces; treating commas
    # as whitespace also keeps stray commas out of the column names.
    col_names = " ".join(inputs_row[1:]).replace(",", " ").split()
    output_names = " ".join(outputs_row[1:]).replace(",", " ").split()
    if not output_names:
        raise ValueError("@outputs line does not name an output column")
    target = output_names[0]  # only the first output is loaded
    col_names.append(target)

    # The two class labels are listed as "{label1, label2}" after the
    # attribute name on the line just before @inputs.
    label_spec = " ".join(class_row[2:])
    if not (label_spec.startswith("{") and label_spec.endswith("}")):
        raise ValueError(
            "expected a '{{label1, label2}}' declaration, found: {!r}".format(
                " ".join(class_row)
            )
        )
    labels = label_spec[1:-1].replace(",", " ").split()
    if len(labels) != 2:
        raise ValueError(
            "expected exactly two class labels, found {}: {!r}".format(
                len(labels), labels
            )
        )
    first_label, second_label = labels

    # The data rows start right after the "@data" line. A regex separator
    # (python engine) accepts commas with or without surrounding spaces.
    df = pd.read_csv(
        path,
        skiprows=data_idx + 1,
        names=col_names,
        sep=r"\s*,\s*",
        engine="python",
    )

    # Map through a lookup instead of Series.replace so the result does not
    # depend on pandas' deprecated silent downcasting; unlisted values pass
    # through unchanged, exactly as replace would leave them.
    label_codes = {first_label: 1, second_label: -1}
    df[target] = df[target].map(lambda value: label_codes.get(value, value))
    return df
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment