Skip to content

Instantly share code, notes, and snippets.

@nmpowell
Last active July 1, 2021 16:15
Show Gist options
  • Save nmpowell/9ea22c687de21dba860643fc914b0420 to your computer and use it in GitHub Desktop.
Save nmpowell/9ea22c687de21dba860643fc914b0420 to your computer and use it in GitHub Desktop.
Pandas functions to read .CSV files before / after an empty line is encountered.
# Python functions to read .CSV files into a Pandas DataFrame when the data of interest is before / after one or more empty/blank lines.
def csv_after_emptylines(filepath, bl_group_n=1, dtype=str):
    """Read the part of a .CSV file that follows blank lines into a DataFrame.

    Rows are ignored until ``bl_group_n`` distinct groups of blank lines (each
    group being one or more consecutive blank rows) have been passed. The
    first non-blank row after the last such group supplies the column names,
    and every row after it is handed to the DataFrame as data.

    NB: ``pd.read_csv(filepath, skiprows=[0, 1, 2])`` is enough when the
    number of rows to skip is known. Use this function when a variable or
    unknown number of filled rows (to be ignored) precedes the blank rows.

    Args:
        filepath: Path of the CSV file to read.
        bl_group_n: Number of blank-line groups to pass before the header row.
        dtype: Data type forwarded to the ``pd.DataFrame`` constructor.

    Returns:
        A DataFrame built from the rows after the header row. If the file
        holds fewer than ``bl_group_n`` blank-line groups, the frame has no
        rows and its columns are taken from the last block that was reached
        (no columns at all if no blank line was ever followed by data).
    """
    # Imported here because the module itself does not import them.
    import csv

    import pandas as pd

    blank_seen = False  # a blank row has been seen since the last block
    groups_done = 0     # blank-line groups already followed by a filled row
    headers = None
    contents = []

    with open(filepath, newline='') as f:
        for row in csv.reader(f):
            if groups_done >= bl_group_n:
                # Past the header row: everything from here on is data.
                contents.append(row)
            elif not row:
                # csv.reader yields an empty list for a blank line.
                blank_seen = True
            elif blank_seen:
                # First filled row after a run of blank rows closes the group;
                # it is the header unless a later group replaces it.
                groups_done += 1
                blank_seen = False
                headers = row
            # Otherwise: a filled row inside a block that is being skipped.

    return pd.DataFrame(data=contents, columns=headers, dtype=dtype)
def csv_until_emptyline(filepath, dtype=str):
    """Read a .CSV file into a DataFrame, stopping at the first blank line.

    The first row supplies the column names; each following row, up to but
    not including the first blank row, is returned as data. Anything after
    that blank row is never read.

    Args:
        filepath: Path of the CSV file to read.
        dtype: Data type forwarded to the ``pd.DataFrame`` constructor.

    Returns:
        A DataFrame of the rows between the header row and the first blank
        line. An empty file, or one whose first line is blank, gives an empty
        DataFrame with no columns.
    """
    # Imported here because the module itself does not import them.
    import csv

    import pandas as pd

    # Set up front so a file with no usable first row still reaches the
    # return statement with a defined value.
    headers = None
    contents = []

    with open(filepath, newline='') as f:
        for index, row in enumerate(csv.reader(f)):
            if not row:
                # csv.reader yields an empty list for a blank line.
                break
            if index == 0:
                headers = row
            else:
                contents.append(row)

    return pd.DataFrame(data=contents, columns=headers, dtype=dtype)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment