# nmpowell/csv_pandas_parse_emptylines.py

## csv_pandas_parse_emptylines.py
# Python functions to read .CSV files into a Pandas DataFrame when the data of interest is before / after one or more empty/blank lines.

import csv

import pandas as pd

def csv_after_emptylines(filepath, bl_group_n=1, dtype=str):
    """ Build a Pandas DataFrame from the rows of a .CSV that follow blank-line separators.

    The file is read row by row with csv.reader. Rows are ignored until
    bl_group_n separate runs of blank rows (each run may be any length) have
    been passed. The first non-blank row after the last of those runs becomes
    the column names, and every row after it (blank rows included) is kept as
    data.

    Parameters:
        filepath: path of the .CSV file, handed to open().
        bl_group_n: how many distinct runs of blank rows to skip before the
            header row. With a value of 0 or less nothing is skipped: every
            row is data and no column names are supplied.
        dtype: forwarded unchanged to pd.DataFrame (defaults to str).

    Returns:
        A pd.DataFrame holding the rows found after the header row. It has no
        data rows when the file contains fewer than bl_group_n runs of blank
        rows.

    NB: pd.read_csv(filepath, skiprows=[0, 1, 2]) is enough when the number of leading rows is known. This function is for files where an unknown number of filled rows comes before the blank ones.
    """
    data_rows = []
    header_row = None
    groups_passed = 0     # completed runs of blank rows
    pending_blanks = 0    # blank rows seen since the last non-blank row

    with open(filepath, newline='') as handle:
        for row in csv.reader(handle):
            if groups_passed >= bl_group_n:
                # All separators are behind us: everything from here is data.
                data_rows.append(row)
            elif not row:
                pending_blanks += 1
            elif pending_blanks:
                # A non-blank row directly after blank rows closes one run;
                # remember it as the header candidate for that run.
                groups_passed += 1
                pending_blanks = 0
                header_row = row
            # Otherwise: a filled row before the next separator, ignored.

    return pd.DataFrame(data=data_rows, columns=header_row, dtype=dtype)


def csv_until_emptyline(filepath, dtype=str):
    """ Read the leading rows of a .CSV into a Pandas DataFrame, stopping at the first blank line.

    The file is read row by row with csv.reader. The first row supplies the
    column names; the rows after it, up to but not including the first blank
    row, become the data. Nothing after that blank row is read.

    Parameters:
        filepath: path of the .CSV file, handed to open().
        dtype: forwarded unchanged to pd.DataFrame (defaults to str).

    Returns:
        A pd.DataFrame of the rows between the header row and the first blank
        row. An empty file, or a file whose first row is blank, gives an
        empty DataFrame with no column names.
    """
    leading_rows = []
    with open(filepath, newline='') as f:
        for row in csv.reader(f):
            if not row:
                # The first blank row marks the end of the block of interest.
                break
            leading_rows.append(row)

    # Defaults cover the "no usable rows" case, so the header name is always
    # bound before the DataFrame is built.
    headers = None
    contents = []
    if leading_rows:
        headers, *contents = leading_rows
    return pd.DataFrame(data=contents, columns=headers, dtype=dtype)
	# Python functions to read .CSV files into a Pandas DataFrame when the data of interest is before / after one or more empty/blank lines.

	def csv_after_emptylines(filepath, bl_group_n=1, dtype=str):
		""" Read a .CSV into a Pandas DataFrame, but only after blank-line separators have been skipped.

		The file is read row by row with csv.reader. Rows are ignored until
		bl_group_n separate runs of blank rows (each run may be any length) have
		been passed. The first non-blank row after the last of those runs becomes
		the column names, and every row after it (blank rows included) is kept as
		data.

		Parameters:
			filepath: path of the .CSV file, handed to open().
			bl_group_n: how many distinct runs of blank rows to skip before the
				header row. With a value of 0 or less nothing is skipped: every
				row is data and no column names are supplied.
			dtype: forwarded unchanged to pd.DataFrame (defaults to str).

		Returns:
			A pd.DataFrame holding the rows found after the header row. It has no
			data rows when the file contains fewer than bl_group_n runs of blank
			rows.

		NB: pd.read_csv(filepath, skiprows=[0, 1, 2]) is enough when the number of leading rows is known. This function is for files where an unknown number of filled rows comes before the blank ones.
		"""
		contents = []
		headers = None
		bl_groups = 0      # completed runs of blank rows
		blank_lines = 0    # blank rows seen since the last non-blank row

		with open(filepath, newline='') as f:
			for row in csv.reader(f):
				if bl_groups >= bl_group_n:
					# All separators are behind us: everything from here is data.
					contents.append(row)
				elif not row:
					blank_lines += 1
				elif blank_lines:
					# A non-blank row directly after blank rows closes one run;
					# remember it as the header candidate for that run.
					bl_groups += 1
					blank_lines = 0
					headers = row
				# Otherwise: a filled row before the next separator, ignored.

		return pd.DataFrame(data=contents, columns=headers, dtype=dtype)


	def csv_until_emptyline(filepath, dtype=str):
		""" Read a .CSV into a Pandas DataFrame until a blank line is found, then stop.

		The file is read row by row with csv.reader. The first row supplies the
		column names; the rows after it, up to but not including the first blank
		row, become the data. Nothing after that blank row is read.

		Parameters:
			filepath: path of the .CSV file, handed to open().
			dtype: forwarded unchanged to pd.DataFrame (defaults to str).

		Returns:
			A pd.DataFrame of the rows between the header row and the first blank
			row. An empty file, or a file whose first row is blank, gives an
			empty DataFrame with no column names.
		"""
		# Bound up front so the DataFrame call below never sees an undefined
		# name when the file yields no header row.
		headers = None
		contents = []
		with open(filepath, newline='') as f:
			for row in csv.reader(f):
				if not row:
					# The first blank row marks the end of the block of interest.
					break
				if headers is None:
					headers = row
				else:
					contents.append(row)
		return pd.DataFrame(data=contents, columns=headers, dtype=dtype)