Skip to content

Instantly share code, notes, and snippets.

@DIRKMJK
Created May 17, 2019 19:02
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save DIRKMJK/6bc55bc97c266b9b62d2e3f527a185af to your computer and use it in GitHub Desktop.
Save DIRKMJK/6bc55bc97c266b9b62d2e3f527a185af to your computer and use it in GitHub Desktop.
Read .sps file and convert to dataframe
"""Read .sps file and convert to dataframe
For caveats please see:
https://dirkmjk.nl/en/2017/04/python-script-import-sps-files
"""
import re
import pandas as pd
PATTERN = r'\"(.*?)\"'
def get_cells(row):
"""Split row into cells"""
cells = re.findall(PATTERN, row)
return [c.strip() for c in cells]
def read_sps(path, encoding):
"""Read .sps file and convert to dataframe"""
text = open(path, encoding=encoding).read()
column_txt = text.split('VAR LABELS')[1].split('VALUE LABELS')[0]
column_lines = column_txt.split('\n')
columns = [line.split('"')[1] for line in column_lines if '"' in line]
contains_data = False
contains_labels = False
new_row = None
df = pd.DataFrame(columns=columns)
labels = {}
lines = text.split('\n')
for line in lines:
if line == 'END DATA.':
contains_data = False
if contains_data:
cells = [cell for cell in re.split('\s+', line) if cell != '']
if not new_row:
new_row = cells
else:
new_row.extend(cells)
if len(new_row) == len(columns):
df.loc[len(df)] = new_row
new_row = []
if line == 'BEGIN DATA':
contains_data = True
if contains_labels:
stripped = line.strip()
if stripped.startswith('Key'):
current = int(stripped[3:].split(' ')[0])
label_columns = [columns[current], 'label_{}'.format(current)]
labels[current] = pd.DataFrame(columns=label_columns)
new_row = get_cells(line)
if len(new_row) == 2:
labels[current].loc[len(labels[current])] = new_row
if line == 'VALUE LABELS':
contains_labels = True
for key in labels:
df = df.merge(labels[key], on=columns[key], how='outer')
return df
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment