Skip to content

Instantly share code, notes, and snippets.

@sternj
Created August 14, 2017 17:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sternj/52bc09ab32cf767cf475bde8db59e93e to your computer and use it in GitHub Desktop.
Save sternj/52bc09ab32cf767cf475bde8db59e93e to your computer and use it in GitHub Desktop.
A script to perform a case- and variable-merge of two SPSS datasets
import sys
import copy
import spss
#This program takes two datasets and appends the second to the first, including ALL cases and variables.
#It does this by appending as many new cases to the first dataset as there are cases in the second, cross-referencing
#and reconciling all value labels, and then finally populating the newly-constructed cases in the first dataset with
#the data from the second.
#For running outside of SPSS -- opens the two .sav files and names them DataSet1 and DataSet2,
#which are the dataset names the merge code at the bottom of the script looks up.
#NOTE: both file paths are hard-coded absolute paths; edit them to point at your own data.
spss.Submit("""
GET
FILE='D:\\binar\\Pictures\\Downloads\\dataset1.sav'.
DATASET NAME DataSet1 WINDOW=FRONT.""")
spss.Submit("""
GET
FILE='D:\\binar\\Pictures\\Downloads\\dataset2.sav'.
DATASET NAME DataSet2 WINDOW=FRONT.""")
#Intended to clear the python/spss environment of a data step left open by a previous run.
#NOTE(review): confirm that EndDataStep is harmless when no data step is currently active.
spss.EndDataStep()
def modifyVarLevels(set1, set2, varname1, varname2):
    """Fold the value labels of a set2 variable into the matching set1 variable.

    Args:
        set1, set2: spss.Dataset objects.
        varname1, varname2: variable names (strings) used to index
            ``set1.varlist`` and ``set2.varlist`` respectively.

    Behaviour:
        * If both variables carry identical value labels nothing is changed.
        * A value labelled only in set2 has its label copied to set1.
        * A value labelled in both sets with *different* text is a conflict:
          the set2 cases holding that value are recoded (via ``replaceVals``)
          to a fresh value, and the set2 label is registered in set1 under
          that fresh value, so set1 ends up with the union of both label sets.

    Returns:
        None. The datasets are modified in place.
    """
    target_var = set1.varlist[varname1]
    source_var = set2.varlist[varname2]
    # The SPSS docs expose the label mapping through ``valueLabels.data``;
    # fall back to the object itself when it already behaves like a mapping.
    # NOTE(review): confirm against the installed spss module version.
    target_labels = dict(getattr(target_var.valueLabels, "data", target_var.valueLabels))
    source_labels = dict(getattr(source_var.valueLabels, "data", source_var.valueLabels))
    if target_labels == source_labels:
        return None
    for value, label in source_labels.items():
        if value not in target_labels:
            # Only set2 knows this value: simply adopt its label.
            target_var.valueLabels[value] = label
            target_labels[value] = label
        elif target_labels[value] != label:
            # Same value, different meaning. One past the largest labelled
            # value in either set is guaranteed not to be a labelled value.
            # NOTE: this arithmetic assumes numeric values.
            newVal = max(max(target_labels), max(source_labels)) + 1
            replaceVals(set2, source_var.index, value, newVal)
            # Register the recoded value so that (a) the label survives the
            # merge and (b) the next conflict is given a different newVal.
            target_var.valueLabels[newVal] = label
            target_labels[newVal] = label
    return None
def replaceVals(setObj, index, oldVal, newVal):
    """Recode one value of one variable across every case of a dataset.

    Args:
        setObj: spss.Dataset object whose cases are rewritten.
        index: integer column position, taken from the ``index`` field of an
            spss.Variable object.
        oldVal: value to look for in that column; cases holding any other
            value are left untouched.
        newVal: value written in place of ``oldVal``.

    Returns:
        None. The dataset is modified in place.
    """
    cases = setObj.cases
    for row in range(len(cases)):
        # cases[row] hands back a plain list of that case's values; changing
        # that list would not reach the dataset, so the write goes through
        # the cases[row, column] assignment form instead.
        if cases[row][index] == oldVal:
            cases[row, index] = newVal
    return None
def appendVals(newSet, oldSet, destVar, origVar, startIndex):
    """Copy one column of ``oldSet`` into ``newSet`` below its original cases.

    Args:
        newSet: spss.Dataset object that receives the values.
        oldSet: spss.Dataset object the values are read from.
        destVar: spss.Variable object naming the column of ``newSet`` to fill.
        origVar: spss.Variable object naming the column of ``oldSet`` to read.
        startIndex: row of ``newSet`` that receives the first value
            (inclusive); case ``k`` of ``oldSet`` lands in row
            ``startIndex + k``. Those rows must already exist.

    Returns:
        None. ``newSet`` is modified in place.
    """
    dest_column = destVar.index
    source_column = origVar.index
    for offset in range(len(oldSet.cases)):
        value = oldSet.cases[offset, source_column]
        # A single-cell read may come back wrapped in a one-element list;
        # unwrap it so a scalar is stored in the destination cell.
        if isinstance(value, (list, tuple)) and len(value) == 1:
            value = value[0]
        newSet.cases[startIndex + offset, dest_column] = value
    return None
def notInVarFunction(set1, set2, var, endSet1):
    """Handle a set2 variable whose name does not exist in set1.

    Args:
        set1, set2: spss.Dataset objects (destination and source).
        var: spss.Variable object from ``set2`` that has no same-named
            counterpart in ``set1``.
        endSet1: integer, original number of cases in ``set1`` (the row at
            which the set2 data starts).

    Behaviour:
        First looks in ``set1`` for a variable with the same label (compared
        case-insensitively). If one exists, the two are treated as the same
        variable and the data is transferred with ``inVarFunction``.
        Otherwise a new variable with the same name and type is created in
        ``set1`` and filled with the set2 data.

    Returns:
        None. ``set1`` is modified in place.
    """
    wanted_label = (var.label or "").lower()
    # An empty label identifies nothing, so only non-empty labels may match.
    if wanted_label:
        # Search the DESTINATION set: var itself lives in set2, so scanning
        # set2 would always find var and the add-variable path would never run.
        for candidate in set1.varlist:
            if (candidate.label or "").lower() == wanted_label:
                inVarFunction(set1, set2, candidate, var, endSet1)
                return None
    # varlist.append takes a variable name and a type code, not a Variable.
    set1.varlist.append(var.name, var.type)
    created = set1.varlist[var.name]
    if var.label:
        created.label = var.label
    # Carry the value labels over as well.
    # NOTE(review): assumes valueLabels exposes its mapping as ``.data`` and
    # accepts a dict on assignment -- confirm against the spss module docs.
    source_labels = getattr(var.valueLabels, "data", None)
    if source_labels:
        created.valueLabels = dict(source_labels)
    # appendVals needs Variable objects (it reads .index), not name strings.
    appendVals(set1, set2, created, var, endSet1)
    return None
def inVarFunction(set1, set2, var1, var2, endSet1):
    """Transfer the data of a variable that is present in both datasets.

    Args:
        set1, set2: spss.Dataset objects (destination and source).
        var1: spss.Variable object, handed to ``appendVals`` as the
            destination column.
        var2: spss.Variable object, handed to ``appendVals`` as the
            source column.
        endSet1: original length of ``set1``, used as the first row to fill.

    Returns:
        None.
    """
    # Thin wrapper: all of the work is delegated to appendVals.
    appendVals(
        set1,
        set2,
        var1,
        var2,
        endSet1,
    )
def init(set1, set2):
    """Append every case and variable of ``set2`` to ``set1``.

    Args:
        set1: spss.Dataset object that receives the merged data.
        set2: spss.Dataset object whose cases and variables are appended.

    Behaviour:
        1. Appends as many new cases to ``set1`` as ``set2`` has cases.
        2. For each variable of ``set2``: if ``set1`` has a variable with the
           same name the values are transferred with ``inVarFunction``;
           otherwise the variable is handed to ``notInVarFunction``.

    Returns:
        None. ``set1`` is modified in place; progress is printed.
    """
    lengthSet1 = len(set1.cases)
    lengthSet2 = len(set2.cases)
    print("Lengths found")
    #appending relevant number of cases
    # NOTE(review): append() is called without a case argument, as in the
    # original; confirm this creates a blank case in the installed version.
    for _ in range(lengthSet2):
        set1.cases.append()
    print(str(lengthSet2)+" cases appended")
    for source_var in set2.varlist:
        for target_var in set1.varlist:
            if source_var.name == target_var.name:
                print(source_var.name+" being transferred")
                # The destination must be set1's own Variable: its index is the
                # column position inside set1, which can differ from the
                # position of the same-named variable inside set2.
                inVarFunction(set1, set2, target_var, source_var, lengthSet1)
                break
        else:
            # The inner loop finished without a name match.
            print(source_var.name+" being added")
            notInVarFunction(set1, set2, source_var, lengthSet1)
    return None
#Opens the data step that spss.Dataset objects are created in.
spss.StartDataStep()
try:
    print("Started")
    #imports two datasets found above to Python
    ds1 = spss.Dataset(name="DataSet1")
    ds2 = spss.Dataset(name="DataSet2")
    print("Imported")
    #runs main function
    init(ds1, ds2)
finally:
    #Closes out the data step even when the merge raises, so a failed run
    #does not leave a data step open.
    spss.EndDataStep()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment