Created
August 14, 2017 17:29
-
-
Save sternj/52bc09ab32cf767cf475bde8db59e93e to your computer and use it in GitHub Desktop.
A script to perform a case- and variable-merge of two SPSS datasets
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import copy | |
import spss | |
# This program merges two datasets by appending the second to the first, keeping ALL cases and ALL variables.
# It does this by appending as many empty cases to the first dataset as the second dataset contains,
# cross-referencing and reconciling the value labels, and finally populating the newly created cases
# in the first dataset with the data from the second.
# Stand-alone bootstrap (for running outside of SPSS): open the two .sav files
# and register each one under the dataset name the merge code looks up later.
for _savPath, _datasetName in (
    ("D:\\binar\\Pictures\\Downloads\\dataset1.sav", "DataSet1"),
    ("D:\\binar\\Pictures\\Downloads\\dataset2.sav", "DataSet2"),
):
    spss.Submit(
        "\nGET\nFILE='" + _savPath + "'.\n"
        "DATASET NAME " + _datasetName + " WINDOW=FRONT."
    )

# Per the original author's note, this clears the Python/SPSS environment of
# anything (e.g. an unfinished data step) left behind by a previous run.
spss.EndDataStep()
def _valueLabelsAsDict(variable):
    """Return a plain ``dict`` snapshot (value -> label) of ``variable.valueLabels``."""
    labels = variable.valueLabels
    # NOTE(review): the SPSS value-label object is documented to expose its
    # mapping through a ``data`` attribute; plain dicts pass straight through.
    # Confirm against the installed spss module.
    return dict(getattr(labels, "data", labels))


def modifyVarLevels(set1, set2, varname1, varname2):
    """Reconcile the value labels of one variable across two datasets.

    Args:
        set1, set2: spss.Dataset objects.
        varname1, varname2: variable names (strings, as found in the ``name``
            field of an spss.Variable) used to index ``set1.varlist`` and
            ``set2.varlist`` respectively.

    Behaviour:
        * Identical label sets: nothing happens.
        * A value labelled only in set2: its label is added to set1's variable.
        * A value labelled differently in the two sets: the value is recoded in
          set2's cases (via ``replaceVals``) to a fresh code, and set2's label
          is registered in set1 under that fresh code.

    After the call, set1's value labels are the union of both label sets.
    set2's own value labels are left untouched.

    Returns:
        None.
    """
    var1 = set1.varlist[varname1]
    var2 = set2.varlist[varname2]
    labels1 = _valueLabelsAsDict(var1)
    labels2 = _valueLabelsAsDict(var2)
    if labels1 == labels2:
        return None

    # Computed lazily on the first conflict so purely additive merges never
    # need arithmetic on the codes (``+ 1`` only makes sense for numeric codes).
    nextFree = None
    for value, label in labels2.items():
        if value not in labels1:
            var1.valueLabels[value] = label
        elif labels1[value] != label:
            if nextFree is None:
                # One past the largest labelled code in either set, so the new
                # code cannot collide with any labelled value.
                # NOTE(review): unlabelled data values above that maximum are
                # not checked for.
                nextFree = max(list(labels1) + list(labels2)) + 1
            replaceVals(set2, var2.index, value, nextFree)
            var1.valueLabels[nextFree] = label
            # Advance so that a second conflict gets its own distinct code
            # instead of being merged into the first one.
            nextFree += 1
    return None
def replaceVals(setObj, index, oldVal, newVal):
    """Replace every occurrence of ``oldVal`` in one column of a dataset.

    Args:
        setObj: spss.Dataset object whose cases are rewritten.
        index: integer column position, taken from the ``index`` field of an
            spss.Variable object.
        oldVal: value to look for in that column; all other values are ignored.
        newVal: value stored in place of ``oldVal``.

    Returns:
        None.
    """
    for rowNumber, row in enumerate(setObj.cases):
        if row[index] == oldVal:
            # Write through the case list with [row, column] assignment (the
            # same form appendVals uses) instead of mutating the row object
            # handed out by iteration, which is not guaranteed to be written
            # back to the dataset.
            setObj.cases[rowNumber, index] = newVal
    return None
def appendVals(newSet, oldSet, destVar, origVar, startIndex):
    """Copy one column of ``oldSet`` into ``newSet``, starting at ``startIndex``.

    Args:
        newSet: spss.Dataset object the values are written to.
        oldSet: spss.Dataset object the values are read from.
        destVar: spss.Variable object for the destination column in ``newSet``
            (only its ``index`` field is used).
        origVar: spss.Variable object for the source column in ``oldSet``
            (only its ``index`` field is used).
        startIndex: first row of ``newSet`` to write (inclusive); row ``i`` of
            ``oldSet`` lands in row ``startIndex + i`` of ``newSet``. The
            target rows must already exist.

    Returns:
        None.
    """
    # Column positions do not change during the copy, so look them up once.
    destColumn = destVar.index
    origColumn = origVar.index
    # NOTE(review): if reading cases[row, col] yields a one-element list rather
    # than a bare value, it is stored as-is; confirm what the SPSS case list
    # returns and accepts here.
    for offset in range(len(oldSet.cases)):
        newSet.cases[startIndex + offset, destColumn] = oldSet.cases[offset, origColumn]
    return None
def notInVarFunction(set1, set2, var, endSet1):
    """Handle a variable of ``set2`` that has no same-named variable in ``set1``.

    Called from ``init`` when the name lookup fails (``inVarFunction`` handles
    the name-match case).

    Args:
        set1, set2: spss.Dataset objects (destination and source).
        var: spss.Variable object belonging to ``set2``.
        endSet1: integer, the original number of cases in ``set1``; copied
            values are written starting at this row.

    Behaviour:
        1. If ``var`` has a label and a variable in ``set1`` carries the same
           label (case-insensitive), the two are treated as the same variable
           and the values are copied into that existing column.
        2. Otherwise a new variable with ``var``'s name and type is appended to
           ``set1``, its label is copied over, and the values are copied into
           it.

    Returns:
        None.
    """
    wantedLabel = (var.label or "").lower()
    # Unlabelled variables are never matched by label; otherwise every pair of
    # unlabelled variables would be considered "the same".
    if wantedLabel:
        for candidate in set1.varlist:
            if (candidate.label or "").lower() == wantedLabel:
                # Destination is the set1 variable, origin is the set2 variable.
                inVarFunction(set1, set2, candidate, var, endSet1)
                return None

    # The SPSS variable list creates variables from a name and a type.
    # NOTE(review): only the label is carried over; value labels, formats and
    # missing-value definitions of the new variable are not copied.
    set1.varlist.append(var.name, var.type)
    newVar = set1.varlist[var.name]
    if var.label:
        newVar.label = var.label
    # appendVals reads ``.index`` from both arguments, so it must be given
    # Variable objects (not names): the new set1 column and the set2 column.
    appendVals(set1, set2, newVar, var, endSet1)
    return None
def inVarFunction(set1, set2, var1, var2, endSet1):
    """Copy the values of ``var2`` into the column of ``var1``.

    Args:
        set1, set2: spss.Dataset objects (destination and source).
        var1: spss.Variable object naming the destination column in ``set1``.
        var2: spss.Variable object naming the source column in ``set2``.
        endSet1: original length of ``set1``; the first copied value is
            written at this row.

    Returns:
        None.
    """
    appendVals(
        newSet=set1,
        oldSet=set2,
        destVar=var1,
        origVar=var2,
        startIndex=endSet1,
    )
def init(set1, set2):
    """Append all cases and variables of ``set2`` to ``set1``.

    Args:
        set1: spss.Dataset object that receives the merged data.
        set2: spss.Dataset object whose cases and variables are appended.

    Steps:
        1. Append one empty case to ``set1`` for every case in ``set2``.
        2. For every variable of ``set2``: if ``set1`` has a variable with the
           same name, copy the values into that column (``inVarFunction``);
           otherwise hand the variable to ``notInVarFunction``.

    Returns:
        None.
    """
    lengthSet1 = len(set1.cases)
    lengthSet2 = len(set2.cases)
    print("Lengths found")

    # Append the placeholder cases that will receive set2's data. Each one is
    # an explicit all-None row, which SPSS documents as system-missing for
    # numeric variables and blank for string variables.
    variableCount = len(set1.varlist)
    for _ in range(lengthSet2):
        set1.cases.append([None] * variableCount)
    print(str(lengthSet2) + " cases appended")

    for sourceVar in set2.varlist:
        # SPSS variable names are case-insensitive, so compare them that way.
        sourceName = sourceVar.name.lower()
        target = None
        for candidate in set1.varlist:
            if candidate.name.lower() == sourceName:
                target = candidate
                break

        if target is not None:
            print(sourceVar.name + " being transferred")
            # The destination must be set1's own Variable object: the same
            # name can sit at a different column index in each dataset.
            inVarFunction(set1, set2, target, sourceVar, lengthSet1)
        else:
            print(sourceVar.name + " being added")
            notInVarFunction(set1, set2, sourceVar, lengthSet1)
    return None
# Merge driver: open a data step, bind the two named datasets and merge them.
spss.StartDataStep()
print("Started")
try:
    # Bind the two datasets loaded above to Python objects.
    ds1 = spss.Dataset(name="DataSet1")
    ds2 = spss.Dataset(name="DataSet2")
    print("Imported")
    # Run the merge.
    init(ds1, ds2)
finally:
    # Always close the data step, even when the merge raises, so a failed run
    # does not leave it open for the next one.
    spss.EndDataStep()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment