Skip to content

Instantly share code, notes, and snippets.

@orellabac
Created November 23, 2023 13:31
Show Gist options
  • Save orellabac/3e451ec15e08f0edfcde8a765d893938 to your computer and use it in GitHub Desktop.
Save orellabac/3e451ec15e08f0edfcde8a765d893938 to your computer and use it in GitHub Desktop.
Snowpark Java SPSS\.sav files metadata extractor
/*
Go to https://mvnrepository.com/artifact/com.bedatadriven.spss/spss-reader
and download the jar file.
And upload it into an stage
*/
create or replace function read_spss_metadata(filename string)
RETURNS TABLE(COLUMN_NAME TEXT,TYPE TEXT,NULLABLE BOOLEAN,EXPRESSION TEXT,FILENAMES TEXT,ORDER_ID INT)
language java
imports = ('@mystage/spss-reader-1.3.jar')
handler='SPSSMetadataUDTF'
target_path='@mystage/spss-reader-metadata.jar'
as $$
import com.bedatadriven.spss.SpssDataFileReader;
import com.bedatadriven.spss.SpssVariable;
import java.util.stream.Stream;
import java.io.IOException;
import java.io.InputStream;
import java.util.*;
import com.snowflake.snowpark_java.types.SnowflakeFile;
class OutputRowMetadata {
public String COLUMN_NAME;
public String TYPE;
public boolean NULLABLE;
public String EXPRESSION;
public String FILENAMES;
public int ORDER_ID;
public OutputRowMetadata(String COLUMN_NAME, String TYPE, boolean NULLABLE, String EXPRESSION, String FILENAMES, int ORDER_ID) {
this.COLUMN_NAME = COLUMN_NAME;
this.TYPE = TYPE;
this.NULLABLE = NULLABLE;
this.EXPRESSION = EXPRESSION;
this.FILENAMES = FILENAMES;
this.ORDER_ID = ORDER_ID;
}
}
class SPSSDecoder {
public static Stream<OutputRowMetadata> getMedatata(InputStream inputStream) {
try {
var reader = new SpssDataFileReader(inputStream);
var result = new ArrayList<OutputRowMetadata>();
for(var variable: reader.getVariables())
{
String snowflakeType = getType(variable);
var columnName = variable.getVariableName();
var expression = "$1:"+ columnName+ "::"+ snowflakeType;
var metadataInfo = new OutputRowMetadata(
columnName,
snowflakeType,
false,
expression,null,
variable.getIndex());
result.add(metadataInfo);
}
return result.stream();
} catch (IOException e) {
return null;
}
}
private static String getType(SpssVariable variable) {
var format =variable.getPrintFormat();
switch(format.getType())
{
case 1: return "STRING";
case 5: return "NUMBER(38," + format.getDecimalPlaces() + ")";
case 21: return "TIME";
case 22: return "DATETIME";
case 38: return "DATE";
}
return null;
}
}
class SPSSMetadataUDTF {
public static Class getOutputClass() {
return OutputRowMetadata.class;
}
public Stream<OutputRowMetadata> process(String inputFile) {
SnowflakeFile file = SnowflakeFile.newInstance(inputFile, false);
return SPSSDecoder.getMedatata(file.getInputStream());
}
public Stream<OutputRowMetadata> endPartition() {
return Stream.empty();
}
}
$$;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment