Created
August 29, 2021 06:33
-
-
Save amitgupta1202/6ae6d488769ec27f885a6a22872f049b to your computer and use it in GitHub Desktop.
Converts an Avro schema to a BigQuery schema. Not a full-fledged implementation; it works with basic types, records, unions, and arrays.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import com.google.api.services.bigquery.model.TableFieldSchema; | |
import com.google.api.services.bigquery.model.TableSchema; | |
import com.google.common.collect.ImmutableMap; | |
import org.apache.avro.Schema; | |
import org.apache.avro.Schema.Type; | |
import java.util.ArrayList; | |
import java.util.List; | |
import static org.apache.avro.Schema.Type.*; | |
public class AvroToTableSchemaConverter { | |
private AvroToTableSchemaConverter() { | |
} | |
public static TableSchema convert(Schema schema) { | |
var ts = new TableSchema(); | |
var fields = generateTableFieldSchemas(schema); | |
ts.setFields(fields); | |
return ts; | |
} | |
private static List<TableFieldSchema> generateTableFieldSchemas(Schema schema) { | |
var tableFieldSchemas = new ArrayList<TableFieldSchema>(); | |
if (schema != null) { | |
for (Schema.Field field : schema.getFields()) { | |
var tableFieldSchema = | |
new TableFieldSchema() | |
.setName(field.name()) | |
.setMode("REQUIRED"); | |
var fieldSchema = field.schema(); | |
if (fieldSchema.getType() == UNION && fieldSchema.getTypes().get(0).getType() == NULL) { | |
tableFieldSchema | |
.setMode("NULLABLE"); | |
fieldSchema = fieldSchema.getTypes().get(1); | |
} | |
if (fieldSchema.getType() == ARRAY) { | |
tableFieldSchema | |
.setMode("REPEATED"); | |
fieldSchema = fieldSchema.getElementType(); | |
} | |
if (fieldSchema.getType() == RECORD) { | |
tableFieldSchema | |
.setType("STRUCT") | |
.setFields(generateTableFieldSchemas(fieldSchema)); | |
} else { | |
tableFieldSchema | |
.setType(getBqType(fieldSchema)); | |
} | |
tableFieldSchemas.add(tableFieldSchema); | |
} | |
} | |
return tableFieldSchemas; | |
} | |
private static String getBqType(Schema schema) { | |
var logicalType = getLogicalType(schema); | |
if (schema.getType() == LONG && "timestamp-millis".equals(logicalType)) { | |
return "TIMESTAMP"; | |
} else if (schema.getType() == INT && "date".equals(logicalType)) { | |
return "DATE"; | |
} else { | |
return AVRO_TYPES_TO_BIG_QUERY.get(schema.getType()); | |
} | |
} | |
private static String getLogicalType(Schema schema) { | |
String logicalType = null; | |
if (schema.getLogicalType() != null) { | |
logicalType = schema.getLogicalType().getName(); | |
} | |
return logicalType; | |
} | |
private static final ImmutableMap<Type, String> AVRO_TYPES_TO_BIG_QUERY = ImmutableMap.<Type, String>builder() | |
.put(STRING, "STRING") | |
.put(INT, "INTEGER") | |
.put(DOUBLE, "FLOAT64") | |
.put(BYTES, "NUMERIC") | |
.put(BOOLEAN, "BOOL") | |
.put(LONG, "INT64") | |
.build(); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment