Skip to content

Instantly share code, notes, and snippets.

@amitgupta1202
Created August 29, 2021 06:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save amitgupta1202/6ae6d488769ec27f885a6a22872f049b to your computer and use it in GitHub Desktop.
Save amitgupta1202/6ae6d488769ec27f885a6a22872f049b to your computer and use it in GitHub Desktop.
Converts an Avro schema to a BigQuery schema. Not a full-fledged implementation: it works with basic types, records, unions, and arrays.
import com.google.api.services.bigquery.model.TableFieldSchema;
import com.google.api.services.bigquery.model.TableSchema;
import com.google.common.collect.ImmutableMap;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Type;
import java.util.ArrayList;
import java.util.List;
import static org.apache.avro.Schema.Type.*;
public class AvroToTableSchemaConverter {
private AvroToTableSchemaConverter() {
}
public static TableSchema convert(Schema schema) {
var ts = new TableSchema();
var fields = generateTableFieldSchemas(schema);
ts.setFields(fields);
return ts;
}
private static List<TableFieldSchema> generateTableFieldSchemas(Schema schema) {
var tableFieldSchemas = new ArrayList<TableFieldSchema>();
if (schema != null) {
for (Schema.Field field : schema.getFields()) {
var tableFieldSchema =
new TableFieldSchema()
.setName(field.name())
.setMode("REQUIRED");
var fieldSchema = field.schema();
if (fieldSchema.getType() == UNION && fieldSchema.getTypes().get(0).getType() == NULL) {
tableFieldSchema
.setMode("NULLABLE");
fieldSchema = fieldSchema.getTypes().get(1);
}
if (fieldSchema.getType() == ARRAY) {
tableFieldSchema
.setMode("REPEATED");
fieldSchema = fieldSchema.getElementType();
}
if (fieldSchema.getType() == RECORD) {
tableFieldSchema
.setType("STRUCT")
.setFields(generateTableFieldSchemas(fieldSchema));
} else {
tableFieldSchema
.setType(getBqType(fieldSchema));
}
tableFieldSchemas.add(tableFieldSchema);
}
}
return tableFieldSchemas;
}
private static String getBqType(Schema schema) {
var logicalType = getLogicalType(schema);
if (schema.getType() == LONG && "timestamp-millis".equals(logicalType)) {
return "TIMESTAMP";
} else if (schema.getType() == INT && "date".equals(logicalType)) {
return "DATE";
} else {
return AVRO_TYPES_TO_BIG_QUERY.get(schema.getType());
}
}
private static String getLogicalType(Schema schema) {
String logicalType = null;
if (schema.getLogicalType() != null) {
logicalType = schema.getLogicalType().getName();
}
return logicalType;
}
private static final ImmutableMap<Type, String> AVRO_TYPES_TO_BIG_QUERY = ImmutableMap.<Type, String>builder()
.put(STRING, "STRING")
.put(INT, "INTEGER")
.put(DOUBLE, "FLOAT64")
.put(BYTES, "NUMERIC")
.put(BOOLEAN, "BOOL")
.put(LONG, "INT64")
.build();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment