Skip to content

Instantly share code, notes, and snippets.

@stephen-soltesz
Last active October 29, 2019 03:40
Show Gist options
  • Save stephen-soltesz/fd52e5633563e9f652f83606cfdeb526 to your computer and use it in GitHub Desktop.
Save stephen-soltesz/fd52e5633563e9f652f83606cfdeb526 to your computer and use it in GitHub Desktop.
reflect / schema
git clone https://gist.github.com/stephen-soltesz/fd52e5633563e9f652f83606cfdeb526 reflect-schema
cd reflect-schema/
go get .
ParseInfo:
  Description: The thing
TaskFileName:
  Description: archive filename containing test_id.
ParseTime:
  Description: time the parser parsed.
ParserVersion:
  Description: source version of the ETL parser.
test_id:
  Description: filename of measurement.
log_time:
  Description: time of the test measurement
package main
import (
"fmt"
"io/ioutil"
"reflect"
"github.com/kr/pretty"
"cloud.google.com/go/bigquery"
"github.com/m-lab/etl/schema"
"github.com/m-lab/go/rtx"
yaml "gopkg.in/yaml.v2"
)
// SchemaDoc documents fields discovered during InferSchema.
//
// The outer key is a field's documentation name (the value produced by
// getDocName for struct fields). The inner map holds that field's attributes;
// inferSchema currently reads only the "Description" key from it.
type SchemaDoc map[string]map[string]string
// NewSchemaDoc returns a new schema doc initialized with the field
// descriptions in the given file. The file contents are decoded as YAML
// directly into the SchemaDoc map. Failures are fatal: both the read error and
// the unmarshal error are handed to rtx.Must together with the file name.
func NewSchemaDoc(file string) *SchemaDoc {
	docs, err := ioutil.ReadFile(file)
	// Name the offending file so a failure can be diagnosed from the message.
	rtx.Must(err, "Failed to read %q", file)

	sd := &SchemaDoc{}
	// ReadFile already returns a []byte, so no conversion is needed.
	err = yaml.Unmarshal(docs, sd)
	rtx.Must(err, "Failed to unmarshal %q", file)
	return sd
}
// InferSchema generates a schema from the given value. Invalid fields are ignored.
//
// Only the type of val is inspected (via reflect.TypeOf); its contents are not
// read. The returned schema is the list of sub-fields inferred for that type.
// A nil schema is returned when val is an untyped nil, when its type is not
// supported by inferSchema, or when the type has no sub-fields (for example a
// plain string or integer).
func (m SchemaDoc) InferSchema(val interface{}) bigquery.Schema {
	t := reflect.TypeOf(val)
	if t == nil {
		// reflect.TypeOf returns nil for an untyped nil interface value;
		// calling Kind on it would panic.
		return nil
	}
	s := m.inferSchema("", "", t)
	if s == nil {
		// inferSchema reports unsupported kinds by returning nil.
		return nil
	}
	return s.Schema
}
// newFieldSchema allocates a bigquery.FieldSchema with the given name,
// description, repeated flag and type. The field is never marked Required and
// its nested Schema is left unset for the caller to fill in.
func newFieldSchema(name, doc string, repeated bool, fieldType bigquery.FieldType) *bigquery.FieldSchema {
	fs := new(bigquery.FieldSchema)
	fs.Name = name
	fs.Description = doc
	fs.Type = fieldType
	fs.Repeated = repeated
	fs.Required = false
	return fs
}
// getFieldName returns the BigQuery column name for struct field f.
//
// The name comes from the field's `bigquery` struct tag when the tag is
// present, and from the Go field name otherwise. Only the portion of the tag
// before the first comma is the name; anything after it (such as the
// ",nullable" option) is dropped. When that name portion is empty the Go field
// name is used. An empty string is returned for fields tagged `bigquery:"-"`,
// which callers treat as "skip this field".
func getFieldName(f reflect.StructField) string {
	tag, ok := f.Tag.Lookup("bigquery")
	if !ok {
		return f.Name
	}
	// Cut the tag at the first comma so `name,option` yields just `name`.
	// A short loop is used here to avoid pulling in a new package import.
	for i := 0; i < len(tag); i++ {
		if tag[i] == ',' {
			tag = tag[:i]
			break
		}
	}
	switch tag {
	case "-":
		// Explicitly excluded field.
		return ""
	case "":
		// Options only (e.g. `bigquery:",nullable"`): keep the Go name.
		return f.Name
	}
	return tag
}
// getDocName returns the key used to look up documentation for field f: the
// value of the field's `bqx` struct tag when it has one, and otherwise the
// name reported by getFieldName.
func getDocName(f reflect.StructField) string {
	if override, found := f.Tag.Lookup("bqx"); found {
		return override
	}
	return getFieldName(f)
}
// inferSchema recursively builds the BigQuery field schema for type t.
//
// fieldName becomes the Name of the returned field. docName is the key used to
// look up the field's entry in m; the entry's "Description" value (empty when
// there is no entry) becomes the field Description.
//
// Type mapping:
//   - time.Time              -> TIMESTAMP
//   - other structs          -> RECORD with one sub-field per usable struct field
//   - []byte (and []uint8)   -> BYTES
//   - other slices           -> the element's schema, marked Repeated
//   - string / bool          -> STRING / BOOLEAN
//   - signed/unsigned ints   -> INTEGER
//   - float32 / float64      -> FLOAT
//   - pointers               -> the schema of the pointed-to type
//
// Struct fields tagged `bigquery:"-"` and unexported (non-embedded) fields are
// skipped. nil is returned for kinds not listed above, and for slices whose
// element kind is unsupported. Arrays and maps panic.
func (m SchemaDoc) inferSchema(fieldName, docName string, t reflect.Type) *bigquery.FieldSchema {
	fmt.Println("docname: ", docName)
	// Indexing a missing key yields a nil map, and reading from a nil map
	// yields "", so undocumented fields simply get an empty description.
	doc := m[docName]
	desc := doc["Description"]
	/*
		TODO: DateFieldType FieldType = "DATE"
		TODO: TimeFieldType FieldType = "TIME"
		TODO: DateTimeFieldType FieldType = "DATETIME"
	*/
	switch t.Kind() {
	case reflect.Struct:
		// Special handling of time.Time types. Matching on the package path
		// avoids confusing it with a Time type from another package that
		// happens to be named "time".
		if t.PkgPath() == "time" && t.Name() == "Time" {
			return newFieldSchema(fieldName, desc, false, bigquery.TimestampFieldType)
		}
		// At this point, treat field like a regular struct record.
		record := newFieldSchema(fieldName, desc, false, bigquery.RecordFieldType)
		// Deliberately non-nil even when empty: the slice case below uses a
		// nil Schema to recognize element types that are not records.
		fields := []*bigquery.FieldSchema{}
		for i := 0; i < t.NumField(); i++ {
			f := t.Field(i)
			// PkgPath is only set for unexported fields. Their values can
			// never be saved, so they do not belong in the schema. Embedded
			// fields are left alone to preserve their existing handling.
			if f.PkgPath != "" && !f.Anonymous {
				continue
			}
			name := getFieldName(f)
			if name == "" {
				continue
			}
			if sub := m.inferSchema(name, getDocName(f), f.Type); sub != nil {
				fields = append(fields, sub)
			}
		}
		record.Schema = fields
		return record
	case reflect.Slice:
		// Lookup type of slice to perform reflection on that type.
		elem := t.Elem()
		// A byte slice is a single BYTES value, not a repeated integer.
		if elem.Kind() == reflect.Uint8 {
			return newFieldSchema(fieldName, desc, false, bigquery.BytesFieldType)
		}
		s := m.inferSchema(fieldName, docName, elem)
		if s == nil {
			// Unsupported element type: ignore the field rather than
			// dereferencing a nil schema.
			return nil
		}
		// The slice type was for a primitive type, with no sub-schema.
		if s.Schema == nil {
			s.Repeated = true
			return s
		}
		// The slice type was for a structure type. Copy the sub-schema.
		record := newFieldSchema(fieldName, desc, true, bigquery.RecordFieldType)
		record.Schema = s.Schema
		return record
	case reflect.String:
		return newFieldSchema(fieldName, desc, false, bigquery.StringFieldType)
	case reflect.Bool:
		return newFieldSchema(fieldName, desc, false, bigquery.BooleanFieldType)
	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
		reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
		// TODO: check whether a type override is present in doc["Type"], e.g. Timestamp for Int.
		return newFieldSchema(fieldName, desc, false, bigquery.IntegerFieldType)
	case reflect.Float32, reflect.Float64:
		return newFieldSchema(fieldName, desc, false, bigquery.FloatFieldType)
	case reflect.Ptr:
		// Pointers are transparent: describe the pointed-to type.
		return m.inferSchema(fieldName, docName, t.Elem())
	case reflect.Array:
		// TODO: support reflect.Array
		// TODO: identify special handling for BYTES field types?
		panic("Array is not a supported field type")
	case reflect.Map:
		// We cannot support Maps without a separate implementation of ValueSaver.
		panic("Map is not a supported field type")
	}
	// Unknown type.
	return nil
}
// main loads the field descriptions from docs.txt, prints them, then infers
// and prints the schema for an empty schema.NDTResult using those descriptions.
func main() {
	docs := NewSchemaDoc("docs.txt")
	pretty.Print(docs)

	// Fields are documented with docs.txt.
	inferred := docs.InferSchema(schema.NDTResult{})
	pretty.Print(inferred)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment