Skip to content

Instantly share code, notes, and snippets.

@rdblue
rdblue / TestAvroSchemaConverter.java
Last active August 29, 2015 13:56
Test to catch parquet-mr #12
@Test
public void testArrayOfRecords() throws Exception {
Schema innerRecord = Schema.createRecord("InnerRecord", null, null, false);
Schema optionalString = optional(Schema.create(Schema.Type.STRING));
innerRecord.setFields(Lists.newArrayList(
new Schema.Field("s1", optionalString, null, NullNode.getInstance()),
new Schema.Field("s2", optionalString, null, NullNode.getInstance())
));
Schema schema = Schema.createRecord("HasArray", null, null, false);
schema.setFields(Lists.newArrayList(
@rdblue
rdblue / gist:ca14f2a469f36d7f012c
Created May 17, 2014 00:35
Movie schema with column mappings.
{
"type" : "record",
"name" : "Movie",
"doc" : "Schema generated by Kite",
"fields" : [ {
"name" : "id",
"type" : "long"
}, {
"name" : "title",
"type" : [ "null", "string" ]
@rdblue
rdblue / TestProvidedConfiguration.java
Created September 12, 2014 22:53
Tests that hdfs-site.xml is correctly loaded in the master branch.
/*
* Copyright 2013 Cloudera Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
@rdblue
rdblue / RefinableView.java
Created April 2, 2015 23:31
Column projection methods
/**
 * Creates a copy of this {@code View} for entities of the given type class.
 *
 * @param <T> the entity type of the returned view, as given by {@code type}
 * @param type a Java Class to use for entities
 * @return a copy of this view for the given type
 * @throws org.kitesdk.data.IncompatibleSchemaException
 *           If the schema for {@code type} is incompatible with the
 *           underlying dataset's Schema.
 */
<T> RefinableView<T> asType(Class<T> type);
@rdblue
rdblue / Comparators.java
Created November 9, 2017 16:49
Java Comparators for CharSequence and ByteBuffer
import java.nio.ByteBuffer;
import java.util.Comparator;
public class Comparators {
/**
 * Returns the shared {@code UnsignedByteBufComparator} instance.
 *
 * @return the comparator held in {@code UnsignedByteBufComparator.INSTANCE}
 */
public static Comparator<ByteBuffer> unsignedBytes() {
  final Comparator<ByteBuffer> shared = UnsignedByteBufComparator.INSTANCE;
  return shared;
}
public static Comparator<ByteBuffer> signedBytes() {
return Comparator.naturalOrder();
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
@rdblue
rdblue / DSv2Analysis.scala
Last active September 19, 2018 22:23
DataSourceV2 SQL Analysis Rules
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
@rdblue
rdblue / DataFrameWriterV2.scala
Created May 22, 2019 22:21
Prototype DataFrameWriter for v2 tables
/**
* Interface used to write a [[Dataset]] to external storage using the v2 API.
*
* @since 3.0.0
*/
@Experimental
final class DataFrameWriterV2[T] private[sql](table: String, ds: Dataset[T])
extends CreateTableWriter[T] with LookupCatalog {
import org.apache.spark.sql.catalog.v2.CatalogV2Implicits._
/**
* The base physical plan for writing data into data source v2.
*/
abstract class V2TableWriteExec(
options: Map[String, String],
query: SparkPlan) extends SparkPlan {
import org.apache.spark.sql.sources.v2.DataSourceV2Implicits._
/** Partitioning as a sequence of [[PartitionTransform]]s; abstract, so each concrete subclass must define it. */
def partitioning: Seq[PartitionTransform]
/** This node's only child is the `query` plan passed to the constructor. */
override def children: Seq[SparkPlan] = Seq(query)
package org.apache.iceberg.types;
import java.util.List;
import org.apache.iceberg.Schema;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
public abstract class SchemaWithPartnerVisitor<P, R> {
public interface PartnerAccessors<P> {
P fieldPartner(P partnerStruct, int fieldId, String name);
P mapKeyPartner(P partnerMap);