Skip to content

Instantly share code, notes, and snippets.

@omalley
Created December 14, 2016 17:11
Show Gist options
  • Save omalley/f5d7f8edd8fba47fd6e84c179568672d to your computer and use it in GitHub Desktop.
Save omalley/f5d7f8edd8fba47fd6e84c179568672d to your computer and use it in GitHub Desktop.
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.orc.impl;
import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.orc.OrcProto;
/**
 * Abstraction over the physical layout of an ORC file, keeping it apart from
 * the higher level details of writing.
 *
 * <p>This API is limited to being used by LLAP.
 */
public interface PhysicalWriter {

  /**
   * Writes the file header, which consists of the magic "ORC" bytes.
   *
   * @throws IOException if an I/O error occurs while writing
   */
  void writeHeader() throws IOException;

  /**
   * Appends a set of bytes to a data stream in a stripe.
   *
   * @param name the name of the stream
   * @param buffer the range of bytes to write
   * @throws IOException if an I/O error occurs while writing
   */
  void appendDataStream(StreamName name, ByteBuffer buffer) throws IOException;

  /**
   * Writes out the index for the stripe column.
   *
   * @param name the name of the stream
   * @param rowIndex the row index entries to write
   * @throws IOException if an I/O error occurs while writing
   */
  void writeIndexStream(StreamName name,
                        OrcProto.RowIndex.Builder rowIndex) throws IOException;

  /**
   * Appends the bloom filter for the next row group to the relevant stream.
   *
   * @param streamName the name of the stream
   * @param bloomFilterIndex the bloom filter index to write
   * @throws IOException if an I/O error occurs while writing
   */
  void appendBloomFilterStream(StreamName streamName, OrcProto.BloomFilter.Builder bloomFilterIndex) throws IOException;

  /**
   * Flushes the data in all the streams, spills them to disk, and writes out
   * the stripe footer.
   *
   * @param footer the stripe footer, to be updated with relevant data and
   *               written out
   * @param dirEntry the file metadata entry for the stripe, to be updated
   *                 with relevant data
   * @throws IOException if an I/O error occurs while writing
   */
  void finalizeStripe(OrcProto.StripeFooter.Builder footer, OrcProto.StripeInformation.Builder dirEntry) throws IOException;

  /**
   * Writes out the file metadata.
   *
   * @param builder the metadata builder to finalize and write
   * @throws IOException if an I/O error occurs while writing
   */
  void writeFileMetadata(OrcProto.Metadata.Builder builder) throws IOException;

  /**
   * Writes out the file footer.
   *
   * @param builder the footer builder to finalize and write
   * @throws IOException if an I/O error occurs while writing
   */
  void writeFileFooter(OrcProto.Footer.Builder builder) throws IOException;

  /**
   * Writes out the postscript (including the size byte if needed).
   *
   * @param builder the postscript builder to finalize and write
   * @return a {@code long} whose meaning is not documented on this interface.
   *         NOTE(review): presumably a file position or length - confirm
   *         against the implementations.
   * @throws IOException if an I/O error occurs while writing
   */
  long writePostScript(OrcProto.PostScript.Builder builder) throws IOException;

  /**
   * Closes the writer.
   *
   * @throws IOException if an I/O error occurs while closing
   */
  void close() throws IOException;

  /**
   * Flushes the writer so that readers can see the preceding postscripts.
   *
   * @throws IOException if an I/O error occurs while flushing
   */
  void flush() throws IOException;

  /**
   * Reports the physical stripe size.
   *
   * @return the physical stripe size, taking padding into account
   */
  long getPhysicalStripeSize();

  /**
   * Appends raw stripe data (e.g. for file merger).
   *
   * @param stripe the stripe data buffer
   * @param dirEntry the file metadata entry for the stripe, to be updated
   *                 with relevant data
   * @throws IOException if an I/O error occurs while writing
   */
  void appendRawStripe(ByteBuffer stripe, OrcProto.StripeInformation.Builder dirEntry) throws IOException;

  /**
   * Estimates how much memory the stripe is using.
   *
   * @return the estimated memory usage for the stripe
   */
  long estimateMemory();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment