Created
August 18, 2015 17:00
-
-
Save gianm/7d8f83619aada126fcd9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Licensed to Metamarkets Group Inc. (Metamarkets) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. Metamarkets licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package io.druid.segment.realtime.appenderator; | |
import com.google.common.util.concurrent.ListenableFuture; | |
import io.druid.data.input.Committer; | |
import io.druid.data.input.InputRow; | |
import io.druid.query.Query; | |
import io.druid.query.QueryRunner; | |
import io.druid.segment.incremental.IndexSizeExceededException; | |
import io.druid.segment.realtime.plumber.CommitterMaker; | |
import java.io.Closeable; | |
import java.util.List; | |
/** | |
* Like a Plumber, but different. | |
* <p/> | |
* An Appenderator manages indexed data. It has some in-memory data and some persisted-on-disk data. It can serve | |
* queries on both of those. It can also push data to deep storage. But, it cannot publish segments to the metadata | |
* store; you have to do that yourself! | |
* <p/> | |
* Any time you call one of the methods that adds, persists, or pushes data, you must provide a Committer or a | |
* CommitterMaker that represents all data you have given to the Appenderator so far. The Committer will be used when | |
* that data has been persisted to disk. | |
*/ | |
public interface Appenderator extends Closeable | |
{ | |
/** | |
* Return the name of the dataSource associated with this Appenderator. | |
*/ | |
String getDataSource(); | |
/** | |
* Perform any initial setup. Should be called before using any other methods. | |
* | |
* @return currently persisted commit metadata | |
*/ | |
Object startJob(); | |
/** | |
* Add a row. Must not be called concurrently from multiple threads. | |
* <p/> | |
* If no pending segment exists for the provided identifier, a new one will be created. | |
* <p/> | |
* This method may trigger a {@link #persistAll(Committer)} using the provided CommitterMaker. If it does this, the | |
* Committer is guaranteed to be *created* synchronously with the cal to add, but will actually be used | |
* asynchronously. | |
* <p/> | |
* The add, clear, persistAll, pushAll, and push methods should all be called from the same thread to ensure that | |
* commit metadata matches data that has actually been persisted to disk. | |
* | |
* @param identifier the segment into which this row should be added | |
* @param row the row to add | |
* @param committerMaker a committer maker associated with all data that has been added so far, including this row | |
* | |
* @return positive number indicating how many summarized rows exist in this segment so far | |
* | |
* @throws IndexSizeExceededException if this row cannot be added because it is too large | |
* @throws SegmentNotWritableException if the requested segment is known, but has been closed | |
*/ | |
int add(SegmentIdentifier identifier, InputRow row, CommitterMaker committerMaker) | |
throws IndexSizeExceededException, SegmentNotWritableException; | |
/** | |
* Returns a list of all currently pending segments. | |
*/ | |
List<SegmentIdentifier> getSegments(); | |
/** | |
* Returns the number of rows in a particular pending segment. | |
* | |
* @param identifier segment to examine | |
* | |
* @return row count | |
* | |
* @throws IllegalStateException if the segment is unknown | |
*/ | |
int getRowCount(final SegmentIdentifier identifier); | |
/** | |
* Drop all in-memory and on-disk data, and forget any previously-remembered commit metadata. This could be useful if, | |
* for some reason, rows have been added that we do not actually want to hand off. Blocks until all data has been | |
* cleared. This may take some time, since all pending persists must finish first. | |
*/ | |
void clear() throws InterruptedException; | |
/** | |
* Drop all data associated with a particular pending segment. Unlike {@link #clear()}), any on-disk commit | |
* metadata will remain unchanged. If there is no pending segment with this identifier, then this method will | |
* do nothing. | |
* <p/> | |
* You should not write to the dropped segment after calling "drop". If you need to drop all your data and | |
* re-write it, consider {@link #clear()} instead. | |
* | |
* @param identifier the pending segment to drop | |
* | |
* @return future that resolves when data is dropped | |
*/ | |
ListenableFuture<?> drop(SegmentIdentifier identifier); | |
/** | |
* Persist any in-memory indexed data to durable storage. This may be only somewhat durable, e.g. the | |
* machine's local disk. The Committer will be made synchronously will the call to persistAll, but will actually | |
* be used asynchronously. Any metadata returned by the committer will be associated with the data persisted to | |
* disk. | |
* <p/> | |
* The add, clear, persistAll, pushAll, and push methods should all be called from the same thread to ensure that | |
* commit metadata matches data that has actually been persisted to disk. | |
* | |
* @param committer a committer associated with all data that has been added so far | |
* | |
* @return future that resolves when all pending data has been persisted, contains commit metadata for this persist | |
*/ | |
ListenableFuture<Object> persistAll(Committer committer); | |
/** | |
* Merge and push any pending data to deep storage. This will trigger an implicit {@link #persistAll(Committer)} | |
* using the provided Committer. | |
* <p/> | |
* After this method is called, you cannot add new data to any segments that were previously under construction. | |
* <p/> | |
* The add, clear, persistAll, pushAll, and push methods should all be called from the same thread to ensure that | |
* commit metadata matches data that has actually been persisted to disk. | |
* | |
* @param committer a committer associated with all data that has been added so far | |
* | |
* @return future that resolves when all segments have been pushed. The segment list will be the list of segments | |
* that have been pushed and the commit metadata from the Committer. | |
*/ | |
ListenableFuture<SegmentsAndMetadata> pushAll(Committer committer); | |
/** | |
* Merge and push particular segments to deep storage. This will trigger an implicit {@link #persistAll(Committer)} | |
* using the provided Committer. | |
* <p/> | |
* After this method is called, you cannot add new data to any segments that were previously under construction. | |
* <p/> | |
* The add, clear, persistAll, pushAll, and push methods should all be called from the same thread to ensure that | |
* commit metadata matches data that has actually been persisted to disk. | |
* | |
* @param identifiers list of segments to push | |
* @param committer a committer associated with all data that has been added so far | |
* | |
* @return future that resolves when all segments have been pushed. The segment list will be the list of segments | |
* that have been pushed and the commit metadata from the Committer. | |
*/ | |
ListenableFuture<SegmentsAndMetadata> push(List<SegmentIdentifier> identifiers, Committer committer); | |
/** | |
* Return a query runner for a particular query. The runner will read all pending data that has been added to | |
* this Appenderator. | |
*/ | |
<T> QueryRunner<T> getQueryRunner(Query<T> query); | |
/** | |
* Stop any currently-running processing and clean up after ourselves. This will not remove any on-disk persisted | |
* data, but it will drop any data that has not yet been persisted. | |
*/ | |
void close(); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Is `pushAll(committer)` functionally equivalent to `push(getSegments(), committer)`?