Skip to content

Instantly share code, notes, and snippets.

@jmhodges
Created July 20, 2009 09:12
Show Gist options
  • Save jmhodges/150217 to your computer and use it in GitHub Desktop.
Save jmhodges/150217 to your computer and use it in GitHub Desktop.
/**
* Calculates the splits that will serve as input for the map tasks.
* <ul>
* Splits are created in number equal to the smallest between numSplits and
* the number of {@link HRegion}s in the table. If the number of splits is
* smaller than the number of {@link HRegion}s then splits are spanned across
* multiple {@link HRegion}s and are grouped the most evenly possible. In the
* case splits are uneven the bigger splits are placed first in the
* {@link InputSplit} array.
*
* @param job the map task {@link JobConf}
* @param numSplits a hint to calculate the number of splits (mapred.map.tasks).
*
* @return the input splits
*
* @see org.apache.hadoop.mapred.InputFormat#getSplits(org.apache.hadoop.mapred.JobConf, int)
*/
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
byte [][] startKeys = this.table.getStartKeys();
if (startKeys == null || startKeys.length == 0) {
throw new IOException("Expecting at least one region");
}
if (this.table == null) {
throw new IOException("No table was provided");
}
if (this.inputColumns == null || this.inputColumns.length == 0) {
throw new IOException("Expecting at least one column");
}
int realNumSplits = numSplits > startKeys.length? startKeys.length:
numSplits;
InputSplit[] splits = new InputSplit[realNumSplits];
int middle = startKeys.length / realNumSplits;
int startPos = 0;
for (int i = 0; i < realNumSplits; i++) {
int lastPos = startPos + middle;
lastPos = startKeys.length % realNumSplits > i ? lastPos + 1 : lastPos;
String regionLocation = table.getRegionLocation(startKeys[startPos]).
getServerAddress().getHostname();
splits[i] = new TableSplit(this.table.getTableName(),
startKeys[startPos], ((i + 1) < realNumSplits) ? startKeys[lastPos]:
HConstants.EMPTY_START_ROW, regionLocation);
LOG.info("split: " + i + "->" + splits[i]);
startPos = lastPos;
}
return splits;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment