Skip to content

Instantly share code, notes, and snippets.

@aranibatta
Created March 17, 2018 16:24
Show Gist options
  • Save aranibatta/f328672d31965c941d85fd88471257b6 to your computer and use it in GitHub Desktop.
Save aranibatta/f328672d31965c941d85fd88471257b6 to your computer and use it in GitHub Desktop.
some code
package edu.berkeley.cs186.database.table;
import java.util.Arrays;
import java.io.Closeable;
import java.nio.ByteBuffer;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.TreeSet;
import edu.berkeley.cs186.database.DatabaseException;
import edu.berkeley.cs186.database.common.ArrayBacktrackingIterator;
import edu.berkeley.cs186.database.common.BacktrackingIterator;
import edu.berkeley.cs186.database.common.Bits;
import edu.berkeley.cs186.database.databox.DataBox;
import edu.berkeley.cs186.database.io.Page;
import edu.berkeley.cs186.database.io.PageAllocator;
import edu.berkeley.cs186.database.io.PageAllocator.PageIterator;
/**
* # Overview
* A Table represents a database table with which users can insert, get,
* update, and delete records:
*
* // Create a brand new table t(x: int, y: int) which is persisted in the
* // file "t.table".
* List<String> fieldNames = Arrays.asList("x", "y");
* List<String> fieldTypes = Arrays.asList(Type.intType(), Type.intType());
* Schema schema = new Schema(fieldNames, fieldTypes);
* Table t = new Table(schema, "t", "t.table");
*
* // Insert, get, update, and delete records.
* List<DataBox> a = Arrays.asList(new IntDataBox(1), new IntDataBox(2));
* List<DataBox> b = Arrays.asList(new IntDataBox(3), new IntDataBox(4));
* RecordId rid = t.addRecord(a);
* Record ra = t.getRecord(rid);
* t.updateRecord(b, rid);
* Record rb = t.getRecord(rid);
* t.deleteRecord(rid);
*
* // Close the table. All tables must be closed.
* t.close();
*
* # Persistence
* Every table constructs a new PageAllocator which it uses to persist its data
* into a file. For example, the table above persists itself into a file
* "t.table". We can later run the following code to reload the table:
*
* // Load the table t from the file "t.table". Unlike above, we do not have
* // to specify the schema of t because it will be parsed from "t.table".
* Table t = new Table("t", "t.table");
* // Don't forget to close the table.
* t.close();
*
* # Storage Format
* Now, we discuss how tables serialize their data into files.
*
* 1. Each file begins with a header page into which tables serialize their
* schema.
* 2. All remaining pages are data pages. Every data page begins with an
* n-byte bitmap followed by m records. The bitmap indicates which records
* in the page are valid. The values of n and m are set to maximize the
* number of records per page (see computeDataPageNumbers for details).
*
* For example, here is a cartoon of what a table's file would look like if we
* had 5-byte pages and 1-byte records:
*
* Serialized Schema___________________________________
* / \
* +----------+----------+----------+----------+----------+ \
* Page 0 | 00000001 | 00000001 | 01111111 | 00000001 | 00000100 | |- header
* +----------+----------+----------+----------+----------+ /
* +----------+----------+----------+----------+----------+ \
* Page 1 | 1001xxxx | 01111010 | xxxxxxxx | xxxxxxxx | 01100001 | |
* +----------+----------+----------+----------+----------+ |
* Page 2 | 1101xxxx | 01110010 | 01100100 | xxxxxxxx | 01101111 | |- data
* +----------+----------+----------+----------+----------+ |
* Page 3 | 0011xxxx | xxxxxxxx | xxxxxxxx | 01111010 | 00100001 | |
* +----------+----------+----------+----------+----------+ /
* \________/ \________/ \________/ \________/ \________/
* bitmap record 0 record 1 record 2 record 3
*
* - The first page (Page 0) is the header page and contains the serialized
* schema.
* - The second page (Page 1) is a data page. The first byte of this data page
* is a bitmap, and the next four bytes are each records. The first and
* fourth bit are set indicating that record 0 and record 3 are valid.
* Record 1 and record 2 are invalid, so we ignore their contents.
* Similarly, the last four bits of the bitmap are unused, so we ignore
* their contents.
* - The third and fourth page (Page 2 and 3) are also data pages and are
* formatted similar to Page 1.
*
* When we add a record to a table, we add it to the very first free slot in
* the table. See addRecord for more information.
*/
public class Table implements Iterable<Record>, Closeable {
public static final String FILENAME_PREFIX = "db";
public static final String FILENAME_EXTENSION = ".table";
// The name of the database.
private String name;
// The filename of the file in which this table is persisted.
private String filename;
// The schema of the database.
private Schema schema;
// The allocator used to persist the database.
private PageAllocator allocator;
// The size (in bytes) of the bitmap found at the beginning of each data page.
private int bitmapSizeInBytes;
// The number of records on each data page.
private int numRecordsPerPage;
// The page numbers of all allocated pages which have room for more records.
private TreeSet<Integer> freePageNums;
// The number of records in the table.
private long numRecords;
// Constructors //////////////////////////////////////////////////////////////
/**
* Construct a brand new table named `name` with schema `schema` persisted in
* file `filename`.
*/
public Table(String name, Schema schema, String filename) {
this.name = name;
this.filename = filename;
this.schema = schema;
this.allocator = new PageAllocator(filename, true);
this.bitmapSizeInBytes = computeBitmapSizeInBytes(Page.pageSize, schema);
numRecordsPerPage = computeNumRecordsPerPage(Page.pageSize, schema);
this.freePageNums = new TreeSet<Integer>();
this.numRecords = 0;
writeSchemaToHeaderPage(allocator, schema);
}
/**
* Load a table named `name` from the file `filename`. The schema of the
* table will be read from the header page of the file.
*/
public Table(String name, String filename) throws DatabaseException {
this.name = name;
this.filename = filename;
this.allocator = new PageAllocator(filename, false);
this.schema = readSchemaFromHeaderPage(this.allocator);
this.bitmapSizeInBytes = computeBitmapSizeInBytes(Page.pageSize, this.schema);
this.numRecordsPerPage = computeNumRecordsPerPage(Page.pageSize, this.schema);
this.freePageNums = new TreeSet<Integer>();
this.numRecords = 0;
Iterator<Page> iter = this.allocator.iterator();
iter.next(); // Skip the header page.
while(iter.hasNext()) {
Page page = iter.next();
byte[] bitmap = getBitMap(page);
for (short i = 0; i < numRecordsPerPage; ++i) {
if (Bits.getBit(bitmap, i) == Bits.Bit.ONE) {
Record r = getRecord(new RecordId(page.getPageNum(), i));
numRecords++;
}
}
if (numRecordsOnPage(page) != numRecordsPerPage) {
freePageNums.add(page.getPageNum());
}
}
}
// Accessors /////////////////////////////////////////////////////////////////
public String getName() {
return name;
}
public String getFilename() {
return filename;
}
public Schema getSchema() {
return schema;
}
public PageAllocator getAllocator() {
return allocator;
}
public int getBitmapSizeInBytes() {
return bitmapSizeInBytes;
}
public int getNumRecordsPerPage() {
return numRecordsPerPage;
}
public long getNumRecords() {
return numRecords;
}
public int getNumDataPages() {
// All pages but the first are data pages.
return allocator.getNumPages() - 1;
}
public byte[] getBitMap(Page page) {
byte[] bytes = new byte[bitmapSizeInBytes];
page.getByteBuffer().get(bytes);
return bytes;
}
public static int computeBitmapSizeInBytes(int pageSize, Schema schema) {
// Dividing by 8 simultaneously (a) rounds down the number of records to a
// multiple of 8 and (b) converts bits to bytes.
return computeUnroundedNumRecordsPerPage(pageSize, schema) / 8;
}
public static int computeNumRecordsPerPage(int pageSize, Schema schema) {
// Dividing by 8 and then multiplying by 8 rounds down to the nearest
// multiple of 8.
return computeUnroundedNumRecordsPerPage(pageSize, schema) / 8 * 8;
}
// Modifiers /////////////////////////////////////////////////////////////////
private synchronized void insertRecord(Page page, int entryNum, Record record) {
int offset = bitmapSizeInBytes + (entryNum * schema.getSizeInBytes());
byte[] bytes = record.toBytes(schema);
ByteBuffer buf = page.getByteBuffer();
buf.position(offset);
buf.put(bytes);
}
/**
* addRecord adds a record to this table and returns the record id of the
* newly added record. freePageNums, and numRecords are updated
* accordingly. The record is added to the first free slot of the first free
* page (if one exists, otherwise one is allocated). For example, if the
* first free page has bitmap 0b11101000, then the record is inserted into
* the page with index 3 and the bitmap is updated to 0b11111000.
*/
public synchronized RecordId addRecord(List<DataBox> values) throws DatabaseException {
Record record = schema.verify(values);
// Get a free page, allocating a new one if necessary.
if (freePageNums.isEmpty()) {
freePageNums.add(allocator.allocPage());
}
Page page = allocator.fetchPage(freePageNums.first());
// Find the first empty slot in the bitmap.
// entry number of the first free slot and store it in entryNum; and (2) we
// count the total number of entries on this page.
byte[] bitmap = getBitMap(page);
int entryNum = 0;
for (; entryNum < numRecordsPerPage; ++entryNum) {
if (Bits.getBit(bitmap, entryNum) == Bits.Bit.ZERO) {
break;
}
}
assert(entryNum < numRecordsPerPage);
// Insert the record and update the bitmap.
insertRecord(page, entryNum, record);
Bits.setBit(page.getByteBuffer(), entryNum, Bits.Bit.ONE);
// Update the metadata.
if (numRecordsOnPage(page) == numRecordsPerPage) {
freePageNums.pollFirst();
}
numRecords++;
return new RecordId(page.getPageNum(), (short) entryNum);
}
/**
* Retrieves a record from the table, throwing an exception if no such record
* exists.
*/
public synchronized Record getRecord(RecordId rid) throws DatabaseException {
validateRecordId(rid);
Page page = allocator.fetchPage(rid.getPageNum());
byte[] bitmap = getBitMap(page);
if (Bits.getBit(bitmap, rid.getEntryNum()) == Bits.Bit.ZERO) {
String msg = String.format("Record %s does not exist.", rid);
throw new DatabaseException(msg);
}
int offset = bitmapSizeInBytes + (rid.getEntryNum() * schema.getSizeInBytes());
ByteBuffer buf = page.getByteBuffer();
buf.position(offset);
return Record.fromBytes(buf, schema);
}
/**
* Overwrites an existing record with new values and returns the existing
* record. An exception is thrown if rid does
* not correspond to an existing record in the table.
*/
public synchronized Record updateRecord(List<DataBox> values, RecordId rid) throws DatabaseException {
validateRecordId(rid);
Record newRecord = schema.verify(values);
Record oldRecord = getRecord(rid);
Page page = allocator.fetchPage(rid.getPageNum());
insertRecord(page, rid.getEntryNum(), newRecord);
return oldRecord;
}
/**
* Deletes and returns the record specified by rid from the table and updates
* freePageNums, and numRecords as necessary. An exception is thrown
* if rid does not correspond to an existing record in the table.
*/
public synchronized Record deleteRecord(RecordId rid) throws DatabaseException {
validateRecordId(rid);
Page page = allocator.fetchPage(rid.getPageNum());
Record record = getRecord(rid);
Bits.setBit(page.getByteBuffer(), rid.getEntryNum(), Bits.Bit.ZERO);
if(numRecordsOnPage(page) == numRecordsPerPage - 1) {
freePageNums.add(page.getPageNum());
}
numRecords--;
return record;
}
public void close() {
allocator.close();
}
// Helpers ///////////////////////////////////////////////////////////////////
private static Schema readSchemaFromHeaderPage(PageAllocator allocator) {
Page headerPage = allocator.fetchPage(0);
ByteBuffer buf = headerPage.getByteBuffer();
return Schema.fromBytes(buf);
}
private static void writeSchemaToHeaderPage(PageAllocator allocator, Schema schema) {
Page headerPage = allocator.fetchPage(allocator.allocPage());
assert(0 == headerPage.getPageNum());
ByteBuffer buf = headerPage.getByteBuffer();
buf.put(schema.toBytes());
}
/**
* Recall that every data page contains an m-byte bitmap followed by n
* records. The following three functions computes m and n such that n is
* maximized. To simplify things, we round n down to the nearest multiple of
* 8 if necessary. m and n are stored in bitmapSizeInBytes and
* numRecordsPerPage respectively.
*
* Some examples:
*
* | Page Size | Record Size | bitmapSizeInBytes | numRecordsPerPage |
* | --------- | ----------- | ----------------- | ----------------- |
* | 9 bytes | 1 byte | 1 | 8 |
* | 10 bytes | 1 byte | 1 | 8 |
* ...
* | 17 bytes | 1 byte | 1 | 8 |
* | 18 bytes | 2 byte | 2 | 16 |
* | 19 bytes | 2 byte | 2 | 16 |
*/
private static int computeUnroundedNumRecordsPerPage(int pageSize, Schema schema) {
// Storing each record requires 1 bit for the bitmap and 8 *
// schema.getSizeInBytes() bits for the record.
int recordOverheadInBits = 1 + 8 * schema.getSizeInBytes();
int pageSizeInBits = pageSize * 8;
return pageSizeInBits / recordOverheadInBits;
}
private int numRecordsOnPage(Page page) {
byte[] bitmap = getBitMap(page);
int numRecords = 0;
for (int i = 0; i < numRecordsPerPage; ++i) {
if (Bits.getBit(bitmap, i) == Bits.Bit.ONE) {
numRecords++;
}
}
return numRecords;
}
private void validateRecordId(RecordId rid) throws DatabaseException {
int p = rid.getPageNum();
int e = rid.getEntryNum();
if (p == 0) {
throw new DatabaseException("Page 0 is a header page, not a data page.");
}
if (e < 0) {
String msg = String.format("Invalid negative entry number %d.", e);
throw new DatabaseException(msg);
}
if (e >= numRecordsPerPage) {
String msg = String.format(
"There are only %d records per page, but record %d was requested.",
numRecordsPerPage, e);
throw new DatabaseException(msg);
}
}
// Iterators /////////////////////////////////////////////////////////////////
public TableIterator ridIterator() {
return new TableIterator();
}
public RecordIterator iterator() {
return new RecordIterator(this, ridIterator());
}
public BacktrackingIterator<Record> blockIterator(Page[] block) {
return new RecordIterator(this, new RIDBlockIterator(block));
}
public BacktrackingIterator<Record> blockIterator(BacktrackingIterator<Page> block) {
return new RecordIterator(this, new RIDBlockIterator(block));
}
public BacktrackingIterator<Record> blockIterator(Iterator<Page> block, int maxRecords) {
return new RecordIterator(this, new RIDBlockIterator(block, maxRecords));
}
/**
* RIDPageIterator is a BacktrackingIterator over the RecordIds of a single
* page of the table.
*
* See comments on the BacktrackingIterator interface for how mark and reset
* should function.
*/
public class RIDPageIterator implements BacktrackingIterator<RecordId> {
//member variables go here
private Page page;
private int mark;
private int next;
private int prev;
/**
* The following method signature is provided for guidance, but not necessary. Feel free to
* implement your own solution using whatever helper methods you would like.
*/
public RIDPageIterator(Page page) {
this.page = page;
this.next = -1000;
this.mark = -1000;
this.prev = -1000;
for (int index = 0; index < numRecordsPerPage; index++) {
if (Bits.getBit(getBitMap(this.page), index) == Bits.Bit.ONE) {
this.next = index;
break;
}
}
}
public boolean hasNext() {
if (this.next != -1) {
return true;
}
return false;
}
public RecordId next() {
if (this.next != -1000) {
RecordId result = new RecordId(this.page.getPageNum(), (short) this.next);
this.prev = this.next;
for (int index = 1 + this.next; index < numRecordsPerPage; index++) {
if (Bits.getBit(getBitMap(this.page), index) == Bits.Bit.ONE) {
this.next = index;
break;
}
}
if (this.next == this.prev) {
this.next = -1000;
}
return result;
}
throw new NoSuchElementException()
}
public void mark() {
if (this.prev != -1000) {
this.mark = this.prev;
}
}
public void reset() {
if (this.mark != -1000) {
this.prev = -1000;
this.next = this.mark;
}
}
}
/**
* Helper function to create a BacktrackingIterator from an Iterator of
* Pages, and a maximum number of pages.
*
* At most maxPages pages will be loaded into the iterator; if there are
* not enough pages available, then fewer pages will be used.
*/
private static BacktrackingIterator<Page> getBlockFromIterator(Iterator<Page> pageIter, int maxPages) {
Page[] block = new Page[maxPages];
int numPages;
for (numPages = 0; numPages < maxPages && pageIter.hasNext(); ++numPages) {
block[numPages] = pageIter.next();
}
if (numPages < maxPages) {
Page[] temp = new Page[numPages];
System.arraycopy(block, 0, temp, 0, numPages);
block = temp;
}
return new ArrayBacktrackingIterator(block);
}
/**
* RIDBlockIterator is a BacktrackingIterator yielding RecordIds of a block
* of pages.
*
* A "block" is specified by a BacktrackingIterator of Pages: every single
* Page returned by the iterator is part of the block. Your code should only
* utilize this iterator's functionality for fetching pages, i.e. you should
* *not* fetch every Page from the block iterator into an array or collection.
*
* The mark and reset methods have been provided for you already, and work by
* saving a BacktrackingIterator of RecordIds over the appropriate page.
*
* The iterator maintains a few pieces of state:
* - block is simply the BacktrackingIterator<Page> specifying the pages in
* the block.
* - blockIter is a BacktrackingIterator over RecordIds of the current page we
* are iterating over.
* - prevRecordId is the last RecordId that next() returned.
* - nextRecordId is the next RecordId that next() will return.
*
* In addition to these, we maintain some state to help with the
* implementation of mark() and reset(); you should not need to use these
* for implementing next() and hasNext().
*/
public class RIDBlockIterator implements BacktrackingIterator<RecordId> {
private BacktrackingIterator<Page> block = null;
private BacktrackingIterator<RecordId> blockIter = null;
private BacktrackingIterator<RecordId> markedBlockIter = null;
private RecordId markedPrevRecordId = null;
private RecordId prevRecordId = null;
private RecordId nextRecordId = null;
public RIDBlockIterator(BacktrackingIterator<Page> block) {
this.block = block;
this.page = this.block.next();
this.blockIter = new RIDPageIterator(page);
if (blockIter.hasNext()) {
this.nextRecordId = blockIter.next();
this.blockIter.mark();
}
this.blockIter.reset();
}
/**
* This is an extra constructor that allows one to create an
* RIDBlockIterator by taking the first maxPages of an iterator of Pages.
*
* If there are fewer than maxPages number of Pages available in pageIter,
* then all remaining pages shall be used in the "block"; otherwise,
* only the first maxPages number of pages shall be used.
*
* Note that this also advances pageIter by maxPages, so you can do the
* following:
*
* Iterator<Page> pageIter = // ...
* RIDBlockIterator firstBlock = new RIDBlockIterator(pageIter, 100);
* RIDBlockIterator secondBlock = new RIDBlockIterator(pageIter, 100);
* RIDBlockIterator thirdBlock = new RIDBlockIterator(pageIter, 100);
*
* to get iterators over the first 100 pages, second 100 pages, and third
* 100 pages.
*/
public RIDBlockIterator(Iterator<Page> pageIter, int maxPages) {
this(Table.getBlockFromIterator(pageIter, maxPages));
}
/**
* This is an extra constructor that allows one to create an
* RIDBlockIterator over an array of Pages.
*
* Every page in the pages array will be used in the block of pages.
*/
public RIDBlockIterator(Page[] pages) {
this(new ArrayBacktrackingIterator(pages));
}
public boolean hasNext() {
if (!this.blockIter.hasNext() && this.block.hasNext()) {
this.page = this.block.next();
this.blockIter = new RIDPageIterator(page);
return this.hasNext();
} else if (!this.blockIter.hasNext()) {
return false;
}
return true;
}
public RecordId next() {
if(!this.hasNext()) {
this.nextRecordId = null;
if (!this.block.hasNext()) {
throw new NoSuchElementException();
}
}
this.prevRecordId = this.nextRecordId;
this.nextRecordId = this.blockIter.next();
return this.nextRecordId;
}
/**
* Marks the last recordId returned by next().
*
* This implementation of mark simply marks and saves the current page's
* iterator of RecordIds.
*/
public void mark() {
if (this.prevRecordId == null) {
return;
}
this.block.mark();
this.blockIter.mark();
this.markedBlockIter = this.blockIter;
this.markedPrevRecordId = this.prevRecordId;
}
/**
* Resets to the marked recordId.
*
* This implementation of reset restores the marked page's iterator,
* and calls reset() on it to move it to the correct record. Some extra
* care is taken to ensure that we properly reset the block page iterator.
*/
public void reset() {
if (this.markedPrevRecordId == null) {
return;
}
this.block.reset();
// We don't want to get the current page again
this.block.next();
this.blockIter = this.markedBlockIter;
this.blockIter.reset();
// If we're at the end of the block, we don't want to repeat the record
if (!this.block.hasNext()) {
this.blockIter.next();
if (this.blockIter.hasNext()) {
this.blockIter.reset();
}
}
this.prevRecordId = null;
this.nextRecordId = this.markedPrevRecordId;
}
}
/**
* A helper function that returns the same iterator passed in, but with
* a single page skipped.
*/
private static Iterator<Page> iteratorSkipPage(Iterator<Page> iter) {
iter.next();
return iter;
}
/**
* TableIterator is an Iterator over the record IDs of a table.
*
* This is just a very thin wrapper around RIDBlockIterator, where the "block"
* is an iterator of all the pages of the table (minus the header page). Once
* RIDBlockIterator is filled in, all tests on TableIterator should
* automatically pass.
*/
public class TableIterator extends RIDBlockIterator {
public TableIterator() {
super((BacktrackingIterator<Page>) Table.iteratorSkipPage(Table.this.allocator.iterator()));
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment