Skip to content

Instantly share code, notes, and snippets.

@vgangireddyin
Created June 5, 2016 07:35
Show Gist options
  • Save vgangireddyin/86524cee4c89658629a4cf4baf9ed6b5 to your computer and use it in GitHub Desktop.
Save vgangireddyin/86524cee4c89658629a4cf4baf9ed6b5 to your computer and use it in GitHub Desktop.
BPlus Tree in CPP
#include <vector>
#include <stdio.h>
#include <stack>
#include <stdlib.h>
#include <fstream>
#include <math.h>
#include <time.h>
using namespace std;
int nextPointer = 0, rfp, keySize; //track next pointer value, root pointer and key sizes
struct BplusNode
{
public:
vector<int> filePointers; // these are served as file pointers and object pointers as well
vector<float> keys; // constructed as all keys are in sorted order
int next, prev; // in case of leaf these would help
int isLeaf; // 0 for internal node and 1 for leaf node
};
struct splitReturn // this will combine node in memory pointer and its int pointer used in splitNode method
{
BplusNode node;
int childptr;
};
struct KNNElement
{
float key;
float distance;
};
int calcKeySize(int s)
{
s = s - 3 * sizeof(int); // extra file pointer when overload, next and prev
s = s - sizeof(float); // extra key when overload
return (s - sizeof(int)) / (sizeof(int) + sizeof(float)) ;
}
BplusNode createNewNode(int isLeaf)
{
BplusNode newNode;
newNode.next = -1;
newNode.prev = -1;
newNode.isLeaf = isLeaf;
return newNode;
}
//file structure: prev, next, fp 1, val 1, fp 2, val 2,..., fp n val n, fp n+1
//fp1(special pointer) : +ve in case of internal node, -ve in case of leaf node
BplusNode loadIntoMemory(int fpt)
{
BplusNode node;
char fname[33];
int temp;
float temp2;
sprintf(fname, "%d.bpt", fpt);
FILE *infile = fopen(fname, "rb");
if(infile != NULL)
{
fread((char*)&node.prev, sizeof(node.prev), 1, infile);
fread((char*)&node.next, sizeof(node.next), 1, infile);
//special pointer
fread((char*)&temp, sizeof(temp), 1, infile);
node.filePointers.push_back(temp);
if(temp >= 0)
node.isLeaf = 0;
else
node.isLeaf = 1;
while(1)
{
//printf("loading keys \n");
if(fread((char*)&temp2, sizeof(temp2), 1, infile) == 0)
break;
node.keys.push_back(temp2);
fread((char*)&temp, sizeof(temp), 1, infile);
node.filePointers.push_back(temp);
}
}
else
{
node = createNewNode(1);
}
fclose(infile);
return node;
}
void writeBackFile(BplusNode &node, int fpt)
{
char fname[33];
sprintf(fname, "%d.bpt", fpt);
FILE *outfile = fopen(fname, "wb");
fwrite((char*)&node.prev, sizeof(node.prev), 1, outfile);
fwrite((char*)&node.next, sizeof(node.next), 1, outfile);
//write special pointer
fwrite((char*)&node.filePointers[0], sizeof(node.filePointers[0]), 1, outfile);
for(int i = 0; i < node.keys.size(); i++)
{
fwrite((char*)&node.keys[i], sizeof(node.keys[i]), 1, outfile);
fwrite((char*)&node.filePointers[i+1], sizeof(node.filePointers[i+1]), 1, outfile);
}
fclose(outfile);
node.filePointers.clear();
node.keys.clear();
}
// if value not present it will return the index of next maximum, if multiple values are there it will return the least index
int findIndex(vector<float> &keys, int from, int to, float value)
{
if(from == to)
{
// printf("this should execute for first time\n %f\t%f\t%d\n", keys[from], value, from);
if(keys[from] < value)
{
return from + 1;
}
else
{
return from;
}
}
else
{
int mid = (from + to) / 2;
if(keys[mid] >= value)
return findIndex(keys, from, mid, value);
else
return findIndex(keys, mid+1, to, value);
}
}
//merge two sorted lists and return best k
vector<float> mergeLists(vector<KNNElement> &l, vector<KNNElement> &r, int k)
{
vector<float> ans;
int i = 0, j = 0, cnt = k;
if(i == l.size())
{
while(cnt)
{
ans.push_back(r[j].key);
j++;
cnt--;
}
}
if(j == r.size())
{
while(cnt)
{
ans.push_back(l[i].key);
i++;
cnt--;
}
}
while(cnt)
{
if(l[i].distance <= r[j].distance)
{
ans.push_back(l[i].key);
i++;
cnt--;
}
else
{
ans.push_back(r[j].key);
j++;
cnt--;
}
if(i == l.size())
{
printf("it should execute \n");
while(cnt)
{
ans.push_back(r[j].key);
j++;
cnt--;
}
}
if(j == r.size())
{
while(cnt)
{
ans.push_back(l[i].key);
i++;
cnt--;
}
}
}
return ans;
}
//split the parent of node return next split is need or not, if yes +ve number that is parent pointer need to process
splitReturn splitNode(BplusNode &node, int npt, int ppt)
{
int mid = keySize / 2;
int spt = ++nextPointer;
BplusNode newSibling = createNewNode(node.isLeaf);
//set special pointer
if(node.isLeaf)
newSibling.filePointers.push_back(-1);
else
newSibling.filePointers.push_back(node.filePointers[mid]);
//share items
for(int i = (mid + 1); i < node.keys.size(); i++)
{
newSibling.keys.push_back(node.keys[i]);
newSibling.filePointers.push_back(node.filePointers[i+1]);
}
//clean node
node.keys.erase(node.keys.begin() + (mid + 1), node.keys.end());
node.filePointers.erase(node.filePointers.begin() + (mid + 1), node.filePointers.end());
//leaf case, make pointers
if(node.isLeaf)
{
int temp = node.next;
node.next = spt;
newSibling.prev = npt;
newSibling.next = temp;
if(temp != -1)
{
BplusNode tempnode = loadIntoMemory(temp);
tempnode.prev = spt;
writeBackFile(tempnode, temp);
}
}
//root case
if(ppt == -1)
{
int rpt = ++nextPointer;
BplusNode newNode = createNewNode(0);
newNode.keys.push_back(node.keys[mid]);
//update root pointers
newNode.filePointers.push_back(npt);
newNode.filePointers.push_back(spt);
//write back data
writeBackFile(node, npt);
writeBackFile(newSibling, spt);
writeBackFile(newNode, rpt);
//change new node as root
rfp = rpt;
BplusNode dummy;
splitReturn ans = {dummy, -1};
return ans;
}
//non-root case, parent contains the elements and be careful to insert new key into parent
else
{
BplusNode parent = loadIntoMemory(ppt);
//insert key-mid in perfect place
int index = findIndex(parent.keys, 0, parent.keys.size() - 1, node.keys[mid]); // if equal keys : no problem, if two keys are equal all its children are equal
parent.keys.insert(parent.keys.begin() + index, node.keys[mid]);
parent.filePointers.insert(parent.filePointers.begin() + (index + 1), spt);
//write back data
writeBackFile(node, npt);
writeBackFile(newSibling, spt);
if(parent.keys.size() < keySize)
{
writeBackFile(parent, ppt);
BplusNode dummy;
splitReturn ans = {dummy, -1};
return ans;
}
else
{
splitReturn ans = {parent, ppt};
return ans;
}
}
}
//using split node implement insert node, iterative version
void insertValue(float value, int objpointer)
{
stack<int> path;
BplusNode root = loadIntoMemory(rfp);
int next = rfp, pfp, cfp = 0;
//move to corresponding leaf node
while(root.isLeaf == 0)
{
path.push(next);
int index = findIndex(root.keys, 0, root.keys.size() - 1, value); //it will return corresponding index
next = root.filePointers[index];
cfp = next;
root = loadIntoMemory(next);
}
if(root.keys.size() < keySize)
{
//init case
if(root.keys.size() == 0)
{
root.filePointers.push_back(-1);
root.keys.push_back(value);
root.filePointers.push_back(objpointer);
writeBackFile(root, rfp);
}
else
{
int index = findIndex(root.keys, 0, root.keys.size() - 1, value);
//printf("index %d\n", index);
root.keys.insert(root.keys.begin() + index, value);
root.filePointers.insert(root.filePointers.begin() + (index + 1), objpointer);
//write back root
writeBackFile(root, cfp);
}
}
else
{
// insert value in node, but call split
int index = findIndex(root.keys, 0, root.keys.size() - 1, value);
root.keys.insert(root.keys.begin() + index, value);
root.filePointers.insert(root.filePointers.begin() + (index + 1), objpointer);
BplusNode child = root;
//track down the path
do
{
//root case
if(path.size() == 0)
{
splitReturn check = splitNode(child, cfp, -1);
child = check.node;
cfp = check.childptr;
}
else
{
pfp = path.top();
path.pop();
splitReturn check = splitNode(child, cfp, pfp);
child = check.node;
cfp = check.childptr;
}
}while(cfp != -1);
}
}
vector<float> windowQuery(float a, float b)
{
int next = rfp, Index = 0;
vector<float> ans;
bool stopit = false, first = true;;
BplusNode node = loadIntoMemory(next);
//move to the proper leaf
while(node.isLeaf == 0)
{
Index = findIndex(node.keys, 0, node.keys.size() - 1, a);
next = node.filePointers[Index];
node = loadIntoMemory(next);
}
//sequential search from leaf
while(!stopit)
{
if(first)
Index = findIndex(node.keys, 0, node.keys.size() - 1, a);
else
Index = 0;
for(int i = Index; i < node.keys.size(); i++)
{
first = false;
if(node.keys[i] <= b)
{
ans.push_back(node.keys[i]);
}
else
{
stopit = true;
break;
}
}
if(node.next == -1)
break;
node = loadIntoMemory(node.next);
}
return ans;
}
vector<float> rangeQuery(float center, float range)
{
return windowQuery(center-range, center+range);
}
bool pointQuery(float a)
{
return windowQuery(a, a).size() > 0;
}
vector<float> kNNQuery(float center, int k)
{
vector<KNNElement> left, right;
KNNElement temp;
int next = rfp, Index = 0, centerLeaf, cnt = k;
bool first = true, stopit = false;
BplusNode node = loadIntoMemory(next);
//move to the proper leaf
while(node.isLeaf == 0)
{
Index = findIndex(node.keys, 0, node.keys.size() - 1, center);
next = node.filePointers[Index];
node = loadIntoMemory(next);
}
centerLeaf = next;
//load all right
while(!stopit)
{
if(first)
Index = findIndex(node.keys, 0, node.keys.size() - 1, center);
else
Index = 0;
for(int i = Index; i < node.keys.size(); i++)
{
first = false;
if(cnt--)
{
temp = {node.keys[i], fabs(node.keys[i] - center)};
left.push_back(temp);
}
else
{
stopit = true;
break;
}
}
if(node.next == -1)
break;
node = loadIntoMemory(node.next);
}
//load all left
cnt = k;
next = centerLeaf;
node = loadIntoMemory(next);
stopit = false;
first = true;
while(!stopit)
{
if(first)
Index = findIndex(node.keys, 0, node.keys.size() - 1, center);
else
Index = node.keys.size();
for(int i = Index - 1; i >= 0; i--)
{
first = false;
if(cnt--)
{
temp = {node.keys[i], fabs(node.keys[i] - center)};
right.push_back(temp);
}
else
{
stopit = true;
break;
}
}
if(node.prev == -1)
break;
node = loadIntoMemory(node.prev);
}
//merge left and right and pick best k
return mergeLists(left, right, k);
}
//for testing
void testTree()
{
int next = rfp;
BplusNode node = loadIntoMemory(next);
printf("testing\n");
while(node.isLeaf == 0)
{
next = node.filePointers[0];
printf("next is %d\n", next);
node = loadIntoMemory(next);
}
while(1)
{
printf("%d\t%d\t%d\n", node.filePointers[0], node.prev, node.next);
for(int i = 0; i < node.keys.size(); i++)
{
printf("%f\t", node.keys[i]);
printf("%d\n", node.filePointers[i+1]);
}
if(node.next == -1)
break;
node = loadIntoMemory(node.next);
}
}
int main()
{
clock_t start;
float temp, x, y, t;
int i, objptr = 0, choice, K , j = 0, pagesize;
bool check = false;
char obj[13];
vector<float> ans;
FILE *fpinfo = fopen("bplus.info", "a"); // bplus.info contain the root and nextpointer values
FILE *fp = fopen("assgn3_bplus_data.txt", "r");
FILE *fpdata = fopen("bplus.data", "wb");
FILE *queryFile = fopen("assgn3_bplus_querysample.txt", "r");
FILE *reportFile = fopen("bplusreport.csv", "w");
FILE *outputFile = fopen("bplusoutput.txt", "w");
FILE *config = fopen("bplustree.config","r");
if(fpinfo == NULL)
{
rfp = 0;
nextPointer = 0;
}
else
{
fscanf(fpinfo, "%d%d", &rfp, &nextPointer);
}
fscanf(config, "%d", &pagesize);
keySize = calcKeySize(pagesize);
//build tree
i = fscanf(fp, "%f%s", &temp, obj);
float time = 0;
while(i == 2)
{
j++;
if(j % 5000 == 0)
{
printf("inserting %d : %f\t%s\n", j, temp, obj);
}
fwrite (obj , sizeof(char), 13, fpdata);
start = clock();
insertValue(temp, objptr);
time += float( clock() - start);
objptr += 13;
i = fscanf(fp, "%f%s", &temp, obj);
}
printf("Entire tree is build in %f time units", time );
j = 0;
while(fscanf(queryFile, "%d", &choice) == 1)
{
j++;
if(j % 500 == 0)
{
printf("accessing %d query\n", j);
}
switch(choice)
{
case 0:
fscanf(queryFile,"%f%s", &temp, obj);
fwrite (obj , sizeof(char), 13, fpdata);
start = clock();
insertValue(temp, objptr);
t = float( clock() - start ) ;
objptr += 13;
fprintf(reportFile, "%d,%f\n", choice, t);
fprintf(outputFile,"%d\t%f\t%s\n", choice, temp, obj);
break;
case 1:
fscanf(queryFile,"%f", &x);
start = clock();
check = pointQuery(x);
t = float( clock() - start ) ;
fprintf(reportFile, "%d,%f\n", choice, t);
fprintf(outputFile,"%d\t%f\n", choice, x);
if(check)
fprintf(outputFile,"%f\n", x);
break;
case 2:
ans.clear();
fscanf(queryFile,"%f%f", &x, &y);
start = clock();
ans = rangeQuery(x, y);
t = float( clock() - start ) ;
fprintf(reportFile, "%d,%f,%d\n", choice, t, ans.size());
fprintf(outputFile,"%d\t%f\t%f\n", choice, x, y);
// printf("%d\t%f\t%f\t%f\n", choice, qurpoint.p[0], qurpoint.p[1], range);
for(int i = 0; i < ans.size(); i++)
fprintf(outputFile,"%f\n", ans[i]);
break;
case 3:
ans.clear();
fscanf(queryFile,"%f%d", &x, &K);
start = clock();
ans = kNNQuery(x, K);
t = float( clock() - start ) ;
fprintf(reportFile, "%d,%f,%d\n", choice, t, ans.size());
fprintf(outputFile,"%d\t%f\t%d\n", choice, x, K);
// printf("%d\t%f\t%f\t%d\n", choice, qurpoint.p[0], qurpoint.p[1], K);
for(int i = 0; i < ans.size(); i++)
fprintf(outputFile,"%f\n", ans[i]);
break;
case 4:
ans.clear();
fscanf(queryFile,"%f%f", &x, &y);
start = clock();
ans = windowQuery(min(x, y), max(x,y));
t = float( clock() - start );
fprintf(reportFile, "%d,%f,%d\n", choice, t, ans.size());
fprintf(outputFile,"%d\t%f\t%f\n", choice, x, y);
for(int i = 0; i < ans.size(); i++)
fprintf(outputFile,"%f\n", ans[i]);
break;
}
}
//closing opening files
fflush(fpinfo);
fprintf(fpinfo, "%d %d", rfp, nextPointer);
fclose(fpinfo);
fclose(fp);
fclose(fpdata);
fclose(reportFile);
fclose(outputFile);
fclose(queryFile);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment