Skip to content

Instantly share code, notes, and snippets.

@sammdot
Last active March 19, 2017 08:17
Show Gist options
  • Save sammdot/2987ddb4ffd2f4a4f5781864cc07e34f to your computer and use it in GitHub Desktop.
Save sammdot/2987ddb4ffd2f4a4f5781864cc07e34f to your computer and use it in GitHub Desktop.
2PMMS starter code
#include "merge.h"
int main (int argc, char **argv) {
//process and validate command-line arguments
MergeManager manager;
//initialize all fields according to the input and the results of Phase I
return merge_runs (&manager);
}
#include <stdlib.h>
#include "merge.h"
//manager fields should be already initialized in the caller
int merge_runs (MergeManager * merger){
int result; //stores SUCCESS/FAILURE returned at the end
//1. go in the loop through all input files and fill-in initial buffers
if (init_merge (merger)!=SUCCESS)
return FAILURE;
while (merger->heap_size > 0) { //heap is not empty
HeapElement smallest;
Record next; //here next comes from input buffer
if(get_top_heap_element (merger, &smallest)!=SUCCESS)
return FAILURE;
result = get_next_input_element (merger, smallest.run_id, &next);
if (result==FAILURE)
return FAILURE;
if(result==SUCCESS) {//next element exists, may also return EMPTY
if(insert_into_heap (merger, smallest.run_id, &next)!=SUCCESS)
return FAILURE;
}
merger->output[merger->output_pos].UID1=smallest.UID1;
merger->output[merger->output_pos].UID2=smallest.UID2;
merger->output_pos++;
//staying on the last slot of the output buffer - next will cause overflow
if(merger->output_pos == merger->output_capacity) {
if(flush_output_buffer(merger)!=SUCCESS) {
return FAILURE;
merger->output_pos = 0;
}
}
}
//flush what remains in output buffer
if(merger->output_pos > 0) {
if(flush_output_buffer(merger)!=SUCCESS)
return FAILURE;
}
clean_up(merger);
return SUCCESS;
}
int get_top_heap_element (MergeManager * merger, HeapElement * result){
HeapElement item;
int child, parent;
if(merger->heap_size == 0){
printf( "UNEXPECTED ERROR: popping top element from an empty heap\n");
return FAILURE;
}
*result=merger->heap[0]; //to be returned
//now we need to reorganize heap - keep the smallest on top
item = merger->heap [--merger->heap_size]; // to be reinserted
parent =0;
while ((child = (2 * parent) + 1) < merger->heap_size) {
// if there are two children, compare them
if (child + 1 < merger->heap_size &&
(compare_heap_elements(&(merger->heap[child]),&(merger->heap[child + 1]))>0))
++child;
// compare item with the larger
if (compare_heap_elements(&item, &(merger->heap[child]))>0) {
merger->heap[parent] = merger->heap[child];
parent = child;
}
else
break;
}
merger->heap[parent] = item;
return SUCCESS;
}
int insert_into_heap (MergeManager * merger, int run_id, Record *input){
HeapElement new_heap_element;
int child, parent;
new_heap_element.UID1 = input->UID1;
new_heap_element.UID2 = input->UID2;
new_heap_element.run_id = run_id;
if (merger->heap_size == merger->heap_capacity) {
printf( "Unexpected ERROR: heap is full\n");
return FAILURE;
}
child = merger->heap_size++; /* the next available slot in the heap */
while (child > 0) {
parent = (child - 1) / 2;
if (compare_heap_elements(&(merger->heap[parent]),&new_heap_element)>0) {
merger->heap[child] = merger->heap[parent];
child = parent;
}
else
break;
}
merger->heap[child]= new_heap_element;
return SUCCESS;
}
/*
** TO IMPLEMENT
*/
int init_merge (MergeManager * manager) {
return SUCCESS;
}
int flush_output_buffer (MergeManager * manager) {
return SUCCESS;
}
int get_next_input_element(MergeManager * manager, int file_number, Record *result) {
return SUCCESS;
}
int refill_buffer (MergeManager * manager, int file_number) {
return SUCCESS;
}
void clean_up (MergeManager * merger) {
}
#ifndef MERGE_H
#define MERGE_H
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_PATH_LENGTH 1024
//return values of all functions
#define SUCCESS 0
#define FAILURE 1
#define EMPTY 2
typedef struct record {
int UID1;
int UID2;
} Record;
typedef struct HeapElement {
int UID1;
int UID2;
int run_id;
} HeapElement;
typedef struct input_buffer {
int capacity;
long run_length;
long file_pos;
long buffer_pos;
long records;
int done;
Record *buffer;
} InputBuffer;
typedef struct merge_manager {
HeapElement *heap;
int heap_size;
int heap_capacity;
FILE *in_fp;
FILE *out_fp;
Record *output;
int output_pos;
int output_capacity;
InputBuffer *buffers;
} MergeManager;
//1. main loop
int merge_runs (MergeManager * manager);
//2. creates and fills initial buffers, initializes heap taking 1 top element from each buffer
int init_merge (MergeManager * manager);
//3. flushes output buffer to disk when full
int flush_output_buffer (MergeManager * manager);
//4. returns top heap element, rearranges heap nodes
int get_top_heap_element (MergeManager * manager, HeapElement * result);
//5. inserts new element into heap, rearranges nodes to keep smallest on top
int insert_into_heap (MergeManager * manager, int run_id, Record *input);
//6. reads next element from an input buffer to transfer it to the heap. Uploads records from disk if all elements are processed
int get_next_input_element(MergeManager * manager, int file_number, Record *result);
//7. refills input buffer from the corresponding run
int refill_buffer (MergeManager * manager, int file_number);
//8. Frees all dynamically allocated memory
void clean_up (MergeManager * merger);
//9. Application-specific comparison function
int compare_heap_elements (HeapElement *a, HeapElement *b);
#endif
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment