Skip to content

Instantly share code, notes, and snippets.

@manangandhi7
Created December 28, 2018 17:38
Show Gist options
  • Save manangandhi7/98d2cdd12c7800aaea3ecdc41a7755e3 to your computer and use it in GitHub Desktop.
Save manangandhi7/98d2cdd12c7800aaea3ecdc41a7755e3 to your computer and use it in GitHub Desktop.
External sort in c++
#include <iostream>
#include <fstream>
#include<sstream>
#include <queue>
#include<algorithm>
using namespace std;
class Compare
{
public:
//Ascending order sort
bool operator() (pair<int, int> pair1, pair<int, int> pair2)
{
return pair1.first > pair2.first;
}
};
string ToString(int val) {
stringstream stream;
stream << val;
return stream.str();
}
//merge all sorted files into one
string mergeFiles(int counter) {
string fileA, fileB;
std::priority_queue<pair<int, int>, std::vector<pair<int, int> >, Compare> minHeap;
ifstream* handles = new ifstream[counter];
for (int i = 1; i <= counter; i++) {
string sortedInputFileName = "output" + ToString(i) + ".txt";
handles[i - 1].open(sortedInputFileName.c_str());
int firstValue;
handles[i - 1] >> firstValue; //first value in the file (minimum in the file)
minHeap.push(pair<int, int>(firstValue, i - 1)); //second value in pair keeps track of the file from which the number was drawn
}
string outputFileName = "output.txt";
ofstream outputFile(outputFileName.c_str());
while (minHeap.size() > 0) {
pair<int, int> minPair = minHeap.top();
minHeap.pop();
outputFile << minPair.first << '\n';
int nextValue;
flush(outputFile);
if (handles[minPair.second] >> nextValue) {
minHeap.push(pair <int, int>(nextValue, minPair.second));
}
}
//clean up
for (int i = 1; i <= counter; i++) {
handles[i - 1].close();
}
outputFile.close();
delete[] handles;//free memory
return outputFileName;
}
void sortAndWrite(int *values, int size, int numberOfChunks) {
sort(values, values + size);
string outputFileName = "output" + ToString(numberOfChunks) + ".txt";
ofstream outputFile(outputFileName.c_str()); //output file
for (int i = 0; i < size; i++) {
outputFile << values[i] << '\n';
}
outputFile.close();
}
int main() {
//divide file into chunks of size that fits in your memory
int numberOfChunks = 1;
int maxSizeofMemory = 32;//in bytes
int chunkSize = maxSizeofMemory / sizeof(int); //(4 bytes for integer)
int* inputValues = new int[chunkSize];
int readValue = 0;
int currentCount = 0;
bool unprocessedData = true;
ifstream inputFile("input.txt");
while (inputFile >> readValue) {
unprocessedData = true;
inputValues[currentCount++] = readValue;
if (currentCount == chunkSize) {
sortAndWrite(inputValues, currentCount, numberOfChunks);
numberOfChunks++;
currentCount = 0;
unprocessedData = false;
}
}
if (unprocessedData) {
sortAndWrite(inputValues, currentCount, numberOfChunks);
}
else {
numberOfChunks--;
}
inputFile.close();
delete[] inputValues; //free memory
if (numberOfChunks != 0) {
std::priority_queue<pair<int, int>, std::vector<pair<int, int> >, Compare> minHeap;
cout << "output is in file : " << mergeFiles(numberOfChunks);
}
return 0;
}
@bezchastnyi
Copy link

Твой алгоритм - гавно

@manangandhi7
Copy link
Author

Твой алгоритм - гавно

તો પછી જાતે કરો.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment