Skip to content

Instantly share code, notes, and snippets.

@kogemrka
Created December 12, 2011 15:01
Show Gist options
  • Save kogemrka/1467668 to your computer and use it in GitHub Desktop.
Save kogemrka/1467668 to your computer and use it in GitHub Desktop.
#include <iostream>
#include <vector>
#include <memory>
#include <fstream>
#include <exception>
#include <algorithm>
#include <stdexcept>
/* TODO (вот она, десяточка моей мечты)
- Причесать функторы
- Воспользоваться трюком с наследованием для создания псевдонимов
*/
// Only for POD types
template <class T>
void serialize(const T& obj, std::ostream& out)
{
out.write((char*)(&obj), sizeof(obj));
}
template <class T>
void deserialize(T &obj, std::istream& in)
{
in.read((char*)(&obj), sizeof(obj));
}
//for Strings
void serialize(const std::string &obj, std::ostream& out)
{
size_t size = obj.length();
out.write((char*)(&size), sizeof(size_t));
out.write(obj.c_str(), size);
}
void deserialize(std::string &obj, std::istream& in)
{
size_t size;
in.read((char*)(&size), sizeof(size_t));
char *data = new char[size + 1];
try
{
in.read(data, size);
obj.assign(data, size);
}
catch(...)
{
delete [] data;
throw;
}
delete [] data;
}
// for Vectors
template <class T>
void serialize(const std::vector<T>& obj, std::ostream& out)
{
size_t size = obj.size();
out.write((char*) (&size), sizeof(size_t));
for (size_t i = 0; i < obj.size(); ++i)
serialize(obj[i], out);
}
template <class T>
void deserialize(std::vector<T>& obj, std::istream& in)
{
size_t size;
in.read((char*)(&size), sizeof(size_t));
obj.clear();
obj.resize(size);
for (size_t i = 0; i < size; ++i)
deserialize(obj[i], in);
}
template <class T>
class FileKeeper;
template <class T>
class SortSegment
{
public:
void operator() (std::vector<T> &segment)
{
std::sort(segment.begin(), segment.end());
}
};
template <class T>
class SortNextFile
{
public:
SortNextFile(): lastSize(-1) {};
void operator() (std::vector<std::shared_ptr<FileKeeper<T> > > &files)
{
if (lastSize == -1)
{
std::make_heap(files.begin(), files.end(), AntiCmp());
}
else if ((size_t) lastSize == files.size())
std::push_heap(files.begin(), files.end(), AntiCmp());
std::pop_heap(files.begin(), files.end(), AntiCmp());
lastSize = files.size();
}
private:
int lastSize;
struct AntiCmp
{
bool operator() (std::shared_ptr<FileKeeper<T> > a, std::shared_ptr<FileKeeper<T> > b)
{
return *b < *a;
}
};
};
template <class T>
class ReverseSegment
{
public:
void operator() (std::vector<T> &segment)
{
std::reverse(segment.begin(), segment.end());
}
};
template <class T>
class ReverseNextFile
{
public:
void operator() (std::vector<std::shared_ptr<FileKeeper<T> > > &)
{
}
};
template <class T>
class RandomizeSegment
{
public:
void operator() (std::vector<T> &segment)
{
std::random_shuffle(segment.begin(), segment.end());
}
};
template <class T>
class RandomizeNextFile
{
public:
void operator() (std::vector<std::shared_ptr<FileKeeper<T> > > &files)
{
std::swap(files[files.size() - 1], files[rand() % files.size()]);
}
};
template <class T, class SegmentFunc, class NextFileFunc>
class External_Op
{
public:
// static const size_t defaultSize = 1024;
External_Op(SegmentFunc, NextFileFunc, size_t size);
void push(T elem);
T next();
void finish();
bool isEnd() const;
private:
bool isFinished;
void processSegment();
External_Op(const External_Op&);
External_Op & operator=(const External_Op&);
std::vector<std::shared_ptr<FileKeeper<T> > > files;
std::vector<T> buffer;
size_t buffer_size;
SegmentFunc SegmentFunctor;
NextFileFunc NextFileFunctor;
};
template <class T>
class FileKeeper
{
public:
FileKeeper(const std::vector<T> &array);
~FileKeeper();
bool eof() const;
T next();
bool operator < (const FileKeeper<T> &) const;
private:
T lastval;
bool is_eof;
std::string filename;
std::ifstream file;
};
int main()
{
External_Op<std::vector<int>, SortSegment<std::vector<int> >, SortNextFile<std::vector<int> > >
Sorter(SortSegment<std::vector<int> >(), SortNextFile<std::vector<int> >(), 10000);
for (int i = 0; i < 100000; ++i)
{
std::vector<int> v1;
for (int j = 0; j < 3; ++j)
v1.push_back(rand()%256);
Sorter.push(v1);
}
Sorter.finish();
External_Op<std::vector<int>, ReverseSegment<std::vector<int> >, ReverseNextFile<std::vector<int> > >
Reverser(ReverseSegment<std::vector<int> >(), ReverseNextFile<std::vector<int> >(), 10000);
while (!Sorter.isEnd())
Reverser.push(Sorter.next());
Reverser.finish();
while (!Reverser.isEnd())
{
std::vector <int> v = Reverser.next();
for (int i = 0; i < 3; ++i)
std::cout << v[i] << "\t";
std::cout << std::endl;
}
return 0;
}
template <class T, class SegmentFunc, class NextFileFunc>
External_Op<T, SegmentFunc, NextFileFunc>::External_Op(SegmentFunc SegmentProc, NextFileFunc NextFileProc, size_t size):
SegmentFunctor(SegmentProc),
NextFileFunctor(NextFileProc)
{
isFinished = false;
if (size > 0)
{
buffer_size = size;
buffer.reserve(size);
}
else
throw(std::range_error("Size <= 0"));
}
template <class T, class SegmentFunc, class NextFileFunc>
void External_Op<T, SegmentFunc, NextFileFunc>::push(T elem)
{
if (isFinished)
throw(std::runtime_error("Don`t push data after finish"));
if (buffer.size() >= buffer_size)
{
processSegment();
}
buffer.push_back(elem);
}
template <class T, class SegmentFunc, class NextFileFunc>
void External_Op<T, SegmentFunc, NextFileFunc>::finish()
{
if (isFinished)
throw(std::runtime_error("Don`t use finish twice"));
processSegment();
isFinished = true;
}
template <class T, class SegmentFunc, class NextFileFunc>
bool External_Op<T, SegmentFunc, NextFileFunc>::isEnd() const
{
return files.empty();
}
template <class T, class SegmentFunc, class NextFileFunc>
T External_Op<T, SegmentFunc, NextFileFunc>::next()
{
// std::pop_heap(files.begin(), files.end(), AntiCmp());
if (!isFinished)
throw(std::runtime_error("Operation is not finished yet"));
if(isEnd())
throw(std::runtime_error("Object is empty"));
NextFileFunctor(files);
FileKeeper<T> &lastfile = **(--files.end());
T result = lastfile.next();
if (lastfile.eof())
{
files.pop_back();
}
return result;
}
template <class T, class SegmentFunc, class NextFileFunc>
void External_Op<T, SegmentFunc, NextFileFunc>::processSegment()
{
SegmentFunctor(buffer);
files.push_back(std::shared_ptr<FileKeeper<T> >(new FileKeeper<T> (buffer)));
buffer.erase(buffer.begin(), buffer.end());
}
template <class T>
FileKeeper<T>::FileKeeper(const std::vector<T> &array)
{
filename = tmpnam(NULL);
typename std::vector<T>::const_iterator it = array.begin();
lastval = *it;
++it;
try
{
std::ofstream output(filename);
typename std::vector<T>::const_iterator et = array.end();
is_eof = (it == et);
for (; it != et; ++it)
serialize(*it, output);
output.close();
}
catch(...)
{
remove(filename.c_str());
throw;
}
file.open(filename, std::ifstream::binary);
}
template <class T>
bool FileKeeper<T>::eof() const
{
return is_eof;
}
template <class T>
T FileKeeper<T>::next()
{
T result = lastval;
if (!file.eof())
deserialize(lastval, file);
is_eof = file.eof();
return result;
}
template <class T>
bool FileKeeper<T>::operator < (const FileKeeper<T> &rhs) const
{
return lastval < rhs.lastval;
}
template <class T>
FileKeeper<T>::~FileKeeper()
{
file.close();
remove(filename.c_str());
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment