Skip to content

Instantly share code, notes, and snippets.

@ivan-ushakov
Last active March 23, 2017 17:44
Show Gist options
  • Save ivan-ushakov/2d048afba76cc2e108eea01be306e644 to your computer and use it in GitHub Desktop.
Save ivan-ushakov/2d048afba76cc2e108eea01be306e644 to your computer and use it in GitHub Desktop.
Calculate frequency of words in text. Version without std containers.
// requires C++11, compile with -std=c++11 flag
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <algorithm>
namespace t1
{
template <typename T>
class Vector
{
T *data_;
size_t size_;
size_t cursor_;
public:
Vector() : data_(nullptr), size_(0), cursor_(0)
{
}
Vector(size_t size) : Vector()
{
data_ = new T[size];
size_ = size;
}
Vector(const Vector<T> &other)
{
data_ = new T[other.size_];
size_ = other.size_;
cursor_ = other.cursor_;
for (size_t i = 0; i < other.size(); i++)
{
data_[i] = other.data_[i];
}
}
Vector<T> &operator=(Vector<T> other)
{
swap(*this, other);
return *this;
}
void push_back(const T &value)
{
if (cursor_ == size_)
{
Vector<T> t(size_t(1.2 * size_));
for (size_t i = 0; i < size_; i++)
{
t.data_[i] = data_[i];
}
t.cursor_ = cursor_;
swap(*this, t);
}
data_[cursor_] = value;
cursor_ = cursor_ + 1;
}
T &operator[](size_t index)
{
return data_[index];
}
const T &operator[](size_t index) const
{
return data_[index];
}
void clear()
{
cursor_ = 0;
}
size_t size() const
{
return cursor_;
}
T* data()
{
return data_;
}
~Vector()
{
if (data_ != nullptr)
{
delete[] data_;
}
}
private:
void swap(Vector<T> &a, Vector<T> &b)
{
{
T *t = a.data_;
a.data_ = b.data_;
b.data_ = t;
}
{
size_t t = a.size_;
a.size_ = b.size_;
b.size_ = t;
}
{
size_t t = a.cursor_;
a.cursor_ = b.cursor_;
b.cursor_ = t;
}
}
};
struct String
{
char *data;
size_t size;
String() : data(nullptr), size(0)
{
}
String(char *data, size_t size) : data(data), size(size)
{
}
};
bool operator==(const String &a, const String &b)
{
if (a.size != b.size)
{
return false;
}
return memcmp(a.data, b.data, a.size) == 0;
}
bool operator<(const String &a, const String &b)
{
size_t size = a.size < b.size ? a.size : b.size;
int r = memcmp(a.data, b.data, size);
if (r < 0)
{
return true;
}
if (r > 0)
{
return false;
}
return a.size < b.size;
}
struct Counter
{
String word;
int value;
void increase()
{
value = value + 1;
}
};
class File
{
FILE *file_;
bool error_;
char *buffer_;
size_t begin_;
size_t end_;
static const size_t BufferSize = 2 * 4096;
public:
File(const char *name, const char *mode) : file_(NULL), error_(false)
{
file_ = fopen(name, mode);
buffer_ = new char[BufferSize];
begin_ = 0;
end_ = 0;
}
char get()
{
if (begin_ == end_)
{
begin_ = 0;
end_ = fread(buffer_, sizeof(char), BufferSize, file_);
if (end_ == 0)
{
error_ = true;
return 0;
}
}
return buffer_[begin_++];
}
bool fail() const
{
return file_ == NULL || error_;
}
void write(t1::Counter &counter)
{
fprintf(file_, "%d ", counter.value);
fwrite(counter.word.data, sizeof(char), counter.word.size, file_);
fprintf(file_, "\n");
}
~File()
{
if (file_ != NULL)
{
fclose(file_);
}
if (buffer_ != nullptr)
{
delete[] buffer_;
}
}
};
class Guard
{
const Vector<String> &vector_;
public:
Guard(const Vector<String> &vector) : vector_(vector)
{
}
~Guard()
{
for (size_t i = 0; i < vector_.size(); i++)
{
if (vector_[i].data != nullptr)
{
delete[] vector_[i].data;
}
}
}
};
}
int main(int argc, char const *argv[])
{
if (argc != 3)
{
printf("usage: freqs in.txt out.txt\n");
return 0;
}
t1::File input(argv[1], "r");
if (input.fail())
{
printf("error: can't read input file\n");
return -1;
}
t1::Vector<t1::String> words(8);
t1::Guard guard(words);
t1::Vector<char> buffer(16);
while (!input.fail())
{
int c = input.get();
if (isalpha(c))
{
buffer.push_back(tolower(c));
}
else
{
if (buffer.size() > 0)
{
char *word = new char[buffer.size()];
memcpy(word, buffer.data(), buffer.size());
words.push_back(t1::String(word, buffer.size()));
buffer.clear();
}
}
}
std::sort(words.data(), words.data() + words.size(), [](const t1::String &a, const t1::String &b) {
return a < b;
});
t1::Vector<t1::Counter> counters(8);
t1::Counter counter;
counter.word = words[0];
counter.value = 1;
for (size_t i = 1; i < words.size(); i++)
{
t1::String &s = words[i];
if (counter.word == s)
{
counter.increase();
}
else
{
counters.push_back(counter);
counter.word = s;
counter.value = 1;
}
}
counters.push_back(counter);
std::sort(counters.data(), counters.data() + counters.size(), [](const t1::Counter &a, const t1::Counter &b) {
if (a.value < b.value)
{
return false;
}
if (a.value > b.value)
{
return true;
}
return a.word < b.word;
});
t1::File output(argv[2], "w");
if (output.fail())
{
printf("error: can't create output file\n");
return -1;
}
for (size_t i = 0; i < counters.size(); i++)
{
output.write(counters[i]);
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment