Skip to content

Instantly share code, notes, and snippets.

@ducalpha
Forked from ducss2015/point_of_max_overlap.cpp
Last active October 23, 2015 05:47
Show Gist options
  • Save ducalpha/0534aa9c1a9113b83cd6 to your computer and use it in GitHub Desktop.
Save ducalpha/0534aa9c1a9113b83cd6 to your computer and use it in GitHub Desktop.
Find the point of max overlap, given a number of intervals
/*You are given large numbers of logs, each one of which contains a start time (long), end time (long) and memory usage (int). The time is recorded as MMDDHH (100317 means October 3rd 5PM) Write an algorithm that returns a specific time (hour) when the memory usage is the highest. If there are multiple answers, return the first hour.
*/
#include <cstdio>
#include <sstream>
#include <fstream>
#include <map>
#include <vector>
using namespace std;
struct MemoryUsage {
long start_time, end_time;
int memory_usage;
};
class FileSet {
public:
FileSet(const vector<string>& file_names_)
: file_names_(file_names_) {}
// return true if successfully read a data
bool GetNext(MemoryUsage& usage) {
while (curr_file_index_ < file_names_.size()) {
if (!curr_file_.is_open()) curr_file_.open(file_names_[curr_file_index_]);
// populate data
string line;
if (getline(curr_file_, line)) {
istringstream linestream(line);
linestream >> usage.start_time >> usage.end_time >> usage.memory_usage;
return true;
}
curr_file_.close();
++curr_file_index_;
}
return false;
}
private:
vector<string> file_names_;
ifstream curr_file_;
int curr_file_index_ = 0;
};
long FindMaxMemoryUsageStartTime(map<long, int>& time_usage_map) {
int previous_usage = time_usage_map.begin()->second, max_usage = previous_usage;
long max_usage_start_time = time_usage_map.begin()->first, max_usage_end_time = max_usage_start_time;
for (map<long, int>::iterator it = next(time_usage_map.begin()); it != time_usage_map.end(); ++it) {
it->second += previous_usage;
previous_usage = it->second;
if (it->second > max_usage) {
max_usage = it->second;
max_usage_start_time = it->first;
max_usage_end_time = next(it)->first - 1; // we are sure that the max usage is before the last point
}
}
printf("Memory usage:\n");
for (map<long, int>::iterator it = time_usage_map.begin(); it != time_usage_map.end(); ++it)
printf("%ld %d\n", it->first, it->second);
printf("Max memory usage: %d (from %ld to %ld)\n", max_usage, max_usage_start_time, max_usage_end_time);
return max_usage_start_time;
} // O(2n) elements, the map traversal is O(n)
void PopulateTimeUsages(FileSet& file_set, map<long, int>& time_usage_map) {
// add the time-memory_usage to the map one by one
MemoryUsage curr;
while (file_set.GetNext(curr)) { // Next return false when there is no available data
time_usage_map[curr.start_time] += curr.memory_usage;
time_usage_map[curr.end_time + 1] += -curr.memory_usage;
}
} // O(n logn) (2*n operations) * log n of tree searching
long FindMaxMemoryUsage(const vector<string>& log_files) {
FileSet file_set(log_files);
map<long, int> time_usage_map; // map from start/end_time to memory usage, represents cumulative memory usage
PopulateTimeUsages(file_set, time_usage_map);
return FindMaxMemoryUsageStartTime(time_usage_map);
}
// n log lines
// O(n log n) + O(n) = O(n logn)
/* Test
1 3 2
2 4 2
time_usage_map: 1 2, 2 2, 4 -2, 5 -2
previous = 2, max_usage = 2, start_time = 0
map: 1 2, 2 4, 4 -2, 5 -2
previous = 4, max_usage = 4, start_time = 2
map: 1 2, 2 4, 4 2, 5 -2
previous = 2, max_usage = 4, start_time = 2
map: 1 2, 2 4, 4 2, 5 0
previous = 0, max_usage = 4, start_time = 2
*/
int main() {
vector<string> log_files{"log_file3"};
//vector<string> log_files{"log_file", "log_file2"};
long max_start_time = FindMaxMemoryUsage(log_files);
printf("Max start time: %ld", max_start_time);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment