Skip to content

Instantly share code, notes, and snippets.

@iwiwi
Created June 10, 2013 12:32
Show Gist options
  • Save iwiwi/5748383 to your computer and use it in GitHub Desktop.
Save iwiwi/5748383 to your computer and use it in GitHub Desktop.
Convert edge lists described by names to those described by 0-indexed vertex IDs
/*
* Basic usage:
* $ CXXFLAGS=-O3 make shrink_ids
* $ ./shrink_ids < EDGES > SHRINKED_EDGES
*
* Example:
* $ echo "hoge piyo\nfuga hoge" | ./shrink_ids
* 0 1
* 2 0
*
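* Options:
* -f TARGET_FIELDS --- Specify target fields as a comma-separated list of 1-indexed
*                      field numbers, e.g. "-f 2,3" (similar to the 'cut' command, but
*                      ranges are not supported; default: 1,2)
* -v OUTPUT_VERTEX_NAME_FILE --- Output names of vertices, one per line, in order of
*                                their assigned IDs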
*
* More examples:
* $ echo "01:00 piyo fuga\n02:15 fuga hoge" | ./shrink_ids -f 2,3 -v tmp.txt
* 01:00 0 1
* 02:15 1 2
* $ cat tmp.txt
* piyo
* fuga
* hoge
*/
#include <cstdio>
#include <cstring>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>
#include <algorithm>
#include <map>
#include <vector>
using namespace std;
// Parses a comma-separated list of 1-indexed field numbers (e.g. "2,3") into
// `fields`, replacing its previous contents.
//
// Returns false when the list is empty, contains a token that is not an
// integer, or names a field smaller than 1 (fields are numbered from 1, so
// such a value could never match any input column).
static bool parse_target_fields(std::string spec, std::vector<int> &fields) {
  // Turn commas into blanks so that operator>> can tokenize the list.
  std::replace(spec.begin(), spec.end(), ',', ' ');
  std::istringstream ss(spec);
  fields.clear();
  for (int f; ss >> f; ) {
    if (f < 1) return false;
    fields.push_back(f);
  }
  // Extraction has to stop because the input ran out, not because of garbage.
  if (!ss.eof() || ss.bad()) return false;
  return !fields.empty();
}

// Reads whitespace-separated records from stdin and writes them to stdout with
// fields joined by tabs. Every token found in a target field is replaced by a
// 0-indexed integer ID; IDs are handed out in order of first appearance and
// the same token always maps to the same ID. Other fields are copied verbatim.
//
// Command line:
//   -f FIELDS   comma-separated, 1-indexed target fields (default: 1,2);
//               both "-f 2,3" and "-f2,3" are accepted
//   -v FILE     additionally write each vertex name to FILE, one per line, in
//               ID order; both "-v FILE" and "-vFILE" are accepted
//
// Vertex and edge (input line) counts are reported on stderr. Exits with
// EXIT_FAILURE on bad arguments or when an output stream reports an error.
int main(int argc, char **argv) {
  // Decouple iostreams from C stdio and stop cin from flushing cout before
  // every read. Note that sync_with_stdio() without an argument means `true`
  // and therefore changes nothing. This is safe here because stdout is written
  // exclusively through cout.
  std::ios_base::sync_with_stdio(false);
  std::cin.tie(NULL);

  //
  // Parse commandline flags
  //
  std::vector<int> option_target_fields(2);
  option_target_fields[0] = 1;
  option_target_fields[1] = 2;
  std::ofstream ofs_vertices;

  for (int i = 1; i < argc; ++i) {
    if (std::strncmp(argv[i], "-f", 2) == 0) {
      // The value is either attached ("-f2,3") or the next argument.
      const char *value = argv[i] + 2;
      if (*value == '\0') {
        if (++i >= argc) {
          std::fprintf(stderr, "ERROR: Bad args (after '-f')\n");
          std::exit(EXIT_FAILURE);
        }
        value = argv[i];
      }
      if (!parse_target_fields(value, option_target_fields)) {
        std::fprintf(stderr, "Error: Bad args (after '-f')\n");
        std::exit(EXIT_FAILURE);
      }
    } else if (std::strncmp(argv[i], "-v", 2) == 0) {
      // Same convention as -f: an attached value must not be dropped in
      // favour of the following argument.
      const char *path = argv[i] + 2;
      if (*path == '\0') {
        if (++i >= argc) {
          std::fprintf(stderr, "ERROR: Bad args (after '-v')\n");
          std::exit(EXIT_FAILURE);
        }
        path = argv[i];
      }
      ofs_vertices.open(path);
      if (!ofs_vertices) {
        std::perror("Error: Failed to open file");
        std::exit(EXIT_FAILURE);
      }
    } else {
      std::fprintf(stderr, "Error: Bad args\n");
      std::exit(EXIT_FAILURE);
    }
  }

  // Sort and deduplicate so that membership can be tested by binary search.
  std::sort(option_target_fields.begin(), option_target_fields.end());
  option_target_fields.erase(
      std::unique(option_target_fields.begin(), option_target_fields.end()),
      option_target_fields.end());

  //
  // Shrink
  //
  typedef std::map<std::string, int> IdMap;
  IdMap ma;               // vertex name -> assigned ID
  std::size_t num_e = 0;  // number of input lines processed
  std::string line;
  std::string token;
  while (std::getline(std::cin, line)) {
    ++num_e;
    std::istringstream ss(line);
    for (int f = 1; ss >> token; ++f) {
      if (f > 1) std::cout << '\t';
      if (std::binary_search(option_target_fields.begin(),
                             option_target_fields.end(), f)) {
        // insert() leaves an existing entry alone, so a name seen before keeps
        // its ID; a new name receives the next unused ID (the current size).
        const int next_id = static_cast<int>(ma.size());
        const std::pair<IdMap::iterator, bool> p =
            ma.insert(IdMap::value_type(token, next_id));
        std::cout << p.first->second;
        if (p.second && ofs_vertices.is_open()) ofs_vertices << token << '\n';
      } else {
        std::cout << token;
      }
    }
    // '\n' rather than endl: flushing once per edge is a needless cost.
    std::cout << '\n';
  }

  // Output is no longer flushed line by line, so flush explicitly and report
  // write failures instead of exiting successfully with truncated output.
  std::cout.flush();
  if (!std::cout) {
    std::fprintf(stderr, "Error: Failed to write output\n");
    return EXIT_FAILURE;
  }
  if (ofs_vertices.is_open()) {
    ofs_vertices.close();
    if (!ofs_vertices) {
      std::fprintf(stderr, "Error: Failed to write vertex name file\n");
      return EXIT_FAILURE;
    }
  }

  std::cerr << "Number of Vertices: " << ma.size() << '\n';
  std::cerr << "Number of Edges: " << num_e << '\n';
  // Return instead of calling exit(): exit() does not run the destructors of
  // main's local objects such as ofs_vertices.
  return EXIT_SUCCESS;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment