Skip to content

Instantly share code, notes, and snippets.

@spaghetti-source
Created June 8, 2013 13:31
Show Gist options
  • Save spaghetti-source/5735183 to your computer and use it in GitHub Desktop.
Save spaghetti-source/5735183 to your computer and use it in GitHub Desktop.
// Bigram graph G = (V,E)
// V: set of words
// E: u->v iff "u v" occurs in the document
//
// Usage: ./a.out < pg11.txt
// ( http://www.gutenberg.org/cache/epub/11/pg11.txt )
//
// the 0.0514497
// and 0.0313457
// to 0.0241298
// of 0.0208681
// a 0.0188344
// it 0.0175057
// i 0.0155607
// she 0.0153838
// you 0.0132718
// in 0.0130744
// said 0.0125918
// alice 0.010906
// that 0.00956025
// was 0.00917664
// as 0.00770203
// her 0.00729531
// with 0.00711062
// at 0.00700045
// all 0.00591824
// or 0.00589347
//
#include <iostream>
#include <vector>
#include <cstdio>
#include <cstdlib>
#include <map>
#include <cmath>
#include <cstring>
#include <numeric>
#include <functional>
#include <algorithm>
#include <tr1/unordered_map>
using namespace std;
using namespace tr1;
#define ALL(c) c.begin(), c.end()
#define FOR(i,c) for(typeof(c.begin())i=c.begin();i!=c.end();++i)
#define REP(i,n) for(int i=0;i<n;++i)
#define fst first
#define snd second
unordered_map<string, int> wordToId;
vector<string> idToWord;
vector< vector<int> > adj;
void addWord(string s) {
if (wordToId.count(s)) return;
int n = idToWord.size();
wordToId[s] = n;
idToWord.push_back(s);
adj.push_back( vector<int>() );
}
void readFile(FILE *fp) {
string s, t;
addWord(t);
for (char c; (c = getchar()) != EOF; ) {
if (isalpha(c)) {
s += tolower(c);
} else if (s != "") {
addWord(s);
adj[ wordToId[t] ].push_back( wordToId[s] );
t = s; s = "";
}
}
}
void pagerank(vector<double> b = vector<double>(adj.size(), 1), double a = 0.85) {
vector<double> x = b;
REP(epoch, 100) {
vector<double> y(x.size());
REP(u, adj.size()) {
int d = adj[u].size();
REP(k, d) {
int v = adj[u][k];
y[v] += x[u] / d;
}
}
double err = 0;
REP(u, x.size()) {
double rhs = a*y[u] + (1-a)*b[u];
err += pow(x[u]-rhs, 2);
x[u] = rhs;
}
err = sqrt(err);
if (err < 1e-6) break;
}
double total = 0;
REP(u, x.size()) total += x[u];
REP(u, x.size()) x[u] /= total;
vector< pair<double, int> > rank;
REP(u, x.size()) rank.push_back( make_pair(x[u], u) );
sort(ALL(rank), greater< pair<double,int> >());
REP(k, min(rank.size(), 20u)) {
string word = idToWord[rank[k].snd];
double score = rank[k].fst;
cout << word << " " << score << endl;
}
}
int main() {
readFile(stdin);
//vector<double> b(adj.size());
//b[ wordToId["alice"] ] = 1;
//pagerank(b);
pagerank();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment