Skip to content

Instantly share code, notes, and snippets.

@spaghetti-source
Last active January 4, 2016 05:49
Show Gist options
  • Save spaghetti-source/8578210 to your computer and use it in GitHub Desktop.
Save spaghetti-source/8578210 to your computer and use it in GitHub Desktop.
MDS for GHCN dataset
//
// http://www.ncdc.noaa.gov/ghcnm/v3.php
//
#include <iostream>
#include <vector>
#include <sstream>
#include <cstdio>
#include <cstdlib>
#include <map>
#include <cmath>
#include <cstring>
#include <functional>
#include <algorithm>
#include <unordered_map>
using namespace std;
#define fst first
#define snd second
template <class T>
T parse(char *s, char *t) {
stringstream ss;
for (; s != t; ++s) ss << *s;
T ret;
ss >> ret;
return ret;
}
template <>
string parse(char *s, char *t) {
return string(s, t);
}
struct city {
string id;
double latitude, longitude;
string name;
};
ostream& operator<<(ostream &os, const city &c) {
os << "[" << c.name << "(" << c.id << "); " << c.latitude << "," << c.longitude << "]";
return os;
}
unordered_map<string, city> cities;
double great_circle_distance(double phi1, double lam1, double phi2, double lam2) {
double dphi = abs(phi1 - phi2);
double dlam = abs(lam1 - lam2);
double num = sqrt(pow(cos(phi2)*sin(dlam),2)
+pow(cos(phi1)*sin(phi2)-sin(phi1)*cos(phi2)*cos(dlam),2));
double den = sin(phi1)*sin(phi2) + cos(phi1)*cos(phi2)*cos(dlam);
return atan2(num, den);
}
double distance(city a, city b) {
static const double c = atan(1.0)/180./4;
return great_circle_distance(c*a.latitude, c*a.longitude,
c*b.latitude, c*b.longitude);
}
void read_inv(const char *file) {
FILE *fp = fopen(file, "r");
for (char line[1024]; fgets(line, sizeof(line), fp); ) {
city c = {parse<string>(line+ 0, line+11),
parse<double>(line+12, line+20),
parse<double>(line+21, line+30),
parse<string>(line+38, line+68)};
cities[c.id] = c;
}
}
vector<vector<double>> mds(vector<vector<double>> D, size_t rank = 2) {
size_t n = D.size();
for (size_t i = 0; i < n; ++i)
for (size_t j = 0; j < n; ++j)
D[i][j] *= D[i][j];
vector<vector<double>> B = D;
double total = 0;
vector<double> rowsum(n), colsum(n);
for (size_t i = 0; i < n; ++i) {
for (size_t j = 0; j < n; ++j) {
rowsum[i] += D[i][j];
colsum[j] += D[i][j];
total += D[i][j];
}
}
for (size_t i = 0; i < n; ++i)
for (size_t j = 0; j < n; ++j)
B[i][j] = ((rowsum[i] + colsum[j] - total/n)/n - D[i][j])/2;
vector<double> lambda(rank);
vector<vector<double>> curr(n, vector<double>(rank));
for (size_t i = 0; i < n; ++i)
for (size_t r = 0; r < rank; ++r)
curr[i][r] = rand() / (1.0 + RAND_MAX);
for (size_t iter = 0; iter < 10; ++iter) {
vector<vector<double>> next(n, vector<double>(rank));
for (size_t i = 0; i < n; ++i)
for (size_t j = 0; j < n; ++j)
for (size_t r = 0; r < rank; ++r)
next[i][r] += B[i][j] * curr[j][r];
for (size_t r = 0; r < rank; ++r)
lambda[r] = next[0][r] / curr[0][r];
next.swap(curr);
for (size_t r = 0; r < rank; ++r) {
for (size_t s = 0; s < r; ++s) {
double dot = 0;
for (size_t i = 0; i < n; ++i)
dot += curr[i][r] * curr[i][s];
for (size_t i = 0; i < n; ++i)
curr[i][r] -= dot * curr[i][s];
}
double norm = 0;
for (size_t i = 0; i < n; ++i)
norm += curr[i][r] * curr[i][r];
norm = sqrt(norm);
for (size_t i = 0; i < n; ++i)
curr[i][r] /= norm;
}
}
for (size_t r = 0; r < rank; ++r)
lambda[r] = sqrt(max(0.0, lambda[r]));
for (size_t i = 0; i < n; ++i)
for (size_t r = 0; r < rank; ++r)
curr[i][r] *= lambda[r];
return curr;
}
void make_map(string header, size_t rank = 2) {
vector<city> region;
for (auto p: cities)
if (p.fst.substr(0,header.size()) == header)
region.push_back(p.snd);
size_t n = region.size();
vector< vector<double> > D(n, vector<double>(n));
for (size_t i = 0; i < n; ++i)
for (size_t j = 0; j < i; ++j)
D[i][j] = D[j][i] = distance(region[i], region[j]);
vector<vector<double>> position = mds(D, rank);
for (size_t i = 0; i < n; ++i) {
for (size_t r = 0; r < rank; ++r)
cout << position[i][r] << " ";
cout << endl;
}
}
int main(int argc, char *argv[]) {
read_inv(argv[1]);
//make_map("210", 2); // japan
make_map("", 2); // world
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment