Created
November 15, 2014 19:04
-
-
Save kedarmhaswade/b7189f259427fda09363 to your computer and use it in GitHub Desktop.
NearbyDataset
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.PrintStream;
import java.io.PrintWriter;
import java.util.Locale;
import java.util.Random;
/**
 * Generates a somewhat larger random test dataset for
 * http://www.quora.com/challenges#nearby.
 *
 * <p>Usage: {@code java NearbyDataset <topics> <questions> <queries>}. The dataset is
 * written to standard output as a header line ({@code T Q N}), followed by one line per
 * topic, one line per question and one line per query.
 *
 * <p>All numbers are formatted with {@link Locale#ROOT} so the output always uses ASCII
 * digits and a '.' decimal separator, regardless of the platform's default locale.
 *
 * @author Kedar Mhaswade
 */
public class NearbyDataset {
    /** Largest number of topics the generator emits (the generator assumes 1 <= T <= 10000). */
    private static final int MAX_TOPICS = 10000;

    /** Shared, unseeded source of randomness: every run produces a different dataset. */
    static final Random r = new Random();

    /**
     * Command-line entry point.
     *
     * @param args the number of topics, questions and queries, in that order
     * @throws IllegalArgumentException if fewer than three arguments are given
     * @throws NumberFormatException if an argument is not a valid integer
     */
    public static void main(String[] args) {
        if (args.length < 3) {
            throw new IllegalArgumentException(
                    "usage: java NearbyDataset <topics> <questions> <queries>");
        }
        build(Integer.parseInt(args[0]), Integer.parseInt(args[1]), Integer.parseInt(args[2]),
                System.out);
    }

    /**
     * Writes a complete dataset (header, topics, questions, queries) to {@code out}.
     *
     * @param T requested number of topics; values above {@link #MAX_TOPICS} are clamped
     * @param Q number of questions
     * @param N number of queries
     * @param out destination stream
     * @throws IllegalArgumentException if any count is negative
     */
    private static void build(int T, int Q, int N, PrintStream out) {
        if (T < 0 || Q < 0 || N < 0) {
            throw new IllegalArgumentException(
                    "counts must be non-negative: T=" + T + ", Q=" + Q + ", N=" + N);
        }
        // Clamp once, up front, so the header, the emitted topics and the topic ids
        // referenced by questions all agree on the same topic count.
        int topics = Math.min(T, MAX_TOPICS);
        out.printf(Locale.ROOT, "%d %d %d%n", topics, Q, N);
        double X = 1000.0, Y = 1000.0; // exclusive upper bounds of the coordinate range
        buildTopics(topics, X, Y, out);
        int ntq = topics; // a question is tagged with at most ntq topics
        buildQuestions(Q, topics, ntq, out);
        int res = 100; // a query asks for at most res results
        buildQueries(N, res, X, Y, out);
    }

    /**
     * Writes {@code queries} lines of the form {@code <t|q> <count> <x> <y>}.
     *
     * @param queries number of query lines to write
     * @param results maximum result count a query may request (inclusive)
     * @param X exclusive upper bound for the x coordinate
     * @param Y exclusive upper bound for the y coordinate
     * @param out destination stream
     */
    private static void buildQueries(int queries, int results, double X, double Y, PrintStream out) {
        for (int i = 0; i < queries; i++) {
            // roughly half topic queries and half question queries, each at a random location
            char c = r.nextBoolean() ? 't' : 'q';
            int nr = r.nextInt(results + 1);
            out.printf(Locale.ROOT, "%c %d %.2f %.2f%n", c, nr, r.nextDouble() * X, r.nextDouble() * Y);
        }
    }

    /**
     * Writes {@code t} lines of the form {@code <id> <x> <y>} with ids {@code 0..t-1}.
     *
     * @param t number of topics; values above {@link #MAX_TOPICS} are clamped
     * @param X exclusive upper bound for the x coordinate
     * @param Y exclusive upper bound for the y coordinate
     * @param out destination stream
     */
    private static void buildTopics(int t, double X, double Y, PrintStream out) {
        // locations are approximately uniformly distributed over 0.0 <= x < X, 0.0 <= y < Y
        // 1 <= t <= 10000
        t = Math.min(t, MAX_TOPICS);
        for (int i = 0; i < t; i++) {
            double x = r.nextDouble() * X;
            double y = r.nextDouble() * Y;
            // Write to the supplied stream, not System.out, so callers can redirect output.
            out.printf(Locale.ROOT, "%d %.2f %.2f%n", i, x, y);
        }
    }

    /**
     * Writes {@code q} lines of the form {@code <qid> <k> <topic-id>...}, where the
     * {@code k} topic ids are distinct and drawn from {@code 0..t-1}.
     *
     * @param q number of questions; question ids are {@code 0..q-1}
     * @param t number of topics available
     * @param ntq maximum number of topics per question (inclusive); capped at {@code t}
     * @param out destination stream
     */
    private static void buildQuestions(int q, int t, int ntq, PrintStream out) {
        // One array is reused for every question: the partial shuffle below only permutes
        // it, and any permutation of 0..t-1 is an equally good starting point for sampling.
        int[] ints = intArray(t);
        // Cannot pick more distinct topics than exist.
        int maxRelated = Math.max(0, Math.min(ntq, t));
        for (int i = 0; i < q; i++) { // i is the qid
            int related = r.nextInt(maxRelated + 1);
            out.printf(Locale.ROOT, "%d %d", i, related);
            int len = ints.length;
            for (int j = 0; j < related; j++) {
                // Pick from the not-yet-chosen prefix, then move the pick out of that prefix.
                int pid = r.nextInt(len);
                out.printf(Locale.ROOT, " %d", ints[pid]);
                swap(ints, pid, len - 1);
                len -= 1;
            }
            out.println();
        }
    }

    /** Exchanges the elements at indices {@code pid} and {@code last} of {@code ints}. */
    private static void swap(int[] ints, int pid, int last) {
        int tmp = ints[pid];
        ints[pid] = ints[last];
        ints[last] = tmp;
    }

    /** Returns a new array holding {@code 0, 1, ..., t-1}. */
    private static int[] intArray(int t) {
        int[] ints = new int[t];
        for (int i = 0; i < t; i++) {
            ints[i] = i;
        }
        return ints;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment