Skip to content

Instantly share code, notes, and snippets.

@wenhuizhang
Last active August 29, 2015 14:14
Show Gist options
  • Save wenhuizhang/d5c7a4601bc0e5cae696 to your computer and use it in GitHub Desktop.
Save wenhuizhang/d5c7a4601bc0e5cae696 to your computer and use it in GitHub Desktop.
DataIncubator
/*
Q1: There is a subway car with N adjacent seats in a row.
People walk into the subway and choose a random available seat (drawn uniformly).
The only constraint on seat availability is that they do not like to sit next to one another
so there is always (at least) one empty seat between any two individuals.
This process continues until all available seats are taken.
What are the mean and standard deviation of the fraction of occupied seats
(when the process is complete) for different values of N?
Give the answer with 10 digits of significance.
*/
#include<stdlib.h> /* srand, rand */
#include <stdio.h> /* printf, NULL */
#include <time.h> /* time */
#include <math.h> /*sqrt and other calculation*/
#include <string.h> /*memset*/
/*
 * Simulate one complete seating of a row of n free, adjacent seats and
 * return how many seats end up occupied.
 *
 * The first passenger of a free stretch picks one of its n seats uniformly.
 * That seat and its two neighbours are then unavailable, which splits the
 * stretch into two independent free stretches: k-1 seats on the left and
 * n-k-2 seats on the right (k is the 0-based seat chosen). Later arrivals
 * choose uniformly among all available seats, so within each stretch the
 * choice is again uniform and the same rule applies recursively.
 *
 * n       : number of adjacent free seats; values <= 0 mean "no seats".
 * returns : number of occupied seats (a whole number, kept as double so the
 *           caller can divide by n without an extra cast).
 *
 * NOTE: rand() % n is slightly biased and only covers 0..RAND_MAX, so for
 * n larger than RAND_MAX some seats can never be picked.
 */
double find_seat( int n )
{
    // Seed exactly once: reseeding with time(NULL) on every call would
    // restart the same sequence for all calls made within one second.
    static bool seeded = false;
    if (!seeded) {
        srand( static_cast<unsigned>( time(NULL) ) );
        seeded = true;
    }

    if (n <= 0) return 0;   // empty stretch: nobody can sit
    if (n <= 2) return 1;   // one passenger blocks the whole stretch

    const int k = rand() % n;   // seat taken by the first arrival (0-based)

    // Seats k-1 and k+1 are blocked; recurse on what is left on each side.
    return 1 + find_seat(k - 1) + find_seat(n - k - 2);
}
/*
 * Arithmetic mean of the first n entries of data.
 *
 * data    : samples to average (not modified).
 * n       : number of samples to read from data.
 * returns : sum(data[0..n-1]) / n, or 0.0 when n <= 0 so that an empty
 *           sample never divides by zero.
 */
double mean(double data[], int n)
{
    if (n <= 0) {
        return 0.0;
    }
    double sum = 0.0;   // must start at zero; it was left uninitialised before
    for (int i = 0; i < n; i++) {
        sum += data[i];
    }
    return sum / n;
}
/*
 * Population standard deviation of the first n entries of data, i.e. the
 * square root of the mean squared deviation from the average (the divisor
 * is n, not n-1).
 *
 * data    : samples (not modified).
 * n       : number of samples to read from data.
 * returns : the standard deviation, or 0.0 when n <= 0 so that an empty
 *           sample never divides by zero.
 */
double standard_deviation(double data[], int n)
{
    if (n <= 0) {
        return 0.0;
    }

    // First pass: average. Both accumulators must start at zero; they were
    // left uninitialised before.
    double sum = 0.0;
    for (int i = 0; i < n; i++) {
        sum += data[i];
    }
    const double average = sum / n;

    // Second pass: accumulate squared deviations from the average.
    double sum_deviation = 0.0;
    for (int i = 0; i < n; i++) {
        const double delta = data[i] - average;
        sum_deviation += delta * delta;
    }
    return sqrt(sum_deviation / n);
}
/*
 * Monte Carlo driver: calls find_seat(n) m times, stores each result divided
 * by n (the fraction of occupied seats), prints every sample and finally the
 * mean and standard deviation of the samples.
 */
int main(){
    // Both sizes are compile-time constants so that data[] below is an
    // ordinary fixed-size array; an array sized by a non-const int is a
    // variable-length array, which standard C++ does not provide.
    const int m = 3000; //iterations to get the final result
    const int n = 25;
    //const int n = 50000;
    double data[m];
    for (int i = 0; i < m; i++){
        // find_seat returns double, so this is a floating-point division.
        data[i] = find_seat(n) / n;
        printf("%d th simulation of occupation rate for %d seats is %.10lf \n", i, n, data[i]);
    }
    printf("mean for %d seats is %.10lf\n" , n, mean(data, m));
    printf("standard devition for %d seats is %.10lf\n" , n, standard_deviation(data, m));
    return 0;
}
/*
Q1: There is a subway car with N adjacent seats in a row.
People walk into the subway and choose a random available seat (drawn uniformly).
The only constraint on seat availability is that they do not like to sit next to one another
so there is always (at least) one empty seat between any two individuals.
This process continues until all available seats are taken.
What are the mean and standard deviation of the fraction of occupied seats
(when the process is complete) for different values of N?
Give the answer with 10 digits of significance.
*/
#include<stdlib.h> /* srand, rand */
#include <stdio.h> /* printf, NULL */
#include <time.h> /* time */
#include <math.h> /*sqrt and other calculation*/
#include <string.h> /*memset*/
/*
 * Seat one passenger on a uniformly chosen free seat.
 *
 * Seat states stored in seats[]:
 *   0 = free, 1 = occupied, 2 = blocked (adjacent to an occupied seat).
 *
 * numSeats : number of entries in seats[].
 * numFree  : in/out count of entries that are 0; decremented for the seat
 *            taken and for every free neighbour that becomes blocked.
 * seats    : in/out seat states.
 * returns  : true when a passenger was seated, false when no seat is free.
 */
bool find_seat( int numSeats, int &numFree, int *seats ) {
    if ( numFree <= 0 ) {
        return false;
    }

    // Pick the count-th free seat (0-based) counting from the left.
    int count = rand() % numFree;
    int index = -1;
    for ( int i = 0; i < numSeats; i++ ) {
        if ( seats[ i ] == 0 ) {
            if ( count == 0 ) {
                index = i;
                break;
            }
            count--;
        }
    }

    if ( index < 0 ) {
        // numFree claimed more free seats than seats[] actually holds.
        // Resynchronise the counter instead of writing to seats[ -1 ].
        numFree = 0;
        return false;
    }

    seats[ index ] = 1;
    numFree--;

    // Block the neighbours; only neighbours that were still free reduce
    // the free count (a seat may already be blocked from the other side).
    if ( index > 0 && seats[ index - 1 ] == 0 ) {
        seats[ index - 1 ] = 2;
        numFree--;
    }
    if ( index < numSeats - 1 && seats[ index + 1 ] == 0 ) {
        seats[ index + 1 ] = 2;
        numFree--;
    }
    return true;
}
/*
 * Run one complete seating simulation on a row of num seats: keep calling
 * find_seat until it reports that nobody else can sit, print the final row
 * (1 = occupied, 0 = not occupied) on one line, and return the head count.
 *
 * num     : number of seats in the row; num <= 0 is treated as an empty row.
 * returns : number of passengers seated.
 */
int sim( int num ) {
    if ( num <= 0 ) {
        // Empty row: nobody sits, but still emit the (empty) row line.
        printf( "\n" );
        return 0;
    }

    // Heap storage instead of a variable-length array: VLAs are not standard
    // C++ and a large row could exhaust the stack. calloc zero-fills, so
    // every seat starts out free (0).
    int *seats = static_cast<int *>( calloc( num, sizeof( int ) ) );
    if ( seats == NULL ) {
        perror( "calloc" );
        exit( EXIT_FAILURE );
    }

    int numFree = num;
    int numPeople = 0;
    while( find_seat( num, numFree, seats ) ) {
        numPeople++;
    }

    for ( int i = 0; i < num; i++ ) {
        printf( "%d ", seats[ i ] == 1 );
    }
    printf( "\n" );

    free( seats );
    return numPeople;
}
/*
 * Arithmetic mean of the first n entries of data.
 *
 * data    : samples to average (not modified).
 * n       : number of samples to read from data.
 * returns : sum(data[0..n-1]) / n, or 0.0 when n <= 0 so that an empty
 *           sample never divides by zero.
 */
double mean(double data[], int n)
{
    if (n <= 0) {
        return 0.0;
    }
    double total = 0.0;   // must start at zero; it was left uninitialised before
    for (int i = 0; i < n; i++) {
        total += data[i];
    }
    return total / n;
}
/*
 * Population standard deviation of the first n entries of data, i.e. the
 * square root of the mean squared deviation from the average (the divisor
 * is n, not n-1).
 *
 * data    : samples (not modified).
 * n       : number of samples to read from data.
 * returns : the standard deviation, or 0.0 when n <= 0 so that an empty
 *           sample never divides by zero.
 */
double standard_deviation(double data[], int n)
{
    if (n <= 0) {
        return 0.0;
    }

    // First pass: average. Both accumulators must start at zero; they were
    // left uninitialised before.
    double total = 0.0;
    for (int i = 0; i < n; i++) {
        total += data[i];
    }
    const double average = total / n;

    // Second pass: accumulate squared deviations from the average.
    double sum_deviation = 0.0;
    for (int i = 0; i < n; i++) {
        const double delta = data[i] - average;
        sum_deviation += delta * delta;
    }
    return sqrt(sum_deviation / n);
}
/*
 * Monte Carlo driver: calls sim(n) m times, stores each head count divided
 * by n (the fraction of occupied seats), prints every sample and finally the
 * mean and standard deviation of the samples.
 */
int main(){
    // Seed the generator once per run; without this rand() starts from the
    // same default seed every time and each run repeats the same samples.
    srand( static_cast<unsigned>( time(NULL) ) );

    // Both sizes are compile-time constants so that data[] below is an
    // ordinary fixed-size array; an array sized by a non-const int is a
    // variable-length array, which standard C++ does not provide.
    const int m = 3000; //iterations to get the final result
    const int n = 25;
    //const int n = 50000;
    double data[m];
    for (int i = 0; i < m; i++){
        // Convert before dividing: sim returns int and n is int.
        data[i] = static_cast<double>(sim(n)) / n;
        printf("%d th simulation of occupation rate for %d seats is %.10lf \n", i, n, data[i]);
    }
    printf("mean for %d seats is %.10lf\n" , n, mean(data, m));
    printf("standard devition for %d seats is %.10lf\n" , n, standard_deviation(data, m));
    return 0;
}
/*
time,user,category
2014-01-01 00:00:00,21155349,2
2014-01-01 00:00:00,56347479,6
2014-01-01 00:00:00,68429517,13
*/
#include <iostream>
#include <sstream>
#include <string>
#include <fstream>
#include <vector>
using namespace std;
namespace {

// One parsed data row of the hits file ("time,user,category").
struct Hit {
    int time;      // seconds since midnight, from the HH:MM:SS part of the timestamp
    int user;      // value of the second column
    int category;  // value of the third column
};

// Convert text to an int without throwing. Succeeds only when text holds
// one integer, optionally surrounded by whitespace (e.g. a trailing '\r').
bool to_int(const std::string &text, int &value) {
    std::istringstream in(text);
    if (!(in >> value)) {
        return false;
    }
    char extra;
    return !(in >> extra);   // anything but whitespace after the number is an error
}

// Parse one "YYYY-MM-DD HH:MM:SS,user,category" line into hit.
// Returns false, leaving the row to be skipped, for the header row, blank
// lines, or any line that does not have this shape.
bool parse_hit(const std::string &line, Hit &hit) {
    std::istringstream fields(line);
    std::string stamp;
    std::string user;
    std::string category;
    if (!std::getline(fields, stamp, ',') ||
        !std::getline(fields, user, ',') ||
        !std::getline(fields, category, ',')) {
        return false;
    }

    // HH, MM and SS sit at fixed offsets 11, 14 and 17 of the timestamp.
    if (stamp.size() < 19) {
        return false;
    }
    int hour = 0;
    int minute = 0;
    int second = 0;
    if (!to_int(stamp.substr(11, 2), hour) ||
        !to_int(stamp.substr(14, 2), minute) ||
        !to_int(stamp.substr(17, 2), second)) {
        return false;
    }
    if (!to_int(user, hit.user) || !to_int(category, hit.category)) {
        return false;
    }

    hit.time = hour * 3600 + minute * 60 + second;
    return true;
}

}  // namespace

/*
 * Read the hits CSV line by line and parse every data row into its
 * time-of-day (in seconds), user and category values.
 *
 * The file path may be given as the first command-line argument; without
 * one the original hard-coded path is used. Returns 1 when the file cannot
 * be opened, 0 otherwise. The parsed values are not used any further yet,
 * exactly as in the original program.
 */
int main(int argc, char *argv[]){
    const std::string path =
        (argc > 1) ? argv[1] : "/Users/WenhuiZhang/Desktop/hits.csv";

    std::ifstream infile(path.c_str());
    if (!infile) {
        std::cerr << "cannot open " << path << '\n';
        return 1;
    }

    std::string line;
    while (std::getline(infile, line)) {
        Hit hit;
        if (!parse_hit(line, hit)) {
            continue;   // header row or malformed line
        }
        //std::cout << hit.time << ' ' << hit.user << ' ' << hit.category << '\n';
    }
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment