sohale/thread_example1.cpp

## thread_example1.cpp
/*
Copyright Sohail 2016

This visualises how the actual order of execution varies in a multi-threaded program, in a practical situation.
A central problem in multi-threaded programming is that we cannot predict the sequence in which the work is actually executed (there is absolutely no guarantee regarding it).

Features example use of std::thread(), std::mutex, std::unique_lock<>, and the methods join(), sleep_for(), sleep_until().

Four simple experiments in multi-thread programming in C++11/14.
The 4 experiments (they appear in this file in reverse order, Experiment 4 first):
1. A minimal thread program. Runs one single thread and waits for it!
2. Runs a par-for with the simplest wait mechanism using join().
3. Runs a par-for where each thread (task) also has an argument.
4. Runs an experiment many (300) times. Each time, 100 tasks are executed; each one sleeps briefly (1000 ns in the code) while holding the mutex. A hiccup in execution is manifested in the actual sequence of execution, which is due to the capacity-expansion trigger of std::vector<>, which happens at some threads. This std::vector<> is a shared resource.


How to run it:

    # Note: -lpthread must come AFTER filename.cpp.
    g++ --pedantic -std=c++14 -Wall   -lpthread  $1  -lpthread  -o $1.out
    ./$1.out >seq.txt
    gnuplot -p -e "plot [0:110] [0:110] 'seq.txt' "
    gnuplot -p -e "plot [0:110] [-13:30] 'seq.txt' "
    subl seq.txt  # visualises the hiccup in the sequence
    ...

You will need g++, gnuplot, subl.
*/
#include <array>
#include <chrono>
#include <cmath>
#include <iomanip>  // for setw()
#include <iostream>
#include <mutex>
#include <string>
#include <thread>
#include <vector>
using std::this_thread::sleep_for;
using namespace std::chrono_literals;  // ns, us, ms, s, h, etc.


/* ***********************************************
    EXPERIMENT 4
    Run a function many (100) times in parallel. Repeat the experiment 300 times.
    Each time the order of execution is different and often there is a jitter in the order of execution.
    Plot the overall "sequence jitter" (deviation in the order of execution) using gnuplot.
    How to plot the results:
        gnuplot -p -e "plot [0:110] [0:110] 'seq.txt' "
        gnuplot -p -e "plot [0:110] [-13:30] 'seq.txt' "
************************************************ */

// The shared resource:
// `sequence` collects one mark per worker thread; `mutx` serialises access to it.
std::mutex mutx;  // one mutex, many locks
std::vector<int> sequence;

/*
    Worker body for Experiment 4: pause briefly, then leave a mark.

    Takes `mutx`, sleeps for 1000 ns (1 microsecond, not 1 millisecond) while
    holding it to stand in for a short computation, and appends `arg` to the
    shared `sequence`. Because the append happens under the lock, the order of
    the marks in `sequence` is the order in which the threads got the mutex.

    arg: the value to append (generate_sequence() passes the launch index).

    This crashes without a mutex. The author deliberately tried it without one
    and got the following; with the mutex it was reliable:

    *** Error in `./thread_example1.cpp.out': free(): invalid next size (fast): 0x00007fa5700008c0 ***
    Aborted (core dumped)

    Other ways to time a thread:
        sleep_for(10ns);
        sleep_until(system_clock::now() + 1s);
*/
void output_int(int arg) {
    // The lock is never released early, deferred or handed over, so a plain
    // scope-bound guard is enough; it unlocks when it goes out of scope.
    std::lock_guard<std::mutex> guard{mutx};

    // Simulate a (short) time-consuming computation while the lock is held.
    // Fully qualified so this line does not rely on file-level using-directives.
    std::this_thread::sleep_for(std::chrono::nanoseconds{1000});

    // Leave the mark for later analysis.
    sequence.push_back(arg);
}

/*
    Writes one text row to std::cout showing, for every position, how far the
    recorded value is from its own index (value - index).
    A zero offset is printed as "."; each entry is right-aligned in a field of
    width 3. The row is terminated with std::endl (newline + flush).
*/
void print_delta_sequence(const std::vector<int> & sequence) {
    int position = 0;
    for (const int mark : sequence) {
        const int offset = mark - position;
        std::cout << std::setw(3);
        if (offset != 0) {
            std::cout << offset;
        } else {
            std::cout << ".";
        }
        ++position;
    }
    std::cout << std::endl;
}

/*
    Dumps a sequence verbatim to std::cout: a "size: N" line, then every element
    followed by a single space, then a line break.
    Not used by the experiments at present.
*/
void print_sequence(const std::vector<int> & sequence) {
    std::cout << "size: " << sequence.size() << std::endl;
    for (const int mark : sequence) {
        std::cout << mark << " ";
    }
    std::cout << std::endl;
}


/*
    Runs one trial of the experiment.

    Empties the shared `sequence`, starts `count` threads that each run
    output_int() with their launch index (0 .. count-1), and waits for all of
    them. On return `sequence` holds the marks left by those threads.
*/
void generate_sequence(int count) {
    sequence.clear();
    // Capacity is deliberately NOT reserved here: per the author's observation,
    // enabling the next line makes the timings very smooth and uniform, whereas
    // without it a hiccup is visible in the plotted result.
    // sequence.reserve(count);

    std::vector<std::thread> workers;
    int launched = 0;
    while (launched < count) {
        // std::thread is move-only; the temporary is moved into the vector.
        workers.push_back(std::thread{output_int, launched});
        ++launched;
    }

    for (std::thread & worker : workers) {
        worker.join();
    }
}

/*
    Results of all trials: seq_seq[t] is a copy of `sequence` taken right after trial t.
*/
std::vector<std::vector<int>> seq_seq;

/*
    Experiment 4 driver.

    Runs generate_sequence(COLS) ROWS times (one trial at a time), keeps a copy
    of every resulting sequence in seq_seq, and then prints, for each sequence
    index, the mean absolute deviation |value - index| over all trials.

    Output on std::cout (meant to be redirected to a file and plotted with
    gnuplot, see the top of the file): an empty line, then one "index mean "
    line per sequence index, then an empty line.

    Returns 0.
*/
int main() {
    const int ROWS = 300;  // trials, repeats of the experiment.
    const int COLS = 100;  // The size of each experiment

    for (int i = 0; i < ROWS; ++i) {
        generate_sequence(COLS);
        seq_seq.push_back(sequence);
    }

    // Text output is muted; set to true to also dump the per-trial delta rows.
    const bool print_delta_rows = false;
    if (print_delta_rows) {
        for (const auto & row : seq_seq) {
            print_delta_sequence(row);
        }
    }

    // Average jitter at each "sequence index". Here the "time" axis is the sequence index,
    // so this is an average deviation rather than a histogram.
    //
    // The `{}` matters: it value-initialises every accumulator to 0.0f. A plain
    // `std::array<float, COLS> psth;` leaves the elements indeterminate, and the
    // `+=` below would then read uninitialised values.
    std::array<float, COLS> psth{};
    for (const auto & row : seq_seq) {
        // Each row is expected to hold COLS marks, one per thread of the trial.
        for (std::size_t j = 0; j < psth.size(); ++j) {
            // Relative sequence deviation (i.e. jitter). The subtraction is done
            // in float, as in the original code (its note: taking std::abs of the
            // integer value "sometimes overflows").
            // Alternative (absolute sequence index): std::abs(static_cast<float>(row[j]))
            psth[j] += std::abs(static_cast<float>(row[j]) - static_cast<float>(j));
        }
    }

    // Print the per-index mean.
    // Generates the text that is redirected to a file for plotting.
    // Try sublime to visualise the "cloud of hiccup" consistent across trials.
    // Use gnuplot to plot. See above.
    std::cout << std::endl;
    int ctr = 0;
    for (const float total : psth) {
        // mean = accumulated absolute deviation divided by the number of trials.
        const float mean = total / static_cast<float>(ROWS);
        std::cout << ctr << " " << mean << " "
            << std::endl;
        ++ctr;
    }
    std::cout << std::endl;

    return 0;
}


/* ***********************************************
    EXPERIMENT 3
        * This time we pass an argument to each thread instance. Note the use of std::move().
        * The number of thread instances is 200. It was 20 in experiment 2.
    The number for each of them is also printed (this number = the thread argument). This number is the position of each thread in the launch order.
    The launching is done serially in the experiments, but the started threads run concurrently (i.e. conceptually at the same time, though not necessarily truly simultaneously).

************************************************ */


// Writes `text` followed by a single space to std::cout.
// No line break and no explicit flush, so the output may stay buffered.
void print_relaxed(const std::string & text) {
    std::ostream & out = std::cout;
    out << text;
    out << " ";
}
// Writes `text` followed by a single space to std::cout and flushes the
// stream immediately. No line break is added.
void print_forceful(const std::string & text) {
    std::ostream & out = std::cout;
    out << text << " ";
    out.flush();
}

// Thread body for Experiment 3: prints its argument wrapped in parentheses,
// e.g. "(17)", through print_relaxed() (trailing space, no flush).
void call_back3(std::string arg) {
    const std::string wrapped = "(" + arg + ")";
    print_relaxed(wrapped);
}

/*
    Experiment 3 driver.

    Starts 200 threads, each running call_back3() with its own launch index
    (as text), then joins them in launch order.
    Progress markers, each flushed immediately: ":" before a launch, "." after
    it, and "|" after each join; a line break is printed at the end.

    Author's observation: different orders appear from run to run, and
    sometimes "|" (the join()s) appear before the last callbacks' output.

    Returns 0.
*/
int main_3() {
    std::vector<std::thread> tharray;

    int launched = 0;
    while (launched < 200) {
        print_forceful(":");
        // std::thread is move-only; the temporary is moved into the vector.
        tharray.push_back(std::thread{call_back3, std::to_string(launched)});
        print_forceful(".");
        ++launched;
    }

    // Wait for every thread, marking each completed join.
    for (std::thread & worker : tharray) {
        worker.join();
        print_forceful("|");
    }

    std::cout << std::endl;
    return 0;
}


/* ***********************************************
    EXPERIMENT 2
    A minimal thread example that uses join().
    A kind of PAR_FOR.

    Featuring:
    * Use of join() for accomplishment of a thread.
    Result:
    Different patterns of printed letters show the irregularity of execution in a multi-threaded program.

    No argument is passed to each thread instance.
    The join() waits for finishing of thread i
    Rename main_2() to main() to run this experiment.

    tharray contains a list of threads. It's a simplistic job scheduler!

************************************************ */


// Thread body for Experiments 1 and 2: announces itself by printing
// "call_back()" through print_forceful() (flushed immediately).
void call_back() {
    const std::string label = "call_back()";
    print_forceful(label);
}

// Experiment 2 driver: a simple PAR_FOR.
// Starts 20 threads running call_back() and only afterwards joins them, in
// launch order. Markers (flushed immediately): "A" before each launch, "."
// after it, "C" after each join; a line break is printed at the end.
// Returns 0.
int main_2() {
    std::vector<std::thread> tharray;

    // a PAR_FOR
    int launched = 0;
    while (launched < 20) {
        print_forceful("A");
        // std::thread is move-only; the temporary is moved into the vector.
        tharray.push_back(std::thread{call_back});
        print_forceful(".");
        // Deliberately no join() here: joining inside this loop would wait for
        // each thread before starting the next, i.e. run them serially.
        ++launched;
    }

    // Now wait for all to finish and pass.
    // Open question from the author: why does no "C" appear before any other
    // "A" or "callback" output? (It does happen in the next experiment.)
    for (std::thread & worker : tharray) {
        worker.join();
        print_forceful("C");
    }

    // Everybody is gone. (thread-) Silence.
    std::cout << std::endl;
    return 0;
}


/* ***********************************************
    EXPERIMENT 1

    A minimal thread example in C++14.
    No argument is passed; the only synchronisation is the final join().

************************************************ */


// Experiment 1 driver: starts a single thread running call_back() and waits
// for it. Prints "A" before the launch, "B" right after it and "C" once the
// thread has been joined. Returns 0.
int main_1() {
    print_forceful("A");

    // Author's note on this line: "causes exception".
    // NOTE(review): presumably seen when built without thread support
    // (see the -lpthread remark at the top of the file) -- confirm.
    std::thread worker(call_back);
    print_forceful("B");

    worker.join();
    print_forceful("C");

    return 0;
}
	/*
	Copyright Sohail 2016

	This visualises how the actual order of execution varies in a multi-threaded program, in a practical situation.
	A central problem in multi-threaded programming is that we cannot predict the sequence in which the work is actually executed (there is absolutely no guarantee regarding it).

	Features example use of std::thread(), std::mutex, std::unique_lock<>, and the methods join(), sleep_for(), sleep_until().

	Four simple experiments in multi-thread programming in C++11/14.
	The 4 experiments (they appear in this file in reverse order, Experiment 4 first):
	1. A minimal thread program. Runs one single thread and waits for it!
	2. Runs a par-for with the simplest wait mechanism using join().
	3. Runs a par-for where each thread (task) also has an argument.
	4. Runs an experiment many (300) times. Each time, 100 tasks are executed; each one sleeps briefly (1000 ns in the code) while holding the mutex. A hiccup in execution is manifested in the actual sequence of execution, which is due to the capacity-expansion trigger of std::vector<>, which happens at some threads. This std::vector<> is a shared resource.


	How to run it:

	# Note: -lpthread must come AFTER filename.cpp.
	g++ --pedantic -std=c++14 -Wall -lpthread $1 -lpthread -o $1.out
	./$1.out >seq.txt
	gnuplot -p -e "plot [0:110] [0:110] 'seq.txt' "
	gnuplot -p -e "plot [0:110] [-13:30] 'seq.txt' "
	subl seq.txt # visualises the hiccup in the sequence
	...

	You will need g++, gnuplot, subl.
	*/
	#include <string>
	#include <vector>
	#include <iostream>

	#include <iomanip> // for setw()
	#include <cmath>

	#include <thread>
	#include <mutex>

	#include <chrono>
	using std::this_thread::sleep_for;
	using namespace std::chrono_literals; // ns, us, ms, s, h, etc.



	/* ***********************************************
	EXPERIMENT 4
	Run a function many (100) times in parallel. Repeat the experiment 300 times.
	Each time the order of execution is different and often there is a jitter in the order of execution.
	Plot the overall "sequence jitter" (deviation in the order of execution) using gnuplot.
	How to plot the results:
	gnuplot -p -e "plot [0:110] [0:110] 'seq.txt' "
	gnuplot -p -e "plot [0:110] [-13:30] 'seq.txt' "
	************************************************ */

	// The shared resource:
	// `sequence` collects one mark per worker thread; `mutx` serialises access to it.
	std::mutex mutx; // one mutex, many locks
	std::vector<int> sequence;

	/*
	Worker body for Experiment 4: pause briefly, then leave a mark.

	Takes `mutx`, sleeps for 1000 ns (1 microsecond, not 1 millisecond) while
	holding it to stand in for a short computation, and appends `arg` to the
	shared `sequence`. Because the append happens under the lock, the order of
	the marks in `sequence` is the order in which the threads got the mutex.

	arg: the value to append (generate_sequence() passes the launch index).

	This crashes without a mutex. The author deliberately tried it without one
	and got the following; with the mutex it was reliable:

	*** Error in `./thread_example1.cpp.out': free(): invalid next size (fast): 0x00007fa5700008c0 ***
	Aborted (core dumped)

	Other ways to time a thread:
		sleep_for(10ns);
		sleep_until(system_clock::now() + 1s);
	*/
	void output_int(int arg) {
		// The lock is never released early, deferred or handed over, so a plain
		// scope-bound guard is enough; it unlocks when it goes out of scope.
		std::lock_guard<std::mutex> guard{mutx};

		// Simulate a (short) time-consuming computation while the lock is held.
		// Fully qualified so this line does not rely on file-level using-directives.
		std::this_thread::sleep_for(std::chrono::nanoseconds{1000});

		// Leave the mark for later analysis.
		sequence.push_back(arg);
	}

	/*
	Writes one text row to std::cout showing, for every position, how far the
	recorded value is from its own index (value - index).
	A zero offset is printed as "."; each entry is right-aligned in a field of
	width 3. The row is terminated with std::endl (newline + flush).
	*/
	void print_delta_sequence(const std::vector<int> & sequence) {
		int position = 0;
		for (const int mark : sequence) {
			const int offset = mark - position;
			std::cout << std::setw(3);
			if (offset != 0) {
				std::cout << offset;
			} else {
				std::cout << ".";
			}
			++position;
		}
		std::cout << std::endl;
	}

	/*
	Dumps a sequence verbatim to std::cout: a "size: N" line, then every element
	followed by a single space, then a line break.
	Not used by the experiments at present.
	*/
	void print_sequence(const std::vector<int> & sequence) {
		std::cout << "size: " << sequence.size() << std::endl;
		for (const int mark : sequence) {
			std::cout << mark << " ";
		}
		std::cout << std::endl;
	}


	/*
	Runs one trial of the experiment.

	Empties the shared `sequence`, starts `count` threads that each run
	output_int() with their launch index (0 .. count-1), and waits for all of
	them. On return `sequence` holds the marks left by those threads.
	*/
	void generate_sequence(int count) {
		sequence.clear();
		// Capacity is deliberately NOT reserved here: per the author's observation,
		// enabling the next line makes the timings very smooth and uniform, whereas
		// without it a hiccup is visible in the plotted result.
		// sequence.reserve(count);

		std::vector<std::thread> workers;
		int launched = 0;
		while (launched < count) {
			// std::thread is move-only; the temporary is moved into the vector.
			workers.push_back(std::thread{output_int, launched});
			++launched;
		}

		for (std::thread & worker : workers) {
			worker.join();
		}
	}

	/*
	Results of all trials: seq_seq[t] is a copy of `sequence` taken right after trial t.
	*/
	std::vector<std::vector<int>> seq_seq;

	/*
	Experiment 4 driver.

	Runs generate_sequence(COLS) ROWS times (one trial at a time), keeps a copy
	of every resulting sequence in seq_seq, and then prints, for each sequence
	index, the mean absolute deviation |value - index| over all trials.

	Output on std::cout (meant to be redirected to a file and plotted with
	gnuplot, see the top of the file): an empty line, then one "index mean "
	line per sequence index, then an empty line.

	Returns 0.
	*/
	int main() {
		const int ROWS = 300; // trials, repeats of the experiment.
		const int COLS = 100; // The size of each experiment

		for (int i = 0; i < ROWS; ++i) {
			generate_sequence(COLS);
			seq_seq.push_back(sequence);
		}

		// Text output is muted; set to true to also dump the per-trial delta rows.
		const bool print_delta_rows = false;
		if (print_delta_rows) {
			for (const auto & row : seq_seq) {
				print_delta_sequence(row);
			}
		}

		// Average jitter at each "sequence index". Here the "time" axis is the sequence index,
		// so this is an average deviation rather than a histogram.
		//
		// The `{}` matters: it value-initialises every accumulator to 0.0f. A plain
		// `std::array<float, COLS> psth;` leaves the elements indeterminate, and the
		// `+=` below would then read uninitialised values.
		std::array<float, COLS> psth{};
		for (const auto & row : seq_seq) {
			// Each row is expected to hold COLS marks, one per thread of the trial.
			for (std::size_t j = 0; j < psth.size(); ++j) {
				// Relative sequence deviation (i.e. jitter). The subtraction is done
				// in float, as in the original code (its note: taking std::abs of the
				// integer value "sometimes overflows").
				// Alternative (absolute sequence index): std::abs(static_cast<float>(row[j]))
				psth[j] += std::abs(static_cast<float>(row[j]) - static_cast<float>(j));
			}
		}

		// Print the per-index mean.
		// Generates the text that is redirected to a file for plotting.
		// Try sublime to visualise the "cloud of hiccup" consistent across trials.
		// Use gnuplot to plot. See above.
		std::cout << std::endl;
		int ctr = 0;
		for (const float total : psth) {
			// mean = accumulated absolute deviation divided by the number of trials.
			const float mean = total / static_cast<float>(ROWS);
			std::cout << ctr << " " << mean << " "
				<< std::endl;
			++ctr;
		}
		std::cout << std::endl;

		return 0;
	}



	/* ***********************************************
	EXPERIMENT 3
	* This time we pass an argument to each thread instance. Note the use of std::move().
	* The number of thread instances is 200. It was 20 in experiment 2.
	The number for each of them is also printed (this number = the thread argument). This number is the position of each thread in the launch order.
	The launching is done serially in the experiments, but the started threads run concurrently (i.e. conceptually at the same time, though not necessarily truly simultaneously).

	************************************************ */


	// Writes `text` followed by a single space to std::cout.
	// No line break and no explicit flush, so the output may stay buffered.
	void print_relaxed(const std::string & text) {
		std::ostream & out = std::cout;
		out << text;
		out << " ";
	}
	// Writes `text` followed by a single space to std::cout and flushes the
	// stream immediately. No line break is added.
	void print_forceful(const std::string & text) {
		std::ostream & out = std::cout;
		out << text << " ";
		out.flush();
	}

	// Thread body for Experiment 3: prints its argument wrapped in parentheses,
	// e.g. "(17)", through print_relaxed() (trailing space, no flush).
	void call_back3(std::string arg) {
		const std::string wrapped = "(" + arg + ")";
		print_relaxed(wrapped);
	}

	/*
	Experiment 3 driver.

	Starts 200 threads, each running call_back3() with its own launch index
	(as text), then joins them in launch order.
	Progress markers, each flushed immediately: ":" before a launch, "." after
	it, and "|" after each join; a line break is printed at the end.

	Author's observation: different orders appear from run to run, and
	sometimes "|" (the join()s) appear before the last callbacks' output.

	Returns 0.
	*/
	int main_3() {
		std::vector<std::thread> tharray;
		for (int i = 0; i < 200; ++i) {
			print_forceful(":");
			std::string arg = std::to_string(i);
			std::thread t1 {call_back3, arg};
			tharray.push_back(std::move(t1)); // std::thread is move-only
			print_forceful(".");
		}

		// Wait for every thread, marking each completed join.
		for (std::thread & worker : tharray) {
			worker.join();
			// Plain "|": the previous "\|" was not a valid C++ escape sequence
			// (a leftover of markdown escaping).
			print_forceful("|");
		}
		std::cout << std::endl;
		return 0;
	}


	/* ***********************************************
	EXPERIMENT 2
	A minimal thread example that uses join().
	A kind of PAR_FOR.

	Featuring:
	* Use of join() for accomplishment of a thread.
	Result:
	Different patterns of printed letters show the irregularity of execution in a multi-threaded program.

	No argument is passed to each thread instance.
	The join() waits for finishing of thread i
	Rename main_2() to main() to run this experiment.

	tharray contains a list of threads. It's a simplistic job scheduler!

	************************************************ */


	// Thread body for Experiments 1 and 2: announces itself by printing
	// "call_back()" through print_forceful() (flushed immediately).
	void call_back() {
		const std::string label = "call_back()";
		print_forceful(label);
	}

	// Experiment 2 driver: a simple PAR_FOR.
	// Starts 20 threads running call_back() and only afterwards joins them, in
	// launch order. Markers (flushed immediately): "A" before each launch, "."
	// after it, "C" after each join; a line break is printed at the end.
	// Returns 0.
	int main_2() {
		std::vector<std::thread> tharray;

		// a PAR_FOR
		int launched = 0;
		while (launched < 20) {
			print_forceful("A");
			// std::thread is move-only; the temporary is moved into the vector.
			tharray.push_back(std::thread{call_back});
			print_forceful(".");
			// Deliberately no join() here: joining inside this loop would wait for
			// each thread before starting the next, i.e. run them serially.
			++launched;
		}

		// Now wait for all to finish and pass.
		// Open question from the author: why does no "C" appear before any other
		// "A" or "callback" output? (It does happen in the next experiment.)
		for (std::thread & worker : tharray) {
			worker.join();
			print_forceful("C");
		}

		// Everybody is gone. (thread-) Silence.
		std::cout << std::endl;
		return 0;
	}


	/* ***********************************************
	EXPERIMENT 1

	A minimal thread example in C++14.
	No argument is passed; the only synchronisation is the final join().

	************************************************ */



	// Experiment 1 driver: starts a single thread running call_back() and waits
	// for it. Prints "A" before the launch, "B" right after it and "C" once the
	// thread has been joined. Returns 0.
	int main_1() {
		print_forceful("A");

		// Author's note on this line: "causes exception".
		// NOTE(review): presumably seen when built without thread support
		// (see the -lpthread remark at the top of the file) -- confirm.
		std::thread worker(call_back);
		print_forceful("B");

		worker.join();
		print_forceful("C");

		return 0;
	}