marcamillion/serel.rb Secret

## serel.rb
> w = Serel::Question.filter(:withbody).find(q.first.so_id)
[INFO][2013-04-08 00:39:17] Making request to /2.0/questions/11227809?filter=withbody&site=stackoverflow&key=some_key%28%28
 => [#<Serel::Question:70352730636420 @question_id=11227809, @last_edit_date=2013-04-06 14:31:38 -0500, @creation_date=2012-06-27 08:51:36 -0500, @last_activity_date=2013-04-06 14:31:38 -0500, @score=4262, @answer_count=9, @accepted_answer_id=11227902, @protected_date=2012-06-28 23:02:46 -0500, @body=<p>Here is a piece of C++ code that shows some very peculiar performance. For some strange reason, sorting the data miraculously speeds up the code by almost 6x:</p>

<pre class="lang-cpp prettyprint-override"><code>#include &lt;algorithm&gt;
#include &lt;ctime&gt;
#include &lt;iostream&gt;

int main()
{
    // Generate data
    const unsigned arraySize = 32768;
    int data[arraySize];

    for (unsigned c = 0; c &lt; arraySize; ++c)
        data[c] = std::rand() % 256;

    // !!! With this, the next loop runs faster
    std::sort(data, data + arraySize);

    // Test
    clock_t start = clock();
    long long sum = 0;

    for (unsigned i = 0; i &lt; 100000; ++i)
    {
        // Primary loop
        for (unsigned c = 0; c &lt; arraySize; ++c)
        {
            if (data[c] &gt;= 128)
                sum += data[c];
        }
    }

    double elapsedTime = static_cast&lt;double&gt;(clock() - start) / CLOCKS_PER_SEC;

    std::cout &lt;&lt; elapsedTime &lt;&lt; std::endl;
    std::cout &lt;&lt; "sum = " &lt;&lt; sum &lt;&lt; std::endl;
}
</code></pre>

<ul>
<li>Without <code>std::sort(data, data + arraySize);</code>, the code runs in <strong>11.54</strong> seconds.</li>
<li>With the sorted data, the code runs in <strong>1.93</strong> seconds.</li>
</ul>

<hr>

<p>Initially I thought this might be just a language or compiler anomaly. So I tried it in Java:</p>

<pre class="lang-java prettyprint-override"><code>import java.util.Arrays;
import java.util.Random;

public class Main
{
    public static void main(String[] args)
    {
        // Generate data
        int arraySize = 32768;
        int data[] = new int[arraySize];

        Random rnd = new Random(0);
        for (int c = 0; c &lt; arraySize; ++c)
            data[c] = rnd.nextInt() % 256;

        // !!! With this, the next loop runs faster
        Arrays.sort(data);

        // Test
        long start = System.nanoTime();
        long sum = 0;

        for (int i = 0; i &lt; 100000; ++i)
        {
            // Primary loop
            for (int c = 0; c &lt; arraySize; ++c)
            {
                if (data[c] &gt;= 128)
                    sum += data[c];
            }
        }

        System.out.println((System.nanoTime() - start) / 1000000000.0);
        System.out.println("sum = " + sum);
    }
}
</code></pre>

<p>with a similar but less extreme result.</p>

<hr>

<p>My first thought was that sorting brings the data into cache, but my next thought was how silly that is because the array was just generated.</p>

<p>What is going on? Why is a sorted array faster than an unsorted array? The code is summing up some independent terms, the order should not matter.</p>
, @title=Why is processing a sorted array faster than an unsorted array?, @tags=["c++", "performance", "optimization", "language-agnostic", "branch-prediction"], @view_count=222854, @link=http://stackoverflow.com/questions/11227809/why-is-processing-a-sorted-array-faster-than-an-unsorted-array, @is_answered=true @owner=#<Serel::User:70352730647660>>]
	> w = Serel::Question.filter(:withbody).find(q.first.so_id)
	[INFO][2013-04-08 00:39:17] Making request to /2.0/questions/11227809?filter=withbody&site=stackoverflow&key=some_key%28%28
	=> [#<Serel::Question:70352730636420 @question_id=11227809, @last_edit_date=2013-04-06 14:31:38 -0500, @creation_date=2012-06-27 08:51:36 -0500, @last_activity_date=2013-04-06 14:31:38 -0500, @score=4262, @answer_count=9, @accepted_answer_id=11227902, @protected_date=2012-06-28 23:02:46 -0500, @body=<p>Here is a piece of C++ code that shows some very peculiar performance. For some strange reason, sorting the data miraculously speeds up the code by almost 6x:</p>

	<pre class="lang-cpp prettyprint-override"><code>#include <algorithm>
	#include <ctime>
	#include <iostream>

	int main()
	{
	// Generate data
	const unsigned arraySize = 32768;
	int data[arraySize];

	for (unsigned c = 0; c < arraySize; ++c)
	data[c] = std::rand() % 256;

	// !!! With this, the next loop runs faster
	std::sort(data, data + arraySize);

	// Test
	clock_t start = clock();
	long long sum = 0;

	for (unsigned i = 0; i < 100000; ++i)
	{
	// Primary loop
	for (unsigned c = 0; c < arraySize; ++c)
	{
	if (data[c] >= 128)
	sum += data[c];
	}
	}

	double elapsedTime = static_cast<double>(clock() - start) / CLOCKS_PER_SEC;

	std::cout << elapsedTime << std::endl;
	std::cout << "sum = " << sum << std::endl;
	}
	</code></pre>

	<ul>
	<li>Without <code>std::sort(data, data + arraySize);</code>, the code runs in <strong>11.54</strong> seconds.</li>
	<li>With the sorted data, the code runs in <strong>1.93</strong> seconds.</li>
	</ul>

	<hr>

	<p>Initially I thought this might be just a language or compiler anomaly. So I tried it in Java:</p>

	<pre class="lang-java prettyprint-override"><code>import java.util.Arrays;
	import java.util.Random;

	public class Main
	{
	public static void main(String[] args)
	{
	// Generate data
	int arraySize = 32768;
	int data[] = new int[arraySize];

	Random rnd = new Random(0);
	for (int c = 0; c < arraySize; ++c)
	data[c] = rnd.nextInt() % 256;

	// !!! With this, the next loop runs faster
	Arrays.sort(data);

	// Test
	long start = System.nanoTime();
	long sum = 0;

	for (int i = 0; i < 100000; ++i)
	{
	// Primary loop
	for (int c = 0; c < arraySize; ++c)
	{
	if (data[c] >= 128)
	sum += data[c];
	}
	}

	System.out.println((System.nanoTime() - start) / 1000000000.0);
	System.out.println("sum = " + sum);
	}
	}
	</code></pre>

	<p>with a similar but less extreme result.</p>

	<hr>

	<p>My first thought was that sorting brings the data into cache, but my next thought was how silly that is because the array was just generated.</p>

	<p>What is going on? Why is a sorted array faster than an unsorted array? The code is summing up some independent terms, the order should not matter.</p>
	, @title=Why is processing a sorted array faster than an unsorted array?, @tags=["c++", "performance", "optimization", "language-agnostic", "branch-prediction"], @view_count=222854, @link=http://stackoverflow.com/questions/11227809/why-is-processing-a-sorted-array-faster-than-an-unsorted-array, @is_answered=true @owner=#<Serel::User:70352730647660>>]