rgambee/Caboose Numbers.md

## Caboose Numbers.md

      
    Raw
  

              Caboose Numbers.md
            
          
    Caboose Numbers

After watching a
video about caboose numbers,
I was curious to visualize their ability to generate primes. So I wrote this
script to do exactly that!
Dependencies

Python 3.8 or later
Libraries:

pandas
matplotlib
sympy

pip install pandas matplotlib sympy
Usage

The script takes one optional argument, which is the maximum caboose number to
check. By default, it will keep checking until interrupted with Ctrl+C.
# Check up to 100
python caboose_numbers.py 100

# Run until stopped
python caboose_numbers.py
Results

Attached are plots showing the results for caboose numbers up through 100 and up
through 10,000.

  
## caboose-numbers_2-to-100.png

      
    Raw
  

              caboose-numbers_2-to-100.png
            
          
## caboose-numbers_2-to-10000.png

      
    Raw
  

              caboose-numbers_2-to-10000.png
            
          
## caboose_numbers.py
#!/usr/bin/env python3
import argparse
import itertools
import multiprocessing
import signal
from typing import Iterable

import matplotlib.pyplot as plt
import pandas as pd
import sympy


def count_primes(caboose: int, pool: multiprocessing.Pool) -> int:
    n_values = range(1, caboose)
    to_check = (n**2 - n + caboose for n in n_values)
    return sum(pool.imap_unordered(sympy.isprime, to_check, chunksize=10))


def main() -> None:
    parser = argparse.ArgumentParser(
        description="Plot the ability of caboose number to generate primes",
        epilog="For more information, see https://www.youtube.com/watch?v=gM5uNcgn2NQ",
    )
    parser.add_argument(
        "max_caboose",
        nargs="?",
        type=int,
        help="Maximum caboose number to check. By default, check until stopped.",
    )
    args = parser.parse_args()

    results_dict: dict[str, list[float]] = {
        "caboose": [],
        "caboose_is_prime": [],
        "prime_count": [],
    }
    print("Checking caboose numbers...")
    if args.max_caboose is None:
        print("Will run until stopped with Ctrl+C")
        cabooses_to_check: Iterable[int] = itertools.count(2)
    else:
        cabooses_to_check = range(2, args.max_caboose + 1)

    # Ignore SIGINT when creating the pool so that the user can exit the loop.
    # Based on https://stackoverflow.com/a/35134329
    original_sigint_handler = signal.signal(signal.SIGINT, signal.SIG_IGN)
    pool = multiprocessing.Pool()
    signal.signal(signal.SIGINT, original_sigint_handler)

    try:
        with pool:
            for caboose in cabooses_to_check:
                print(caboose, end="\r")
                caboose_is_prime = sympy.isprime(caboose)
                count = count_primes(caboose, pool)
                results_dict["caboose"].append(caboose)
                results_dict["caboose_is_prime"].append(caboose_is_prime)
                results_dict["prime_count"].append(count)
    except KeyboardInterrupt:
        pass
    print(f"Computation stopped at {results_dict['caboose'][-1]}. Plotting results.")

    # Make sure the lists in the dictionary all have the same length. The interrupt
    # could have happened in the middle of the appends.
    length = len(results_dict["prime_count"])
    for lst in results_dict.values():
        while len(lst) > length:
            lst.pop()
    results = pd.DataFrame(results_dict)

    fig, [abs_axis, rel_axis] = plt.subplots(nrows=2, ncols=1, sharex=True)

    # Plot the caboose number vs. the prime count. Mark caboose numbers which are
    # primes themselves in a different color.
    composite_cabooses = results[~results["caboose_is_prime"]]
    abs_axis.plot(
        composite_cabooses["caboose"],
        composite_cabooses["prime_count"],
        marker=".",
        linestyle="none",
        color="C0",
        label="Composite Caboose"
    )
    prime_cabooses = results[results["caboose_is_prime"]]
    abs_axis.plot(
        prime_cabooses["caboose"],
        prime_cabooses["prime_count"],
        marker=".",
        linestyle="none",
        color="C1",
        label="Prime Caboose",
    )
    # Show the line y = x - 1 for comparison
    abs_axis.plot(
        [1, results["caboose"].iloc[-1] + 1],
        [0, results["caboose"].iloc[-1]],
        linestyle="dashed",
        color="gray",
        zorder=1.9,
    )
    abs_axis.set_xlabel("Caboose Number")
    abs_axis.set_ylabel("Count of Generated\nNumbers That are Prime")
    abs_axis.legend(loc="upper left")

    # Now do the same but plot the proportion of primes along the vertical axis by
    # dividing the prime count by the caboose number.
    rel_axis.plot(
        composite_cabooses["caboose"],
        composite_cabooses["prime_count"] / (composite_cabooses["caboose"] - 1),
        marker=".",
        linestyle="none",
        color="C0",
    )
    prime_cabooses = results[results["caboose_is_prime"]]
    rel_axis.plot(
        prime_cabooses["caboose"],
        prime_cabooses["prime_count"] / (prime_cabooses["caboose"] - 1),
        marker=".",
        linestyle="none",
        color="C1",
    )
    # Show the line y = 1 for comparison
    rel_axis.plot(
        [0, results["caboose"].iloc[-1]],
        [1, 1],
        linestyle="dashed",
        color="gray",
        zorder=1.9,
    )
    rel_axis.set_xlabel("Caboose Number")
    rel_axis.set_ylabel("Proportion of Generated\nNumbers That are Prime")

    plt.show()
    if plt.isinteractive():
        breakpoint()


if __name__ == "__main__":
    main()
	#!/usr/bin/env python3
	import argparse
	import itertools
	import multiprocessing
	import signal
	from typing import Iterable

	import matplotlib.pyplot as plt
	import pandas as pd
	import sympy


	def count_primes(caboose: int, pool: multiprocessing.Pool) -> int:
	n_values = range(1, caboose)
	to_check = (n**2 - n + caboose for n in n_values)
	return sum(pool.imap_unordered(sympy.isprime, to_check, chunksize=10))


	def main() -> None:
	parser = argparse.ArgumentParser(
	description="Plot the ability of caboose number to generate primes",
	epilog="For more information, see https://www.youtube.com/watch?v=gM5uNcgn2NQ",
	)
	parser.add_argument(
	"max_caboose",
	nargs="?",
	type=int,
	help="Maximum caboose number to check. By default, check until stopped.",
	)
	args = parser.parse_args()

	results_dict: dict[str, list[float]] = {
	"caboose": [],
	"caboose_is_prime": [],
	"prime_count": [],
	}
	print("Checking caboose numbers...")
	if args.max_caboose is None:
	print("Will run until stopped with Ctrl+C")
	cabooses_to_check: Iterable[int] = itertools.count(2)
	else:
	cabooses_to_check = range(2, args.max_caboose + 1)

	# Ignore SIGINT when creating the pool so that the user can exit the loop.
	# Based on https://stackoverflow.com/a/35134329
	original_sigint_handler = signal.signal(signal.SIGINT, signal.SIG_IGN)
	pool = multiprocessing.Pool()
	signal.signal(signal.SIGINT, original_sigint_handler)

	try:
	with pool:
	for caboose in cabooses_to_check:
	print(caboose, end="\r")
	caboose_is_prime = sympy.isprime(caboose)
	count = count_primes(caboose, pool)
	results_dict["caboose"].append(caboose)
	results_dict["caboose_is_prime"].append(caboose_is_prime)
	results_dict["prime_count"].append(count)
	except KeyboardInterrupt:
	pass
	print(f"Computation stopped at {results_dict['caboose'][-1]}. Plotting results.")

	# Make sure the lists in the dictionary all have the same length. The interrupt
	# could have happened in the middle of the appends.
	length = len(results_dict["prime_count"])
	for lst in results_dict.values():
	while len(lst) > length:
	lst.pop()
	results = pd.DataFrame(results_dict)

	fig, [abs_axis, rel_axis] = plt.subplots(nrows=2, ncols=1, sharex=True)

	# Plot the caboose number vs. the prime count. Mark caboose numbers which are
	# primes themselves in a different color.
	composite_cabooses = results[~results["caboose_is_prime"]]
	abs_axis.plot(
	composite_cabooses["caboose"],
	composite_cabooses["prime_count"],
	marker=".",
	linestyle="none",
	color="C0",
	label="Composite Caboose"
	)
	prime_cabooses = results[results["caboose_is_prime"]]
	abs_axis.plot(
	prime_cabooses["caboose"],
	prime_cabooses["prime_count"],
	marker=".",
	linestyle="none",
	color="C1",
	label="Prime Caboose",
	)
	# Show the line y = x - 1 for comparison
	abs_axis.plot(
	[1, results["caboose"].iloc[-1] + 1],
	[0, results["caboose"].iloc[-1]],
	linestyle="dashed",
	color="gray",
	zorder=1.9,
	)
	abs_axis.set_xlabel("Caboose Number")
	abs_axis.set_ylabel("Count of Generated\nNumbers That are Prime")
	abs_axis.legend(loc="upper left")

	# Now do the same but plot the proportion of primes along the vertical axis by
	# dividing the prime count by the caboose number.
	rel_axis.plot(
	composite_cabooses["caboose"],
	composite_cabooses["prime_count"] / (composite_cabooses["caboose"] - 1),
	marker=".",
	linestyle="none",
	color="C0",
	)
	prime_cabooses = results[results["caboose_is_prime"]]
	rel_axis.plot(
	prime_cabooses["caboose"],
	prime_cabooses["prime_count"] / (prime_cabooses["caboose"] - 1),
	marker=".",
	linestyle="none",
	color="C1",
	)
	# Show the line y = 1 for comparison
	rel_axis.plot(
	[0, results["caboose"].iloc[-1]],
	[1, 1],
	linestyle="dashed",
	color="gray",
	zorder=1.9,
	)
	rel_axis.set_xlabel("Caboose Number")
	rel_axis.set_ylabel("Proportion of Generated\nNumbers That are Prime")

	plt.show()
	if plt.isinteractive():
	breakpoint()


	if __name__ == "__main__":
	main()