mcwitt/Main.hs Secret

## flake.lock
{
  "nodes": {
    "flake-utils": {
      "inputs": {
        "systems": "systems"
      },
      "locked": {
        "lastModified": 1681202837,
        "narHash": "sha256-H+Rh19JDwRtpVPAWp64F+rlEtxUWBAQW28eAi3SRSzg=",
        "owner": "numtide",
        "repo": "flake-utils",
        "rev": "cfacdce06f30d2b68473a46042957675eebb3401",
        "type": "github"
      },
      "original": {
        "owner": "numtide",
        "repo": "flake-utils",
        "type": "github"
      }
    },
    "nixpkgs": {
      "locked": {
        "lastModified": 1684049129,
        "narHash": "sha256-7WB9LpnPNAS8oI7hMoHeKLNhRX7k3CI9uWBRSfmOCCE=",
        "owner": "NixOS",
        "repo": "nixpkgs",
        "rev": "0470f36b02ef01d4f43c641bbf07020bcab71bf1",
        "type": "github"
      },
      "original": {
        "owner": "NixOS",
        "ref": "nixos-unstable",
        "repo": "nixpkgs",
        "type": "github"
      }
    },
    "root": {
      "inputs": {
        "flake-utils": "flake-utils",
        "nixpkgs": "nixpkgs"
      }
    },
    "systems": {
      "locked": {
        "lastModified": 1681028828,
        "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
        "owner": "nix-systems",
        "repo": "default",
        "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
        "type": "github"
      },
      "original": {
        "owner": "nix-systems",
        "repo": "default",
        "type": "github"
      }
    }
  },
  "root": "root",
  "version": 7
}

## flake.nix
{
  description = "Mandelbrot generator";

  inputs = {
    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
    flake-utils.url = "github:numtide/flake-utils";
  };

  # Per-system outputs: the `mandelbrot` package (also exposed as `default`)
  # and a development shell for working on it.
  outputs = { self, nixpkgs, flake-utils }:
    flake-utils.lib.eachDefaultSystem (system:
      let
        pkgs = import nixpkgs {
          inherit system;
          config.allowBroken = true; # accelerate marked broken, but it compiles with GHC 8.10
          config.allowUnfree = true; # nvidia
        };

        # accelerate requires ghc<9.0
        haskellPackages = pkgs.haskell.packages.ghc810.override {
          overrides = _: prev: {
            # Build accelerate-llvm-ptx without running its test suite.
            accelerate-llvm-ptx = pkgs.haskell.lib.dontCheck prev.accelerate-llvm-ptx;
          };
        };

        packageName = "mandelbrot";
      in
      {
        packages = {
          ${packageName} = haskellPackages.callCabal2nix packageName ./. { };
          default = self.outputs.packages.${system}.${packageName};
        };

        # `devShells.<system>.default` is the current name of the deprecated
        # `devShell.<system>` output; it matches the `packages.default` style
        # used above and is what `nix develop` looks up first.
        devShells.default = haskellPackages.shellFor {
          packages = _: [ self.outputs.packages.${system}.${packageName} ];
          withHoogle = true;
          # Inside `with pkgs;` the name `haskellPackages` still refers to the
          # let-bound GHC 8.10 set: lexical bindings take precedence over `with`.
          buildInputs = with pkgs; [
            cabal-install
            ghcid
            haskellPackages.haskell-language-server
            haskellPackages.cabal-fmt
            ormolu
          ];
        };
      });
}

## Main.hs
module Main where

import Data.Array.Accelerate as A
import qualified Data.Array.Accelerate.LLVM.PTX as GPU

-- | An @n@ x @n@ array in which every element holds its own
-- two-dimensional index, converted to a pair by 'unindex2'.
indices :: Exp Int -> Acc (Array DIM2 (Int, Int))
indices n = generate extent unindex2
  where
    -- Square shape with @n@ in both dimensions.
    extent = index2 n n

-- | Evaluate 'indices' for the given side length on the PTX (GPU) backend
-- and write the 'show' rendering of the resulting array to the file @out@.
run :: Int -> IO ()
run n = writeFile "out" (show result)
  where
    -- Lift the host 'Int' into the embedded language, then execute on the GPU.
    result = GPU.run (indices (constant n))

-- | Entry point: runs the reproduction with a side length of 363.
main :: IO ()
main = run sideLength
  where
    -- <= 362 succeeds
    sideLength = 363

## mandelbrot.cabal
cabal-version: 1.12
name:          mandelbrot
version:       0.0.0
build-type:    Simple

-- The only build target. Note that the executable is named "hello" while the
-- package itself is named "mandelbrot".
executable hello
  main-is:          Main.hs
  other-modules:    Paths_mandelbrot
  ghc-options:      -Wall
  build-depends:
      accelerate
    , accelerate-llvm-ptx
    , base                 >=4 && <5
    , linear

  default-language: Haskell2010

## nvidia-device-query
CUDA device query (Driver API, statically linked)
CUDA driver version 12.1
CUDA API version 11.7
Detected 2 CUDA capable devices

Device 0: NVIDIA TITAN X (Pascal)
  CUDA capability:                          6.1
  CUDA cores:                               3584 cores in 28 multiprocessors (128 cores/MP)
  Global memory:                            12 GB
  Constant memory:                          64 kB
  Shared memory per block:                  48 kB
  Registers per block:                      65536
  Warp size:                                32
  Maximum threads per multiprocessor:       2048
  Maximum threads per block:                1024
  Maximum grid dimensions:                  2147483647 x 65535 x 65535
  Maximum block dimensions:                 1024 x 1024 x 64
  GPU clock rate:                           1.531 GHz
  Memory clock rate:                        5.005 GHz
  Memory bus width:                         384-bit
  L2 cache size:                            3 MB
  Maximum texture dimensions
    1D:                                     131072
    2D:                                     131072 x 65536
    3D:                                     16384 x 16384 x 16384
  Texture alignment:                        512 B
  Maximum memory pitch:                     2 GB
  Concurrent kernel execution:              Yes
  Concurrent copy and execution:            Yes, with 2 copy engines
  Runtime limit on kernel execution:        Yes
  Integrated GPU sharing host memory:       No
  Host page-locked memory mapping:          Yes
  ECC memory support:                       No
  Unified addressing (UVA):                 Yes
  Single to double precision performance:   32 : 1
  Supports compute pre-emption:             Yes
  Supports cooperative launch:              Yes
  Supports multi-device cooperative launch: Yes
  PCI bus/location:                         1/0
  Compute mode:                             Default
    Multiple contexts are allowed on the device simultaneously
  Peer-to-Peer access to:
    Device 1 (NVIDIA TITAN X (Pascal)): Yes

Device 1: NVIDIA TITAN X (Pascal)
  CUDA capability:                          6.1
  CUDA cores:                               3584 cores in 28 multiprocessors (128 cores/MP)
  Global memory:                            12 GB
  Constant memory:                          64 kB
  Shared memory per block:                  48 kB
  Registers per block:                      65536
  Warp size:                                32
  Maximum threads per multiprocessor:       2048
  Maximum threads per block:                1024
  Maximum grid dimensions:                  2147483647 x 65535 x 65535
  Maximum block dimensions:                 1024 x 1024 x 64
  GPU clock rate:                           1.531 GHz
  Memory clock rate:                        5.005 GHz
  Memory bus width:                         384-bit
  L2 cache size:                            3 MB
  Maximum texture dimensions
    1D:                                     131072
    2D:                                     131072 x 65536
    3D:                                     16384 x 16384 x 16384
  Texture alignment:                        512 B
  Maximum memory pitch:                     2 GB
  Concurrent kernel execution:              Yes
  Concurrent copy and execution:            Yes, with 2 copy engines
  Runtime limit on kernel execution:        Yes
  Integrated GPU sharing host memory:       No
  Host page-locked memory mapping:          Yes
  ECC memory support:                       No
  Unified addressing (UVA):                 Yes
  Single to double precision performance:   32 : 1
  Supports compute pre-emption:             Yes
  Supports cooperative launch:              Yes
  Supports multi-device cooperative launch: Yes
  PCI bus/location:                         2/0
  Compute mode:                             Default
    Multiple contexts are allowed on the device simultaneously
  Peer-to-Peer access to:
    Device 0 (NVIDIA TITAN X (Pascal)): Yes

## versions.txt
accelerate: 1.3.0.0
accelerate-llvm-ptx: 1.3.0.0

GHC: 8.10.7

CUDA: 12.1
NVIDIA driver: 530.41.03
	{
	"nodes": {
	"flake-utils": {
	"inputs": {
	"systems": "systems"
	},
	"locked": {
	"lastModified": 1681202837,
	"narHash": "sha256-H+Rh19JDwRtpVPAWp64F+rlEtxUWBAQW28eAi3SRSzg=",
	"owner": "numtide",
	"repo": "flake-utils",
	"rev": "cfacdce06f30d2b68473a46042957675eebb3401",
	"type": "github"
	},
	"original": {
	"owner": "numtide",
	"repo": "flake-utils",
	"type": "github"
	}
	},
	"nixpkgs": {
	"locked": {
	"lastModified": 1684049129,
	"narHash": "sha256-7WB9LpnPNAS8oI7hMoHeKLNhRX7k3CI9uWBRSfmOCCE=",
	"owner": "NixOS",
	"repo": "nixpkgs",
	"rev": "0470f36b02ef01d4f43c641bbf07020bcab71bf1",
	"type": "github"
	},
	"original": {
	"owner": "NixOS",
	"ref": "nixos-unstable",
	"repo": "nixpkgs",
	"type": "github"
	}
	},
	"root": {
	"inputs": {
	"flake-utils": "flake-utils",
	"nixpkgs": "nixpkgs"
	}
	},
	"systems": {
	"locked": {
	"lastModified": 1681028828,
	"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
	"owner": "nix-systems",
	"repo": "default",
	"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
	"type": "github"
	},
	"original": {
	"owner": "nix-systems",
	"repo": "default",
	"type": "github"
	}
	}
	},
	"root": "root",
	"version": 7
	}
	{
	  description = "Mandelbrot generator";

	  inputs = {
	    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
	    flake-utils.url = "github:numtide/flake-utils";
	  };

	  # Per-system outputs: the `mandelbrot` package (also exposed as `default`)
	  # and a development shell for working on it.
	  outputs = { self, nixpkgs, flake-utils }:
	    flake-utils.lib.eachDefaultSystem (system:
	      let
	        pkgs = import nixpkgs {
	          inherit system;
	          config.allowBroken = true; # accelerate marked broken, but it compiles with GHC 8.10
	          config.allowUnfree = true; # nvidia
	        };

	        # accelerate requires ghc<9.0
	        haskellPackages = pkgs.haskell.packages.ghc810.override {
	          overrides = _: prev: {
	            # Build accelerate-llvm-ptx without running its test suite.
	            accelerate-llvm-ptx = pkgs.haskell.lib.dontCheck prev.accelerate-llvm-ptx;
	          };
	        };

	        packageName = "mandelbrot";
	      in
	      {
	        packages = {
	          ${packageName} = haskellPackages.callCabal2nix packageName ./. { };
	          default = self.outputs.packages.${system}.${packageName};
	        };

	        # `devShells.<system>.default` is the current name of the deprecated
	        # `devShell.<system>` output; it matches the `packages.default` style
	        # used above and is what `nix develop` looks up first.
	        devShells.default = haskellPackages.shellFor {
	          packages = _: [ self.outputs.packages.${system}.${packageName} ];
	          withHoogle = true;
	          # Inside `with pkgs;` the name `haskellPackages` still refers to the
	          # let-bound GHC 8.10 set: lexical bindings take precedence over `with`.
	          buildInputs = with pkgs; [
	            cabal-install
	            ghcid
	            haskellPackages.haskell-language-server
	            haskellPackages.cabal-fmt
	            ormolu
	          ];
	        };
	      });
	}
	module Main where

	import Data.Array.Accelerate as A
	import qualified Data.Array.Accelerate.LLVM.PTX as GPU

	-- | An @n@ x @n@ array in which every element holds its own
	-- two-dimensional index, converted to a pair by 'unindex2'.
	indices :: Exp Int -> Acc (Array DIM2 (Int, Int))
	indices n = generate extent unindex2
	  where
	    -- Square shape with @n@ in both dimensions.
	    extent = index2 n n

	-- | Evaluate 'indices' for the given side length on the PTX (GPU) backend
	-- and write the 'show' rendering of the resulting array to the file @out@.
	run :: Int -> IO ()
	run n = writeFile "out" (show result)
	  where
	    -- Lift the host 'Int' into the embedded language, then execute on the GPU.
	    result = GPU.run (indices (constant n))

	-- | Entry point: runs the reproduction with a side length of 363.
	main :: IO ()
	main = run sideLength
	  where
	    -- <= 362 succeeds
	    sideLength = 363
	cabal-version: 1.12
	name: mandelbrot
	version: 0.0.0
	build-type: Simple

	-- The only build target. Note that the executable is named "hello" while the
	-- package itself is named "mandelbrot".
	executable hello
	main-is: Main.hs
	other-modules: Paths_mandelbrot
	ghc-options: -Wall
	build-depends:
	accelerate
	, accelerate-llvm-ptx
	, base >=4 && <5
	, linear

	default-language: Haskell2010
	CUDA device query (Driver API, statically linked)
	CUDA driver version 12.1
	CUDA API version 11.7
	Detected 2 CUDA capable devices

	Device 0: NVIDIA TITAN X (Pascal)
	CUDA capability: 6.1
	CUDA cores: 3584 cores in 28 multiprocessors (128 cores/MP)
	Global memory: 12 GB
	Constant memory: 64 kB
	Shared memory per block: 48 kB
	Registers per block: 65536
	Warp size: 32
	Maximum threads per multiprocessor: 2048
	Maximum threads per block: 1024
	Maximum grid dimensions: 2147483647 x 65535 x 65535
	Maximum block dimensions: 1024 x 1024 x 64
	GPU clock rate: 1.531 GHz
	Memory clock rate: 5.005 GHz
	Memory bus width: 384-bit
	L2 cache size: 3 MB
	Maximum texture dimensions
	1D: 131072
	2D: 131072 x 65536
	3D: 16384 x 16384 x 16384
	Texture alignment: 512 B
	Maximum memory pitch: 2 GB
	Concurrent kernel execution: Yes
	Concurrent copy and execution: Yes, with 2 copy engines
	Runtime limit on kernel execution: Yes
	Integrated GPU sharing host memory: No
	Host page-locked memory mapping: Yes
	ECC memory support: No
	Unified addressing (UVA): Yes
	Single to double precision performance: 32 : 1
	Supports compute pre-emption: Yes
	Supports cooperative launch: Yes
	Supports multi-device cooperative launch: Yes
	PCI bus/location: 1/0
	Compute mode: Default
	Multiple contexts are allowed on the device simultaneously
	Peer-to-Peer access to:
	Device 1 (NVIDIA TITAN X (Pascal)): Yes

	Device 1: NVIDIA TITAN X (Pascal)
	CUDA capability: 6.1
	CUDA cores: 3584 cores in 28 multiprocessors (128 cores/MP)
	Global memory: 12 GB
	Constant memory: 64 kB
	Shared memory per block: 48 kB
	Registers per block: 65536
	Warp size: 32
	Maximum threads per multiprocessor: 2048
	Maximum threads per block: 1024
	Maximum grid dimensions: 2147483647 x 65535 x 65535
	Maximum block dimensions: 1024 x 1024 x 64
	GPU clock rate: 1.531 GHz
	Memory clock rate: 5.005 GHz
	Memory bus width: 384-bit
	L2 cache size: 3 MB
	Maximum texture dimensions
	1D: 131072
	2D: 131072 x 65536
	3D: 16384 x 16384 x 16384
	Texture alignment: 512 B
	Maximum memory pitch: 2 GB
	Concurrent kernel execution: Yes
	Concurrent copy and execution: Yes, with 2 copy engines
	Runtime limit on kernel execution: Yes
	Integrated GPU sharing host memory: No
	Host page-locked memory mapping: Yes
	ECC memory support: No
	Unified addressing (UVA): Yes
	Single to double precision performance: 32 : 1
	Supports compute pre-emption: Yes
	Supports cooperative launch: Yes
	Supports multi-device cooperative launch: Yes
	PCI bus/location: 2/0
	Compute mode: Default
	Multiple contexts are allowed on the device simultaneously
	Peer-to-Peer access to:
	Device 0 (NVIDIA TITAN X (Pascal)): Yes
	accelerate: 1.3.0.0
	accelerate-llvm-ptx: 1.3.0.0

	GHC: 8.10.7

	CUDA: 12.1
	NVIDIA driver: 530.41.03