Skip to content

Instantly share code, notes, and snippets.

@mcwitt

mcwitt/Main.hs Secret

Last active May 30, 2023 01:49
Show Gist options
  • Save mcwitt/8065817084a202e72b6c27090432ce28 to your computer and use it in GitHub Desktop.
Save mcwitt/8065817084a202e72b6c27090432ce28 to your computer and use it in GitHub Desktop.
{
"nodes": {
"flake-utils": {
"inputs": {
"systems": "systems"
},
"locked": {
"lastModified": 1681202837,
"narHash": "sha256-H+Rh19JDwRtpVPAWp64F+rlEtxUWBAQW28eAi3SRSzg=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "cfacdce06f30d2b68473a46042957675eebb3401",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1684049129,
"narHash": "sha256-7WB9LpnPNAS8oI7hMoHeKLNhRX7k3CI9uWBRSfmOCCE=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "0470f36b02ef01d4f43c641bbf07020bcab71bf1",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixos-unstable",
"repo": "nixpkgs",
"type": "github"
}
},
"root": {
"inputs": {
"flake-utils": "flake-utils",
"nixpkgs": "nixpkgs"
}
},
"systems": {
"locked": {
"lastModified": 1681028828,
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
"owner": "nix-systems",
"repo": "default",
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
"type": "github"
},
"original": {
"owner": "nix-systems",
"repo": "default",
"type": "github"
}
}
},
"root": "root",
"version": 7
}
# Nix flake for the "mandelbrot" Haskell package: exposes the package built
# from the local cabal file and a development shell, for every default system
# enumerated by flake-utils.
{
  description = "Mandelbrot generator";

  inputs = {
    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
    flake-utils.url = "github:numtide/flake-utils";
  };

  outputs = { self, nixpkgs, flake-utils }:
    flake-utils.lib.eachDefaultSystem (system:
      let
        packageName = "mandelbrot";

        pkgs = import nixpkgs {
          inherit system;
          config = {
            # accelerate is marked broken in nixpkgs, but (per the original
            # author's note) it compiles with GHC 8.10.
            allowBroken = true;
            # Author's note: needed for the NVIDIA packages.
            allowUnfree = true;
          };
        };

        # accelerate requires GHC < 9.0, hence the pinned ghc810 package set.
        haskellPackages = pkgs.haskell.packages.ghc810.override {
          overrides = _final: prev: {
            # Build accelerate-llvm-ptx without running its test suite.
            # NOTE(review): the reason for skipping the tests is not recorded
            # here -- confirm before removing this override.
            accelerate-llvm-ptx = pkgs.haskell.lib.dontCheck prev.accelerate-llvm-ptx;
          };
        };

        # The package as exposed through this flake's own outputs.
        thisPackage = self.outputs.packages.${system}.${packageName};
      in
      {
        packages = {
          ${packageName} = haskellPackages.callCabal2nix packageName ./. { };
          default = thisPackage;
        };

        # Development shell with the package's dependencies plus tooling.
        devShell = haskellPackages.shellFor {
          packages = _: [ thisPackage ];
          withHoogle = true;
          buildInputs = [
            pkgs.cabal-install
            pkgs.ghcid
            haskellPackages.haskell-language-server
            haskellPackages.cabal-fmt
            pkgs.ormolu
          ];
        };
      });
}
module Main where
import Data.Array.Accelerate as A
import qualified Data.Array.Accelerate.LLVM.PTX as GPU
-- | Build a square two-dimensional array with the given side length in which
-- every element is its own index, converted to a pair by 'A.unindex2'.
indices :: A.Exp Int -> A.Acc (A.Array A.DIM2 (Int, Int))
indices side = A.generate gridShape A.unindex2
  where
    -- Same extent along both dimensions.
    gridShape = A.I2 side side
-- | Evaluate 'indices' for the given side length on the PTX backend and write
-- the 'show'n result to the file @out@ (relative to the working directory).
run :: Int -> IO ()
run side = writeFile "out" (show rendered)
  where
    -- Lift the host-side 'Int' into the embedded language, then execute.
    rendered = GPU.run (indices (constant side))
-- | Program entry point: runs the reproduction at a fixed grid side length.
main :: IO ()
main = run sideLength
  where
    -- Author's note: values <= 362 succeed.
    -- NOTE(review): 363 is presumably the smallest failing size -- confirm.
    sideLength :: Int
    sideLength = 363
cabal-version: 1.12
-- Package description for the Accelerate-based Mandelbrot generator.
name:          mandelbrot
version:       0.0.0
build-type:    Simple

-- Single executable built from Main.hs.
-- NOTE(review): the executable is named "hello" while the package is
-- "mandelbrot" -- confirm this is intentional.
executable hello
  main-is:          Main.hs
  -- Paths_mandelbrot is the module Cabal generates for the package.
  other-modules:    Paths_mandelbrot
  default-language: Haskell2010
  ghc-options:      -Wall
  build-depends:
      accelerate
    , accelerate-llvm-ptx
    , base >=4 && <5
    , linear
CUDA device query (Driver API, statically linked)
CUDA driver version 12.1
CUDA API version 11.7
Detected 2 CUDA capable devices
Device 0: NVIDIA TITAN X (Pascal)
CUDA capability: 6.1
CUDA cores: 3584 cores in 28 multiprocessors (128 cores/MP)
Global memory: 12 GB
Constant memory: 64 kB
Shared memory per block: 48 kB
Registers per block: 65536
Warp size: 32
Maximum threads per multiprocessor: 2048
Maximum threads per block: 1024
Maximum grid dimensions: 2147483647 x 65535 x 65535
Maximum block dimensions: 1024 x 1024 x 64
GPU clock rate: 1.531 GHz
Memory clock rate: 5.005 GHz
Memory bus width: 384-bit
L2 cache size: 3 MB
Maximum texture dimensions
1D: 131072
2D: 131072 x 65536
3D: 16384 x 16384 x 16384
Texture alignment: 512 B
Maximum memory pitch: 2 GB
Concurrent kernel execution: Yes
Concurrent copy and execution: Yes, with 2 copy engines
Runtime limit on kernel execution: Yes
Integrated GPU sharing host memory: No
Host page-locked memory mapping: Yes
ECC memory support: No
Unified addressing (UVA): Yes
Single to double precision performance: 32 : 1
Supports compute pre-emption: Yes
Supports cooperative launch: Yes
Supports multi-device cooperative launch: Yes
PCI bus/location: 1/0
Compute mode: Default
Multiple contexts are allowed on the device simultaneously
Peer-to-Peer access to:
Device 1 (NVIDIA TITAN X (Pascal)): Yes
Device 1: NVIDIA TITAN X (Pascal)
CUDA capability: 6.1
CUDA cores: 3584 cores in 28 multiprocessors (128 cores/MP)
Global memory: 12 GB
Constant memory: 64 kB
Shared memory per block: 48 kB
Registers per block: 65536
Warp size: 32
Maximum threads per multiprocessor: 2048
Maximum threads per block: 1024
Maximum grid dimensions: 2147483647 x 65535 x 65535
Maximum block dimensions: 1024 x 1024 x 64
GPU clock rate: 1.531 GHz
Memory clock rate: 5.005 GHz
Memory bus width: 384-bit
L2 cache size: 3 MB
Maximum texture dimensions
1D: 131072
2D: 131072 x 65536
3D: 16384 x 16384 x 16384
Texture alignment: 512 B
Maximum memory pitch: 2 GB
Concurrent kernel execution: Yes
Concurrent copy and execution: Yes, with 2 copy engines
Runtime limit on kernel execution: Yes
Integrated GPU sharing host memory: No
Host page-locked memory mapping: Yes
ECC memory support: No
Unified addressing (UVA): Yes
Single to double precision performance: 32 : 1
Supports compute pre-emption: Yes
Supports cooperative launch: Yes
Supports multi-device cooperative launch: Yes
PCI bus/location: 2/0
Compute mode: Default
Multiple contexts are allowed on the device simultaneously
Peer-to-Peer access to:
Device 0 (NVIDIA TITAN X (Pascal)): Yes
accelerate: 1.3.0.0
accelerate-llvm-ptx: 1.3.0.0
GHC: 8.10.7
CUDA: 12.1
NVIDIA driver: 530.41.03
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment