# Ask systemd to re-read its unit files so new or edited units are picked up.
systemctl daemon-reload
# Enable and, because of --now, immediately start three timer units.
# The {a,b,c} list relies on the shell's brace expansion (bash/zsh; not plain
# POSIX sh) and expands to job@08:00.timer, job@12:00.timer and job@21:00.timer.
# NOTE(review): presumably these are instances of a job@.timer template whose
# instance name is used as the schedule -- confirm against the unit file.
systemctl enable --now job@{08:00,12:00,21:00}.timer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <memory> | |
#include <mutex> | |
#include <thread> | |
class MeanTracker { | |
int total_ = 0; | |
float mean_ = 0.f; | |
public: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
// Inclusive prefix sum across the lanes of a single warp.
//
// Every lane passes in its own `val`. On return, lane i holds the sum of the
// `val` arguments supplied by lanes 0..i of the same warp.
//
// Preconditions:
//   - All 32 lanes of the warp must reach this call together: the shuffle is
//     issued with the full participation mask 0xffffffff.
//   - The lane index is derived from threadIdx.x only, so this presumably
//     targets 1-D thread blocks -- confirm before using with 2-D/3-D blocks.
__device__ int warpInclusiveScan(int val) {
    const int lane = threadIdx.x % warpSize;
    int stride = 1;
    while (stride < 32) {
        // Read the running sum held by the lane `stride` positions below us.
        const int below = __shfl_up_sync(0xffffffff, val, stride);
        // Lanes with no source lane that far down just get their own value
        // back from the shuffle, so they must not accumulate it.
        val += (lane >= stride) ? below : 0;
        stride <<= 1;
    }
    return val;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/python/aitemplate/backend/cuda/conv2d/common.py b/python/aitemplate/backend/cuda/conv2d/common.py | |
index 8cf7fb2..ca13a72 100644 | |
--- a/python/aitemplate/backend/cuda/conv2d/common.py | |
+++ b/python/aitemplate/backend/cuda/conv2d/common.py | |
@@ -501,6 +501,7 @@ def emit_instance(op): | |
emiter = cutlass_lib.conv2d_operation.EmitConv2dWithBroadcastInstance() | |
else: | |
emiter = cutlass_lib.conv2d_operation.EmitConv2dInstance() | |
+ op.tile_description.stages = 2 | |
op_def = emiter.emit(op) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
static size_t GLOBAL_WORKSPACE_SIZE_DeviceConvFwdInstance_0 = 0; | |
#include <cstdio> | |
#include <stdexcept> | |
#include "cutlass/cutlass.h" | |
#include "cutlass/conv/kernel/default_conv2d_fprop.h" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Maintainer: Leo Mao <leomaoyw at gmail dot com> | |
pkgname=python-apex-git | |
_pkgname=apex | |
pkgver=23.08 | |
pkgrel=1 | |
pkgdesc="A PyTorch Extension: Tools for easy mixed precision and distributed training in Pytorch" | |
arch=('x86_64') | |
url="https://github.com/NVIDIA/apex" | |
_github='NVIDIA/apex' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/megatron/arguments.py b/megatron/arguments.py | |
index 4bf1d72..bcea6ce 100644 | |
--- a/megatron/arguments.py | |
+++ b/megatron/arguments.py | |
@@ -1090,7 +1090,7 @@ def _add_distributed_args(parser): | |
default=False, help='If set, use custom-built ring exchange ' | |
'for p2p communications. Note that this option will require ' | |
'a custom built image that support ring-exchange p2p.') | |
- group.add_argument('--local_rank', type=int, default=None, | |
+ group.add_argument('--local-rank', type=int, default=None, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import os | |
import sys | |
import shlex | |
os.putenv('RANK', os.getenv('PMI_RANK')) | |
os.putenv('WORLD_SIZE', os.getenv('PMI_SIZE')) | |
os.putenv('LOCAL_RANK', os.getenv('PMI_LOCAL_RANK')) | |
os.putenv('LOCAL_SIZE', os.getenv('PMI_LOCAL_SIZE')) | |
argv = [] | |
if os.getenv('PMI_RANK') == '0': |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <fcntl.h> | |
#include <stdint.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <sys/mman.h> | |
#include <time.h> | |
#include <unistd.h> | |
#define BASE_SIZE 128 // array base size = 1KB | |
#define MAX_SIZE 17 // array size ranges from 1KB to 64MB |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[Unit] | |
Description=Reverse SSH connection | |
After=network.target | |
[Service] | |
Type=simple | |
ExecStart=/usr/bin/ssh -N -T -o "ServerAliveInterval 10" -o "ExitOnForwardFailure yes" -R 2222:localhost:22 chengscott@host | |
Restart=always | |
RestartSec=5s |
Newer Older