Sam Gross (colesbury), GitHub gists
diff --git a/Makefile.pre.in b/Makefile.pre.in
index 21b122ae0f..37a8b06987 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -439,6 +439,7 @@ PYTHON_OBJS= \
 		Python/modsupport.o \
 		Python/mysnprintf.o \
 		Python/mystrtoul.o \
+		Python/object_stack.o \
 		Python/optimizer.o \
import time
import json
import math
import subprocess

LOOPS = 1000

def main():
    times = []
    for i in range(1):
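The gist preview cuts the script off inside main(). Below is a minimal, self-contained sketch of how a timing harness with these imports typically continues, under the assumption that it times an external command with subprocess and reports the results as JSON; the command being timed and the output fields are placeholders, not the original gist's code.

# Hypothetical sketch, not the original gist: time a placeholder command
# LOOPS times and print summary statistics as JSON.
import time
import json
import math
import subprocess

LOOPS = 1000

def main():
    times = []
    for _ in range(LOOPS):
        start = time.perf_counter()
        subprocess.run(["true"], check=True)  # placeholder for the benchmarked command
        times.append(time.perf_counter() - start)
    print(json.dumps({
        "mean": math.fsum(times) / len(times),
        "min": min(times),
        "max": max(times),
    }))

if __name__ == "__main__":
    main()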
185.2% logging_silent
122.9% deltablue
107.0% richards
89.2% unpack_sequence
87.2% go
71.8% fannkuch
68.1% scimark_sor
61.5% unpickle_pure_python
60.8% pyflate
60.4% logging_simple
==================
WARNING: ThreadSanitizer: data race (pid=48373)
Read of size 8 at 0x7b5000000080 by main thread:
#0 memcpy <null> (python+0x449377)
#1 _Py_qsbr_register /private/home/sgross/scratch/nogil/Python/qsbr.c:192:5 (python+0x68b550)
#2 new_threadstate /private/home/sgross/scratch/nogil/Python/pystate.c:940:20 (python+0x67fc02)
#3 _PyThreadState_Prealloc /private/home/sgross/scratch/nogil/Python/pystate.c:986:12 (python+0x67fdc7)
#4 thread_PyThread_start_new_thread /private/home/sgross/scratch/nogil/./Modules/_threadmodule.c:1338:29 (python+0x71ba6e)
#5 cfunction_call /private/home/sgross/scratch/nogil/Objects/methodobject.c:471:18 (python+0x791ac9)
#6 _PyObject_MakeTpCall /private/home/sgross/scratch/nogil/Objects/call.c:189:18 (python+0x4e0d46)
import threading
from queue import Queue

import numpy as np

def thread1(queue):
    # Write into the shared array's buffer.
    queue.get().fill(5)

def thread2(queue):
    # Reallocate the same array's buffer concurrently.
    queue.get().resize((1, 1))
INFO:root:World size is : 8
INFO:root:Running without GIL
INFO:root:Set start method of multiprocessing to spawn
INFO:root:Using the config
{"seed": 0, "data": {"dataset": "iamdb", "data_path": "/datasets01/iamdb/060820/", "img_height": 64}, "criterion_type": "transducer", "criterion": {"blank": true, "allow_repeats": false, "ngram": 0}, "model_type": "tds2d", "model": {"depth": 4, "tds_groups": [{"channels": 4, "num_blocks": 3, "stride": [2, 2]}, {"channels": 16, "num_blocks": 3, "stride": [2, 2]}, {"channels": 32, "num_blocks": 3, "stride": [2, 1]}, {"channels": 64, "num_blocks": 3, "stride": [2, 1]}], "kernel_size": [5, 7], "dropout": 0.1}, "optim": {"batch_size": 32, "epochs": 400, "learning_rate": 0.1, "crit_learning_rate": 0.1, "step_size": 100, "max_grad_norm": 5}}
INFO:root:Loading dataset ...
INFO:root:Loading model ...
INFO:root:Training tds2d model with 2,644,859 parameters.
INFO:root:Starting training ...
INFO:root:Epoch 1 started.
import random
import os
import tempfile
import torch
import torch.distributed as dist
import torch.nn as nn
import torch.optim as optim
import torch.multiprocessing as mp
from torch.nn.parallel import DistributedDataParallel as DDP
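The preview for this gist stops at the imports. The sketch below (hypothetical, relying on the imports above) shows the standard single-node CPU DistributedDataParallel pattern those imports imply: spawn one process per rank, initialize a gloo process group, wrap a model in DDP, and run a single training step. The model, world size, and port are placeholders, not the gist's actual code.

# Hypothetical sketch building on the imports above; not the original gist's code.
def setup(rank, world_size):
    # Rendezvous over the default env:// init method.
    os.environ["MASTER_ADDR"] = "localhost"
    os.environ["MASTER_PORT"] = "29500"
    dist.init_process_group("gloo", rank=rank, world_size=world_size)

def demo_basic(rank, world_size):
    setup(rank, world_size)
    model = nn.Linear(10, 5)            # placeholder model
    ddp_model = DDP(model)              # CPU DDP: no device_ids needed
    optimizer = optim.SGD(ddp_model.parameters(), lr=0.001)
    optimizer.zero_grad()
    loss = ddp_model(torch.randn(20, 10)).sum()
    loss.backward()                     # gradients are all-reduced across ranks
    optimizer.step()
    dist.destroy_process_group()

if __name__ == "__main__":
    world_size = 2
    mp.spawn(demo_basic, args=(world_size,), nprocs=world_size, join=True)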
import gc
from time import sleep
import numpy as np
print('allocating memory')
pile = []
for i in range(1500):
    for x in range(4):
        pile.append(np.ones((256, 64), dtype=np.float32))
    pile.append(np.ones((256, 64), dtype=np.float32))
// Compile with g++ -O3 alloc.cpp -lnuma
#include <vector>
#include <memory>
#include <iostream>
#include <unistd.h>
#include <assert.h>
#include <numa.h>
#include <numaif.h>
int USE_MBIND = 0;