Marek Kolodziej mkolod

  • San Francisco Bay Area, CA
@mkolod
mkolod / dft_idft.ipynb
Created July 7, 2021 02:31
DFT / IDFT
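The notebook preview is unavailable here, so below is a minimal NumPy sketch of a naive DFT/IDFT pair for reference; it is illustrative only and not necessarily the notebook's code.

import numpy as np

def dft(x):
    """Naive O(n^2) discrete Fourier transform via the DFT matrix."""
    n = len(x)
    k = np.arange(n)
    # W[j, k] = exp(-2*pi*i*j*k / n)
    W = np.exp(-2j * np.pi * np.outer(k, k) / n)
    return W @ x

def idft(X):
    """Inverse DFT; note the sign flip and the 1/n normalization."""
    n = len(X)
    k = np.arange(n)
    W = np.exp(2j * np.pi * np.outer(k, k) / n)
    return (W @ X) / n

x = np.random.randn(8)
assert np.allclose(idft(dft(x)), x)         # round trip recovers the signal
assert np.allclose(dft(x), np.fft.fft(x))   # matches NumPy's FFT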
@mkolod
mkolod / parallel_reservoir_sampling_equivalent.ipynb
Created June 9, 2021 16:59
Parallel Reservoir Sampling Equivalent
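This notebook preview is unavailable as well. The title suggests per-partition reservoir sampling followed by a merge that is distributionally equivalent to sampling the whole stream at once; the sketch below illustrates that idea with Algorithm R plus a size-weighted merge. The function names and merge scheme are my own illustration, not the notebook's code.

import random

def reservoir_sample(stream, k):
    """Algorithm R: uniform sample of k items from an iterable of unknown length."""
    reservoir = []
    for i, item in enumerate(stream):
        if i < k:
            reservoir.append(item)
        else:
            j = random.randint(0, i)
            if j < k:
                reservoir[j] = item
    return reservoir

def merge_reservoirs(reservoirs, sizes, k):
    """Merge per-partition reservoirs into one k-sample, weighting each
    partition by how many items it originally saw."""
    reservoirs = [list(r) for r in reservoirs]
    sizes = list(sizes)
    merged = []
    for _ in range(k):
        # Pick a partition with probability proportional to its remaining count,
        # then take a random element from that partition's reservoir.
        p = random.choices(range(len(sizes)), weights=sizes)[0]
        merged.append(reservoirs[p].pop(random.randrange(len(reservoirs[p]))))
        sizes[p] -= 1
    return merged

parts = [range(0, 1000), range(1000, 3000)]
reservoirs = [reservoir_sample(p, 10) for p in parts]
print(merge_reservoirs(reservoirs, [len(p) for p in parts], 10))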
@mkolod
mkolod / disjoint_set_forest.py
Last active January 7, 2021 05:24
Disjoint Set Forest
class DisjointForest:
    class Subset:
        def __init__(self, elem, parent=None, rank=0):
            self.elem = elem
            self.parent = parent
            self.rank = rank

        def __repr__(self):
            # Preview cuts off here; a minimal repr for completeness.
            return f"Subset(elem={self.elem!r}, rank={self.rank})"
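The preview stops at the nested Subset class, so here is a minimal, self-contained sketch of the operations such a forest typically exposes (find with path compression, union by rank). The dict-based layout and names below are illustrative, not the gist's implementation.

class DisjointSetForest:
    """Union-find with path compression and union by rank (illustrative sketch)."""

    def __init__(self):
        self.parent = {}
        self.rank = {}

    def make_set(self, x):
        self.parent.setdefault(x, x)
        self.rank.setdefault(x, 0)

    def find(self, x):
        # Path compression: point every node on the path directly at the root.
        if self.parent[x] != x:
            self.parent[x] = self.find(self.parent[x])
        return self.parent[x]

    def union(self, a, b):
        ra, rb = self.find(a), self.find(b)
        if ra == rb:
            return
        # Union by rank: attach the shallower tree under the deeper one.
        if self.rank[ra] < self.rank[rb]:
            ra, rb = rb, ra
        self.parent[rb] = ra
        if self.rank[ra] == self.rank[rb]:
            self.rank[ra] += 1

f = DisjointSetForest()
for x in "abcd":
    f.make_set(x)
f.union("a", "b"); f.union("c", "d")
print(f.find("a") == f.find("b"), f.find("a") == f.find("c"))  # True False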
#include <chrono>
#include <cmath>
#include <future>
#include <iostream>
#include <memory>
#include <mutex>
#include <thread>
template<typename Ret, typename Fun, typename Arg>
class ReusableWorkerThreadWithFuture {
# NOTE: The network here is not meant to make any sense. It's just for measuring perf impact.
import torch
import torch.nn.functional as F
from time import time

class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        fcs = [torch.nn.Linear(10, 100)] + [torch.nn.Linear(100, 100) for _ in range(20)]
        self.fcs = torch.nn.Sequential(*fcs)
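The preview cuts off before the forward pass and the timing loop. Below is a rough sketch of how such a perf measurement might look; the forward definition, input shape, and iteration counts are assumptions rather than the gist's code.

class TimedNet(Net):
    def forward(self, x):
        # Hypothetical forward: just push the input through the stacked layers.
        return F.relu(self.fcs(x))

net = TimedNet().eval()
x = torch.randn(32, 10)    # input shape chosen to match Linear(10, 100)

with torch.no_grad():
    for _ in range(5):     # warm-up so one-time setup costs don't skew the timing
        net(x)
    start = time()
    iters = 100
    for _ in range(iters):
        net(x)
print(f"avg forward time: {(time() - start) / iters * 1e3:.3f} ms")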
#include <stdio.h>
#include <thread>
#include <chrono>
#include <iostream>
const int N = 1 << 20;
__global__ void kernel(float *x, int n)
{
    int tid = threadIdx.x + blockIdx.x * blockDim.x;
#include <chrono>
#include <iostream>
#include <vector>
#include <thread>
__global__ void do_nothing(int time_us, int clock_rate) {
    clock_t start = clock64();
    clock_t end;
    for (;;) {
        end = clock64();
@mkolod
mkolod / redirect_streams_and_cuda_checks.cu
Last active September 27, 2020 04:12
Redirect Streams and CUDA checks
#include <csignal>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <unistd.h>
#include <limits.h>
#include <sstream>
#include <stdexcept>

ImageNet validation set fix:

  1. The training set is organized in directories, with each directory matching a class, e.g. "n01751748" matching "sea snake." However, the valset is a flat dir of JPEGs. The ImageNet labels provided in the devkit for the validation set (ILSVRC2012_validation_ground_truth.txt) are not consistent with the ordering used by PyTorch/TF/Keras/MXNet/Caffe, etc. for pre-trained models. For example, in the above ground truth label file, "sea snake" is 490, but in PyTorch/TF, it's 65.
    Proof:
  2. Untar the valset file; you will get a flat dir of JPEGs.
  3. Pull the unflattening script into the directory where the val images were unpacked (a rough sketch of what such a script does is shown below).
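A rough sketch of what an unflattening script can do, assuming a precomputed mapping from each validation filename to its synset ID. The mapping file name below is hypothetical; deriving it from ILSVRC2012_validation_ground_truth.txt and the devkit's synset metadata is not shown here.

import os
import shutil

# Hypothetical mapping file: one line per validation image,
# "<filename> <synset id>", e.g. "ILSVRC2012_val_00000001.JPEG n01751748".
MAPPING = "val_to_synset.txt"   # assumed name
VAL_DIR = "val"                 # flat directory of validation JPEGs

with open(MAPPING) as f:
    for line in f:
        fname, synset = line.split()
        dst_dir = os.path.join(VAL_DIR, synset)
        os.makedirs(dst_dir, exist_ok=True)
        shutil.move(os.path.join(VAL_DIR, fname), os.path.join(dst_dir, fname))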