A.J antferdom
Ref: Exclusive Q&A: John Carmack’s ‘Different Path’ to Artificial General Intelligence

"So I asked Ilya Sutskever, OpenAI’s chief scientist, for a reading list. He gave me a list of like 40 research papers and said, ‘If you really learn all of these, you’ll know 90% of what matters today.’ And I did. I plowed through all those things and it all started sorting out in my head."

Ref: https://x.com/ID_AA_Carmack/status/1622673143469858816

I rather expected @ilyasut to have made a public post by now after all the discussion of the AI reading list he gave me. A canonical list of references from a leading figure would be appreciated by many. I would be curious myself about what he would add from the last three years.

Papers

@geohot
geohot / llm.c
Last active May 1, 2024 13:41
Outputted llm.c from tinygrad
#include <stdlib.h>
#include <stdbool.h>
#include <tgmath.h>
#define max(x,y) (((x)>(y))?(x):(y))
#define half __fp16
void E_(int* data0) {
int val0 = data0[0];
data0[0] = (val0+1);
}
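For clarity, the generated kernel's effect can be restated in Python: `E_` takes a pointer to a single int and writes that value plus one back. A minimal ctypes sketch (the `buf` name and the Python stand-in are mine, not from the gist):

```python
import ctypes

# Python stand-in for the generated kernel: reads data0[0], writes it back incremented
def E_(data0):
    val0 = data0[0]
    data0[0] = val0 + 1

buf = (ctypes.c_int * 1)(41)  # one-element int buffer, playing the role of int* data0
E_(buf)
# buf[0] is now 42
```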
[55883.721977] amdgpu: map VA 0x702eae9d2000 - 0x702eae9d3000 in entry 0000000072d2b750
[55883.721996] amdgpu: INC mapping count 1
[55883.722133] kfd kfd: amdgpu: ioctl cmd 0xc0184b0c (#0xc), arg 0x7ffe16172bef
[55883.722238] gmc_v11_0_process_interrupt: 6 callbacks suppressed
[55883.722250] amdgpu 0000:c3:00.0: amdgpu: [gfxhub] page fault (src_id:0 ring:24 vmid:8 pasid:32774, for process python3 pid 356134 thread python3 pid 356134)
[55883.722343] amdgpu 0000:c3:00.0: amdgpu: in page starting at address 0x00000000aabbc000 from client 10
[55883.722391] amdgpu 0000:c3:00.0: amdgpu: GCVM_L2_PROTECTION_FAULT_STATUS:0x00800A30
[55883.722429] amdgpu 0000:c3:00.0: amdgpu: Faulty UTCL2 client ID: CPC (0x5)
[55883.722466] amdgpu 0000:c3:00.0: amdgpu: MORE_FAULTS: 0x0
[55883.722497] amdgpu 0000:c3:00.0: amdgpu: WALKER_ERROR: 0x0
@geohot
geohot / hip.py
Created November 25, 2023 23:28
Wrapper for HIP
# -*- coding: utf-8 -*-
#
# TARGET arch is: ['-D__HIP_PLATFORM_AMD__', '-I/opt/rocm/include']
# WORD_SIZE is: 8
# POINTER_SIZE is: 8
# LONGDOUBLE_SIZE is: 16
#
import ctypes
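The generated wrapper relies on the standard ctypes foreign-function pattern: load a shared library, declare argument and return types, then call. A minimal sketch of that pattern using libc's `strlen` as a stand-in (no HIP runtime is assumed present; a real HIP wrapper would load libamdhip64 instead):

```python
import ctypes
import ctypes.util

# locate and load the C runtime; falls back to the running process's symbols
libc = ctypes.CDLL(ctypes.util.find_library("c") or None)

# declare the signature so ctypes marshals arguments and the return value correctly
libc.strlen.argtypes = [ctypes.c_char_p]
libc.strlen.restype = ctypes.c_size_t

length = libc.strlen(b"rocm")
```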
@geohot
geohot / memcpy.py
Created November 21, 2023 19:21
Fast memcpy using GPUs
# tiny@tiny9:~/tinygrad$ python3 examples/benchmark_copies.py
# CPU copy 6.18 ms, 16.28 GB/s
# GPU copy 4.38 ms, 23.00 GB/s
# GPU 6x 1.85 ms, 54.54 GB/s
import time
def timeit(fxn):
tms = []
for _ in range(10):
st = time.perf_counter()
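The preview cuts off inside `timeit`. A self-contained, CPU-only sketch of the same min-over-runs harness, applied to a plain `bytearray` copy (the buffer size and names here are mine, not the gist's):

```python
import time

def timeit(fxn, runs=10):
    # best-of-N wall-clock timing, as in the gist's harness
    tms = []
    for _ in range(runs):
        st = time.perf_counter()
        fxn()
        tms.append(time.perf_counter() - st)
    return min(tms)

N = 1 << 24  # 16 MiB test buffer (hypothetical size)
src, dst = bytearray(N), bytearray(N)

def copy():
    dst[:] = src  # CPU memcpy via slice assignment

t = timeit(copy)
gbps = N / t / 1e9  # achieved copy bandwidth
```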
@thecharlieblake
thecharlieblake / np2torch.py
Last active October 13, 2023 11:39
Given a numpy function, prints equivalent PyTorch code (as canonical ATen ops) and returns it as a new function.
from typing import Callable, List
import numpy as np
import torch
from torch._dynamo.backends.common import aot_autograd
from torch.fx.graph_module import GraphModule
# NOTE: requires torch >= 2.1.0
def np2torch(fn: Callable) -> Callable:
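np2torch leans on torch 2.x's dynamo/AOT machinery to capture the function as canonical ATen ops. The underlying idea, running the function once on proxy objects that record every operation into a graph, can be sketched without torch (the `aten.*` op names and tensor names below are illustrative, not real API output):

```python
ops = []  # recorded (op, lhs, rhs, out) tuples: a tiny stand-in for an FX graph

class Proxy:
    """Records arithmetic performed on it instead of computing values."""
    def __init__(self, name):
        self.name = name

    def _record(self, op, other):
        out = Proxy(f"t{len(ops)}")
        ops.append((op, self.name, getattr(other, "name", other), out.name))
        return out

    def __add__(self, other): return self._record("aten.add", other)
    def __mul__(self, other): return self._record("aten.mul", other)

def f(x, y):
    return x * y + y

result = f(Proxy("x"), Proxy("y"))
# ops == [("aten.mul", "x", "y", "t0"), ("aten.add", "t0", "y", "t1")]
```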
@thesephist
thesephist / gpt2_xl_perplexities.py
Created September 4, 2023 20:23
Code (most of it) for my GPT2 perplexities visualizer UI: https://twitter.com/thesephist/status/1617747154231259137
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import GPT2Tokenizer, GPT2LMHeadModel
device = 'cuda' if torch.cuda.is_available() else 'cpu'
ppl_model_name = 'gpt2-xl' if device == 'cuda' else 'gpt2'
ppl_tokenizer = GPT2Tokenizer.from_pretrained(ppl_model_name)
load_opts = {
'device_map': 'auto',
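The quantity being visualized is standard: per-token surprisal is the negative log-probability the model assigns to each token, and sequence perplexity is the exponential of the mean negative log-likelihood. With toy probabilities standing in for GPT-2's softmax outputs:

```python
import math

# toy per-token probabilities in place of GPT-2 softmax outputs
probs = [0.5, 0.25, 0.125]

surprisal_bits = [-math.log2(p) for p in probs]  # bits per token: [1.0, 2.0, 3.0]
nll = [-math.log(p) for p in probs]              # per-token negative log-likelihood
ppl = math.exp(sum(nll) / len(nll))              # sequence perplexity, here 4.0
```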
import torch
from transformers import AutoTokenizer, RobertaForMaskedLM, AutoConfig
from transformers.pipelines.base import infer_framework_load_model
from os import path
from huggingface_hub import hf_hub_download
def compare_models(pt_mdl, sf_mdl):
# A blend of convert.py's generalized check_final_model with concrete usage example to demonstrate
sf_dict = sf_mdl.state_dict()
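The comparison boils down to checking that both checkpoints expose the same parameter names with near-identical values. A dict-based sketch of that check (names and tolerance are mine; real state dicts hold tensors, here plain lists of floats):

```python
def compare_state_dicts(a, b, atol=1e-6):
    # keys present in exactly one of the two checkpoints
    missing = set(a) ^ set(b)
    # max elementwise absolute difference for each shared key
    diffs = {k: max(abs(x - y) for x, y in zip(a[k], b[k]))
             for k in set(a) & set(b)}
    mismatched = {k for k, d in diffs.items() if d > atol}
    return missing, mismatched

pt = {"embed.weight": [0.1, 0.2], "lm_head.bias": [0.0]}
sf = {"embed.weight": [0.1, 0.2], "lm_head.bias": [1.0]}
missing, mismatched = compare_state_dicts(pt, sf)
# missing == set(), mismatched == {"lm_head.bias"}
```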
@cloneofsimo
cloneofsimo / flash.py
Created June 22, 2023 07:51
FlashAttention comparison
import pytest
import torch
import triton
import triton.language as tl
@triton.jit
def _fwd_kernel(
Q, K, V, sm_scale,
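The numerical core of FlashAttention-style kernels is streaming softmax bookkeeping: process scores tile by tile while carrying a running max `m` and normalizer `l`, rescaling `l` whenever `m` grows. A pure-Python sketch of that trick via log-sum-exp (tile size is arbitrary; this is the math, not the Triton kernel):

```python
import math

def streaming_logsumexp(scores, block=2):
    m, l = float("-inf"), 0.0
    for i in range(0, len(scores), block):
        tile = scores[i:i + block]
        m_new = max(m, max(tile))  # updated running max
        # rescale the old normalizer into the new max's frame, then add the tile
        l = l * math.exp(m - m_new) + sum(math.exp(s - m_new) for s in tile)
        m = m_new
    return m + math.log(l)

scores = [1.0, 3.0, 2.0, 0.5]
lse = streaming_logsumexp(scores)
ref = math.log(sum(math.exp(s) for s in scores))
# lse matches the one-shot reference to floating-point precision
```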
@philipturner
philipturner / CalculateDiffusion.swift
Last active July 22, 2024 23:47
Calculate the number of floating-point operations in Stable Diffusion, and how those operations are distributed among layers
//
// main.swift
// CalculateDiffusion
//
// Created by Philip Turner on 6/2/23.
//
import Foundation
import QuartzCore
import MetalPerformanceShadersGraph
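The same back-of-envelope accounting fits in a few lines: a dense (m, k) x (k, n) matmul costs about 2*m*k*n FLOPs, and a self-attention layer adds two (seq, seq, dim) score/value matmuls on top of its four projections. A Python sketch using the standard counts (the formulas are textbook estimates, not taken from the gist):

```python
def matmul_flops(m, k, n):
    # one multiply and one add per output element per reduction step
    return 2 * m * k * n

def self_attention_flops(seq, dim):
    # Q, K, V and output projections: four (seq, dim) x (dim, dim) matmuls
    proj = 4 * matmul_flops(seq, dim, dim)
    # QK^T scores and attention-weighted values: two (seq, seq, dim) matmuls
    scores = 2 * matmul_flops(seq, seq, dim)
    return proj + scores

flops = self_attention_flops(seq=4096, dim=320)  # hypothetical SD-like layer shape
```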