Skip to content

Instantly share code, notes, and snippets.

View tiandiao123's full-sized avatar

Cuiqing Li (李崔卿) tiandiao123

  • Shanghai, China
View GitHub Profile
import torch
import time
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
from argparse import ArgumentParser
from transformers import LlamaForCausalLM, LlamaTokenizer
from inference import CaiInferenceConfig, convert_to_ds_model, recover_from_ds_model
# Module-level command-line parser (argparse.ArgumentParser). No arguments are
# registered on it in the visible part of this snippet.
parser = ArgumentParser()
import os
import torch
import numpy as np
from deepspeed.ops.transformer.inference.triton.attention import compute_attention as deepspeed_compute_attention
from inference.ops.self_attention import self_attention_compute_using_triton
def run_func(func, qkv):
func(qkv,
#include <iostream>
#include <cmath>
#include <thread>
#include <future>
#include <functional>
using namespace std;
int f(int x, int y){
return std::pow(x, y);
#include <future>
#include <iostream>
#include <thread>
using namespace std;
int factorial(std::shared_future<int> f){
int N = f.get();
int res = 1;
for(int i=2;i<=N;i++){
#include <future>
#include <iostream>
#include <thread>
using namespace std;
int factorial(std::future<int>& f){
int N = f.get();
int res = 1;
for(int i=2;i<=N;i++){
#include <future>
#include <iostream>
using namespace std;
int factorial(int N){
if(N == 1){
return 1;
}
#include <iostream>
#include <thread>
#include <future>
#include <deque>
using namespace std;
// Deque of packaged tasks; each entry wraps a callable with signature int().
std::deque<std::packaged_task<int()>> task_q;
// NOTE(review): presumably guards access to task_q -- no locking code is
// visible in this snippet; confirm against the producer/consumer code.
std::mutex mu;
// NOTE(review): presumably used to wake a thread waiting for work in task_q;
// confirm. Also note that <mutex> and <condition_variable> are not in the
// visible include list (<iostream>, <thread>, <future>, <deque>), so these two
// types appear to rely on another header pulling them in.
std::condition_variable cond;
#include <string>
#include <iostream>
#include <future>
#include <thread>
using namespace std;
void TestFuture(promise<string> p){
this_thread::sleep_for(3s);
cout << "begin setting value ... " << endl;
#include <iostream>
#include <thread>
#include <mutex>
using namespace std;
// Write the fixed greeting line to standard output, then end the line and
// flush the stream (std::endl does both).
void SystemInit()
{
    std::cout << "hello world! " << std::endl;
}
#include <thread>
#include <iostream>
using namespace std;
class MyThread
{
public:
// Print the banner text followed by `name`, a " : " separator and `age`,
// then end the line and flush standard output.
// NOTE(review): `name` and `age` are not declared in the visible part of the
// class -- presumably data members defined further down; confirm.
void Main()
{
    std::cout << "my thread main fuction" << name
              << " : " << age
              << std::endl;
}