This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
| import re | |
| import json | |
| # import openai | |
| import argparse | |
| import datetime | |
| import tiktoken | |
| import feedparser | |
| from functools import partial | |
| from tqdm import tqdm |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
| from datasets import load_dataset | |
| import time | |
| from tqdm import tqdm | |
| import wandb | |
| import torch | |
| import random | |
| import logging | |
| import argparse | |
| from pysnooper import snoop |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ARG PYTORCH="2.1.0" | |
| ARG CUDA="12.2" | |
| # ARG CUDNN="8" | |
| # FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel | |
| FROM nvcr.io/nvidia/pytorch:23.08-py3 | |
| ######################## | |
| SHELL [ "/bin/bash","-c" ] | |
| ENV DEBIAN_FRONTEND=noninteractive |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import torch | |
| def multihead_attention(query, key, value, num_heads): | |
| # Get the dimensionality of the input tensors | |
| d_model = query.size(-1) # (batch_size, seq_len, d_model) | |
| # Split the query, key, and value tensors into multiple heads | |
| query_heads = query.view(*query.size()[:-1], num_heads, d_model // num_heads).permute(0, 2, 1, 3) # (b, h, seq_len, d_model // h) | |
| key_heads = key.view(*key.size()[:-1], num_heads, d_model // num_heads).permute(0, 2, 1, 3) | |
| value_heads = value.view(*value.size()[:-1], num_heads, d_model // num_heads).permute(0, 2, 1, 3) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import torch | |
| import numpy as np | |
| def rotary_position_embedding(x, base=10000, extrapolation=1): | |
| n_pos, dim = x.shape[1], x.shape[-1] | |
| assert dim % 2 == 0 # Normally, hidden dim can be divided by 2 | |
| sinusoidal_pos = np.array( | |
| [ | |
| [pos / np.power(base ** (2 / dim) * extrapolation ** (2 / (dim - 2)), i) for i in range(dim // 2)] # NTK rope from https://kexue.fm/archives/9675 | |
| for pos in range(n_pos) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/bin/bash | |
| # ==================================================== | |
| # Copyright (C) 2022 All rights reserved. | |
| # | |
| # Author : Xinyu Zhu | |
| # Email : zhuxy21@mails.tsinghua.edu.cn | |
| # File Name : download_huggingface_pretrained_models.sh | |
| # Last Modified : 2022-03-22 21:52 | |
| # Describe : | |
| # |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // ==UserScript== | |
| // @name arxiv2readpaper | |
| // @namespace http://tampermonkey.net/ | |
| // @version 1.1 | |
| // @description 1.go to readpaper.com;2.rename downloaded paper | |
| // @author Yuhang Modified:wanng | |
| // @match https://arxiv.org* | |
| // @include https://arxiv.org* | |
| // @grant none | |
| // @license MIT |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| class LabelSmoothingCrossEntropy(nn.Module): | |
| """ | |
| Implementation of Label Smoothing Cross Entropy Loss | |
| Modified from fastai https://github.com/fastai/fastai2/blob/master/fastai2/layers.py#L338 | |
| """ | |
| def __init__(self, eps=0.1, reduction='mean', ignore_index=-100): | |
| super(LabelSmoothingCrossEntropy, self).__init__() | |
| self.eps = eps | |
| self.reduction = reduction | |
| self.ignore_index = ignore_index |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # -*- coding: utf-8 -*- | |
| """ | |
| Created on Tue Aug 22 19:41:55 2017 | |
| @author: Quantum Liu | |
| https://github.com/QuantumLiu/tf_gpu_manager | |
| """ | |
| ''' | |
| Example: | |
| gm=GPUManager() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| " ==================================================== | |
| " Copyright (C) 2021 All rights reserved. | |
| " | |
| " Author : Xinyu Zhu | |
| " Email : zhuxy21@mails.tsinghua.edu.cn | |
| " File Name : .vimrc | |
| " Last Modified : 2021-12-17 22:54 | |
| " Describe : | |
| " | |
| " ==================================================== |
NewerOlder