Skip to content

Instantly share code, notes, and snippets.

View conceptofmind's full-sized avatar
💭
The ghost in the machine - 从石头挤水

Enrico Shippole conceptofmind

💭
The ghost in the machine - 从石头挤水
View GitHub Profile
# coding=utf-8
# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
#
# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
# and OPT implementations in this library. It has been modified from its
# original forms to accommodate minor architectural differences compared
# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
import torch
from datasets import load_dataset
import argparse
import os
import math
import time
import random
import wandb
from huggingface_hub import HfApi, HfFolder
from huggingface_hub.utils._errors import HfHubHTTPError
import re
from my_secret_keys import api_key, cse_id
from googleapiclient.discovery import build
from fuzzywuzzy import fuzz
from playwright.sync_api import sync_playwright
# get the google search api result
def google_search(input_query: str, api_key: str, cse_id: str):
num_results = 3
import torch
from datasets import load_dataset
import argparse
import os
import math
from itertools import chain
from datetime import timedelta
from torch.utils.data import DataLoader
from accelerate import Accelerator
from accelerate.utils import (DummyOptim, DummyScheduler,
@conceptofmind
conceptofmind / c4_filters.py
Last active April 22, 2023 16:40
c4_filters.py
import re
import nltk
import ftfy
import multiprocessing
from datasets import load_dataset
from langdetect import detect_langs
from nltk.tokenize import sent_tokenize, word_tokenize
nltk.download("punkt")
[
{
"generated_text": " the trainz frorum. A little bit more patience to iron out little critters and cracks by the N3V crew. they have a mountain of routes [Retrieval( \"Glad Roy\")] from me to get ready so give them credit and energy to get things done the soonest and well.\n",
"index": 32,
"Retrieval": [
"Thanks Roy. yes when you purchase my routes it will show you what to dowload for eg.",
"Thanks a million Roy! You are a wonderful and patient support.",
"Roy you're awesome! I just finished installing \"Rogers Pass\" and I am in shock at how beautiful the route looks in TS12."
],
"Generated": "\n the trainz frorum. A little bit more patience to iron out little critters and cracks by the N3V crew. they have a mountain of routes [Retrieval( \"Glad Roy\")] from me to get ready so give them credit and energy to get things done the soonest and well.\n",
import os
import pandas as pd
from tqdm import tqdm
from IPython.display import display
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate, FewShotPromptTemplate
from langchain.chains import LLMChain
from utils import evaluate, ColBERTv2
from langchain.agents import initialize_agent, Tool, ZeroShotAgent, ConversationalAgent, AgentExecutor
from langchain import GoogleSearchAPIWrapper
@conceptofmind
conceptofmind / t5_pytorch_test.py
Last active January 3, 2023 05:42
t5_pytorch_test
import torch
from torch import nn
import torch.nn.functional as F
import math
from einops import rearrange
def exists(val):
    """Return True when *val* is anything other than ``None``.

    Falsy values such as ``0``, ``""`` and ``[]`` still count as existing;
    only the ``None`` singleton is treated as missing.
    """
    return val is not None
import gzip
import hashlib
import json
import multiprocessing
import os
import shutil
import time
import re
from pathlib import Path
import json
import multiprocessing as mp
import re
from collections import defaultdict
from functools import partial
from typing import Dict, List, Optional, Set, Tuple, Type
from datasets import Dataset
from tqdm import tqdm