Created
August 29, 2021 11:33
-
-
Save shahrukhx01/0d8f75bbfbf27703649464d7cbb712be to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## import relevant packages
# Standard library
import ast
import logging
import os
import pathlib
import random
from typing import List, Union, Dict, Optional

# Third-party (relative order kept as in the original script)
import pandas as pd
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModel
from farm.infer import Inferencer
from beir import util, LoggingHandler
from beir.datasets.data_loader import GenericDataLoader
from beir.retrieval.evaluation import EvaluateRetrieval
from beir.retrieval.search.dense import DenseRetrievalExactSearch as DRES

# Install the default root handler, then raise this module's logger to INFO.
logging.basicConfig()
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

## Load TREC COVID dataset
dataset = "trec-covid"
url = f"https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/{dataset}.zip"

# Fetch the archive into the local "datasets" directory; the returned path is
# handed straight to the loader below.
data_path = util.download_and_unzip(url, "datasets")

# Load the "test" split. The loader yields three objects, unpacked here as
# corpus, queries and qrels.
# NOTE(review): presumably dicts keyed by document / query id, per the BEIR
# docs -- confirm before relying on their exact schema.
corpus, queries, qrels = GenericDataLoader(data_folder=data_path).load(split="test")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment