from transformers import pipeline
nlp = pipeline("question-answering")
context = r"""
Extractive Question Answering is the task of extracting an answer from a text given a question. An example of a
question answering dataset is the SQuAD dataset, which is entirely based on that task. If you would like to fine-tune
a model on a SQuAD task, you may leverage the examples/question-answering/run_squad.py script.
"""
result = nlp(question="What is extractive question answering?", context=context)
print(f"Answer: '{result['answer']}', score: {round(result['score'], 4)}, start: {result['start']}, end: {result['end']}")
result = nlp(question="What is a good example of a question answering dataset?", context=context)
import pandas as pd
import re
import spacy
import neuralcoref

nlp = spacy.load('en_core_web_lg')
neuralcoref.add_to_pipe(nlp)

def get_entity_pairs(text, coref=True):
    # Body is a sketch (the original is truncated): resolve coreferences,
    # then pair named entities that co-occur in the same sentence
    if coref:
        text = nlp(text)._.coref_resolved
    doc = nlp(text)
    pairs = [(a.text, b.text) for s in doc.sents
             for a, b in zip(s.ents, list(s.ents)[1:])]
    return pd.DataFrame(pairs, columns=['entity_1', 'entity_2'])
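A quick smoke test of the sketch above (the sentence is invented; coreference resolution should map "She" back to the named entity):

text = "Angela Merkel met Emmanuel Macron in Berlin. She thanked him."
print(get_entity_pairs(text))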
def get_dict_of_doc(doc_words, vocabulary):
    # Count how often each vocabulary word appears in the document
    wordDict = dict.fromkeys(vocabulary, 0)
    for word in doc_words:
        wordDict[word] += 1
    return wordDict

def computeTF(wordDict, bow):
    # Term frequency: raw count divided by document length
    tfDict = {}
    bowCount = len(bow)
    for word, count in wordDict.items():
        tfDict[word] = count / float(bowCount)
    return tfDict
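A toy run of the two helpers on invented documents, where each term frequency is the raw count over the document length:

doc_a = "the cat sat on the mat".split()
doc_b = "the dog sat".split()
vocabulary = set(doc_a) | set(doc_b)
tf_a = computeTF(get_dict_of_doc(doc_a, vocabulary), doc_a)
print(tf_a['the'])  # 2 / 6 = 0.3333...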
from multiprocessing import Pool
import multiprocessing
from tqdm import tqdm

print(multiprocessing.cpu_count())

def parallel_processing(func, data, cores=4):
    # imap yields results as workers finish, so tqdm can show live progress
    # (p.map would block until everything is done)
    with Pool(cores) as p:
        outputs = list(tqdm(p.imap(func, data), total=len(data)))
    return outputs
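Pool needs a picklable, module-level function (a lambda won't work), and on spawn-based platforms the call belongs under a main guard; a minimal example:

def square(x):
    return x * x

if __name__ == '__main__':
    results = parallel_processing(square, list(range(100)))
    print(results[:5])  # [0, 1, 4, 9, 16]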
import requests
import bs4

def get_css_text(url, css):
    # verify=False skips TLS certificate checks; use only on trusted hosts
    res = requests.get(url, verify=False)
    soup = bs4.BeautifulSoup(res.text, 'html.parser')
    return [el.text for el in soup.select(css)]
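For example, grabbing every <h1> on a page (the URL is a placeholder):

print(get_css_text('https://example.com', 'h1'))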
from tqdm.notebook import tqdm
import threading

def get_data(ll, req_func, num_threads=50):
    # Process ll in chunks of num_threads, one thread per item; a sketch of
    # the truncated original, assuming req_func(item, out) appends to out
    final_data = []
    n = len(ll)
    for ii in tqdm(range(0, n, num_threads)):
        threads = list()
        for index in range(num_threads):
            if (ii + index) >= n:
                break
            t = threading.Thread(target=req_func, args=(ll[ii + index], final_data))
            threads.append(t)
            t.start()
        for t in threads:
            t.join()
    return final_data
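Under the assumed req_func(item, out) signature, a caller might look like this (list.append is atomic in CPython, so the shared list needs no lock):

def fetch_status(url, out):
    out.append((url, requests.get(url).status_code))

urls = ['https://example.com'] * 5  # placeholder URLs
statuses = get_data(urls, fetch_status)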
# Create bigrams
from nltk.collocations import BigramCollocationFinder, BigramAssocMeasures

# Tokenize to list
words = []
for sent in data_lemmatized:
    for token in sent.split():
        words.append(token)

# Create bigram with pmi (standard NLTK pattern; the original is truncated here)
bigram_measures = BigramAssocMeasures()
finder = BigramCollocationFinder.from_words(words)
top_bigrams = finder.nbest(bigram_measures.pmi, 20)
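PMI over-weights rare pairs, so a frequency cutoff before scoring is common (the threshold of 2 is arbitrary):

finder.apply_freq_filter(2)  # drop bigrams seen fewer than twice
print(finder.nbest(bigram_measures.pmi, 20))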
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

# Function for comparing different approaches
def score_dataset(X_train, X_valid, y_train, y_valid):
    model = RandomForestRegressor(n_estimators=10, random_state=0)
    model.fit(X_train, y_train)
    preds = model.predict(X_valid)
    return mean_absolute_error(y_valid, preds)
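score_dataset can then compare, say, different preprocessing choices on one split; X and y below are assumed feature/target frames:

from sklearn.model_selection import train_test_split

X_train, X_valid, y_train, y_valid = train_test_split(X, y, random_state=0)
print('MAE:', score_dataset(X_train, X_valid, y_train, y_valid))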
import lightgbm as lgb

dtrain = lgb.Dataset(train[feature_cols], label=train['is_attributed'])
dvalid = lgb.Dataset(valid[feature_cols], label=valid['is_attributed'])
dtest = lgb.Dataset(test[feature_cols], label=test['is_attributed'])

param = {'num_leaves': 64, 'objective': 'binary', 'metric': 'auc'}
num_round = 1000
# Stop if validation AUC hasn't improved for 10 rounds
bst = lgb.train(param, dtrain, num_round, valid_sets=[dvalid], early_stopping_rounds=10)
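With early stopping, predictions usually use the best iteration rather than the last one:

from sklearn.metrics import roc_auc_score

ypred = bst.predict(test[feature_cols], num_iteration=bst.best_iteration)
print('Test AUC:', roc_auc_score(test['is_attributed'], ypred))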
function sleep(ms) {
  return new Promise(resolve => setTimeout(resolve, ms));
}

// Click each matching element, waiting 5 s between clicks
// (run in a console that supports top-level await, e.g. Chrome DevTools)
var divs = document.querySelectorAll('#details');
console.log(divs.length);
for (let i = 0; i < divs.length; ++i) {
  divs[i].click();
  await sleep(5000);
}