raja-1996

## question_answering_transformers
from transformers import pipeline
# Question-answering pipeline; no explicit model is passed to pipeline().
nlp = pipeline("question-answering")
# Passage the answers are extracted from.
context = r"""
Extractive Question Answering is the task of extracting an answer from a text given a question. An example of a
question answering dataset is the SQuAD dataset, which is entirely based on that task. If you would like to fine-tune
a model on a SQuAD task, you may leverage the examples/question-answering/run_squad.py script.
"""
# Each call returns a mapping from which 'answer', 'score', 'start' and 'end' are read below.
result = nlp(question="What is extractive question answering?", context=context)
print(f"Answer: '{result['answer']}', score: {round(result['score'], 4)}, start: {result['start']}, end: {result['end']}")
result = nlp(question="What is a good example of a question answering dataset?", context=context)
# The second answer used to be computed and then silently dropped; report it
# in the same format as the first one.
print(f"Answer: '{result['answer']}', score: {round(result['score'], 4)}, start: {result['start']}, end: {result['end']}")

## subject_relation_object_extraction
import pandas as pd
import re
import spacy
import neuralcoref

# Load the 'en_core_web_lg' spaCy model and register neuralcoref on it.
# NOTE(review): this rebinds the name `nlp`, which the question-answering
# snippet earlier in this file uses for its transformers pipeline — the two
# snippets cannot share one namespace as written.
nlp = spacy.load('en_core_web_lg')
neuralcoref.add_to_pipe(nlp)


def get_entity_pairs(text, coref=True):

## tf_idf
def get_dict_of_doc(doc_words, vocabulary):
    """Count how often each vocabulary term occurs in one document.

    Args:
        doc_words: Iterable of the document's tokens.
        vocabulary: Iterable of terms; every term gets an entry, starting at 0.

    Returns:
        dict: term -> number of occurrences in ``doc_words``.

    Raises:
        KeyError: if ``doc_words`` contains a token missing from ``vocabulary``.
    """
    counts = {term: 0 for term in vocabulary}
    for token in doc_words:
        # Plain indexing on purpose: an out-of-vocabulary token must raise.
        counts[token] = counts[token] + 1
    return counts

def computeTF(wordDict, bow):
    tfDict = {}
    bowCount = len(bow)
    for word, count in wordDict.items():

## parallel
from multiprocessing import Pool
import multiprocessing

print(multiprocessing.cpu_count())
def parallel_processing(func, data, cores=4):
    """Apply ``func`` to every item of ``data`` using a pool of worker processes.

    Args:
        func: Callable applied to each item of ``data``.
        data: Sized iterable of inputs (``len(data)`` sets the progress total).
        cores: Number of worker processes in the pool (default 4).

    Returns:
        list: One result per input item, in input order.
    """
    with Pool(cores) as pool:
        # imap() yields results lazily and in input order, so the tqdm bar
        # advances as items finish. map() only returns once *all* work is
        # done, which made the bar jump straight from 0% to 100%.
        # NOTE(review): imap() defaults to chunksize=1; pass a larger
        # chunksize if per-item dispatch overhead matters for big inputs.
        outputs = list(tqdm(pool.imap(func, data), total=len(data)))
    return outputs

## get css text

import requests
import bs4

def get_css_text(url, css):
    """Fetch ``url`` and return the text of every element matching ``css``.

    Args:
        url: Address passed to ``requests.get``.
        css: CSS selector passed to ``BeautifulSoup.select``.

    Returns:
        list: The ``.text`` of each matched element, in the order ``select``
        returns them (empty if nothing matches).
    """
    # SECURITY NOTE(review): verify=False switches off TLS certificate
    # validation. Kept so existing callers behave as before — confirm it is
    # really required for the sites being scraped.
    res = requests.get(url, verify=False)
    # Name the parser explicitly: without one, bs4 picks whichever parser
    # happens to be installed (and warns), so output can differ per machine.
    soup = bs4.BeautifulSoup(res.text, "html.parser")
    return [element.text for element in soup.select(css)]

## Parallel Requests
from tqdm.notebook import tqdm
import threading

def get_data(ll, req_func, num_threads=50):
  final_data = []
  n = len(ll)
  for ii in tqdm(range(0, n, num_threads)):
    threads = list()
    for index in range(num_threads):
        if (ii+index) >= n:

## bigrams_to_unigram_collocation_pmi
# Create bigrams
from nltk.collocations import BigramCollocationFinder, BigramAssocMeasures

# Flatten every sentence of data_lemmatized into one list of
# whitespace-separated tokens
words = [token for sent in data_lemmatized for token in sent.split()]

# Create bigram with pmi

## RandomForestRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

# Helper used to compare different data-preparation approaches
def score_dataset(X_train, X_valid, y_train, y_valid):
    """Fit a random forest on the training split and score it on validation.

    The forest uses 10 estimators and ``random_state=0``.

    Args:
        X_train: Training features.
        X_valid: Validation features.
        y_train: Training targets.
        y_valid: Validation targets.

    Returns:
        Mean absolute error of the validation predictions against ``y_valid``.
    """
    forest = RandomForestRegressor(n_estimators=10, random_state=0)
    predictions = forest.fit(X_train, y_train).predict(X_valid)
    return mean_absolute_error(y_valid, predictions)

## lightgbm
import lightgbm as lgb

# Wrap each split in a LightGBM Dataset, using 'is_attributed' as the label.
dtrain = lgb.Dataset(train[feature_cols], label=train['is_attributed'])
dvalid = lgb.Dataset(valid[feature_cols], label=valid['is_attributed'])
dtest = lgb.Dataset(test[feature_cols], label=test['is_attributed'])

# Binary objective, evaluated with AUC on the validation set.
param = {'num_leaves': 64, 'objective': 'binary'}
param['metric'] = 'auc'
num_round = 1000
# Early stopping is requested through the callback API: the
# `early_stopping_rounds` keyword of lgb.train() was removed in LightGBM 4.0,
# while the lgb.early_stopping callback works on both older and newer releases.
bst = lgb.train(
    param,
    dtrain,
    num_round,
    valid_sets=[dvalid],
    callbacks=[lgb.early_stopping(stopping_rounds=10)],
)

## Like all videos of a YouTube channel
/**
 * Pause for a given duration.
 *
 * @param {number} ms - Delay, in milliseconds, handed to setTimeout.
 * @returns {Promise} Promise that resolves (with no value) when the timer fires.
 */
function sleep(ms) {
  return new Promise(function (done) {
    setTimeout(done, ms);
  });
}

var divs = document.querySelectorAll('#details');
console.log(divs.length)
for (i = 0; i < divs.length; ++i) {
  divs[i].click();
  await sleep(5000);
	from transformers import pipeline
	nlp = pipeline("question-answering")
	context = r"""
	Extractive Question Answering is the task of extracting an answer from a text given a question. An example of a
	question answering dataset is the SQuAD dataset, which is entirely based on that task. If you would like to fine-tune
	a model on a SQuAD task, you may leverage the examples/question-answering/run_squad.py script.
	"""
	result = nlp(question="What is extractive question answering?", context=context)
	print(f"Answer: '{result['answer']}', score: {round(result['score'], 4)}, start: {result['start']}, end: {result['end']}")
	result = nlp(question="What is a good example of a question answering dataset?", context=context)
	import pandas as pd
	import re
	import spacy
	import neuralcoref

	nlp = spacy.load('en_core_web_lg')
	neuralcoref.add_to_pipe(nlp)


	def get_entity_pairs(text, coref=True):
	def get_dict_of_doc(doc_words, vocabulary):
	wordDict = dict.fromkeys(vocabulary, 0)
	for word in doc_words:
	wordDict[word]+=1
	return wordDict

	def computeTF(wordDict, bow):
	tfDict = {}
	bowCount = len(bow)
	for word, count in wordDict.items():
	from multiprocessing import Pool
	import multiprocessing

	print(multiprocessing.cpu_count())
	def parallel_processing(func, data, cores=4):
	with Pool(cores) as p:
	outputs = list(tqdm(p.map(func, data), total = len(data)))
	return outputs

	import requests
	import bs4

	def get_css_text(url, css):
	res = requests.get(url, verify=False)
	soup = bs4.BeautifulSoup(res.text)
	aa = soup.select(css)
	aa = [a.text for a in aa]
	return aa
	from tqdm.notebook import tqdm
	import threading

	def get_data(ll, req_func, num_threads=50):
	final_data = []
	n = len(ll)
	for ii in tqdm(range(0, n, num_threads)):
	threads = list()
	for index in range(num_threads):
	if (ii+index) >= n:
	# Create bigrams
	from nltk.collocations import BigramCollocationFinder, BigramAssocMeasures

	# Tokenize to list
	words = []
	for sent in data_lemmatized:
	for token in sent.split():
	words.append(token)

	# Create bigram with pmi
	from sklearn.ensemble import RandomForestRegressor
	from sklearn.metrics import mean_absolute_error

	# Function for comparing different approaches
	def score_dataset(X_train, X_valid, y_train, y_valid):
	model = RandomForestRegressor(n_estimators=10, random_state=0)
	model.fit(X_train, y_train)
	preds = model.predict(X_valid)
	return mean_absolute_error(y_valid, preds)
	import lightgbm as lgb

	dtrain = lgb.Dataset(train[feature_cols], label=train['is_attributed'])
	dvalid = lgb.Dataset(valid[feature_cols], label=valid['is_attributed'])
	dtest = lgb.Dataset(test[feature_cols], label=test['is_attributed'])

	param = {'num_leaves': 64, 'objective': 'binary'}
	param['metric'] = 'auc'
	num_round = 1000
	bst = lgb.train(param, dtrain, num_round, valid_sets=[dvalid], early_stopping_rounds=10)
	function sleep(ms) {
	return new Promise(resolve => setTimeout(resolve, ms));
	}

	var divs = document.querySelectorAll('#details');
	console.log(divs.length)
	for (i = 0; i < divs.length; ++i) {
	divs[i].click();
	await sleep(5000);