This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | from transformers import pipeline | |
| nlp = pipeline("question-answering") | |
| context = r""" | |
| Extractive Question Answering is the task of extracting an answer from a text given a question. An example of a | |
| question answering dataset is the SQuAD dataset, which is entirely based on that task. If you would like to fine-tune | |
| a model on a SQuAD task, you may leverage the examples/question-answering/run_squad.py script. | |
| """ | |
| result = nlp(question="What is extractive question answering?", context=context) | |
| print(f"Answer: '{result['answer']}', score: {round(result['score'], 4)}, start: {result['start']}, end: {result['end']}") | |
| result = nlp(question="What is a good example of a question answering dataset?", context=context) | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | import pandas as pd | |
| import re | |
| import spacy | |
| import neuralcoref | |
| nlp = spacy.load('en_core_web_lg') | |
| neuralcoref.add_to_pipe(nlp) | |
| def get_entity_pairs(text, coref=True): | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | def get_dict_of_doc(doc_words, vocabulary): | |
| wordDict = dict.fromkeys(vocabulary, 0) | |
| for word in doc_words: | |
| wordDict[word]+=1 | |
| return wordDict | |
| def computeTF(wordDict, bow): | |
| tfDict = {} | |
| bowCount = len(bow) | |
| for word, count in wordDict.items(): | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | from multiprocessing import Pool | |
| import multiprocessing | |
| print(multiprocessing.cpu_count()) | |
def parallel_processing(func, data, cores=4):
    """Apply ``func`` to every item of ``data`` in a process pool.

    Args:
        func: Callable taking one item of ``data``.
        data: Sized collection of inputs; ``len(data)`` sets the progress
            bar total.
        cores: Number of worker processes in the pool.

    Returns:
        list of results in the same order as ``data``.
    """
    with Pool(cores) as p:
        # imap yields results one at a time, in input order, so the bar
        # advances as work finishes. p.map would block until everything is
        # done, leaving the bar idle and then jumping straight to 100%.
        outputs = list(tqdm(p.imap(func, data), total=len(data)))
    return outputs
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | import requests | |
| import bs4 | |
def get_css_text(url, css, timeout=30):
    """Fetch ``url`` and return the text of every element matching ``css``.

    Args:
        url: Page address to download with ``requests.get``.
        css: CSS selector passed to ``BeautifulSoup.select``.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error; without one a stalled server blocks forever.

    Returns:
        list with the ``.text`` of each matched element.
    """
    # NOTE(review): verify=False turns off TLS certificate checking. It is
    # kept so existing callers behave the same, but confirm it is needed.
    res = requests.get(url, verify=False, timeout=timeout)
    # Name the parser explicitly: otherwise bs4 picks whichever parser is
    # installed and warns, so results can differ between machines.
    soup = bs4.BeautifulSoup(res.text, "html.parser")
    return [element.text for element in soup.select(css)]
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | from tqdm.notebook import tqdm | |
| import threading | |
| def get_data(ll, req_func, num_threads=50): | |
| final_data = [] | |
| n = len(ll) | |
| for ii in tqdm(range(0, n, num_threads)): | |
| threads = list() | |
| for index in range(num_threads): | |
| if (ii+index) >= n: | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | # Create bigrams | |
| from nltk.collocations import BigramCollocationFinder, BigramAssocMeasures | |
# Flatten the lemmatized sentences into one list of whitespace-split tokens
words = []
for sent in data_lemmatized:
    words.extend(sent.split())
| # Create bigram with pmi | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | from sklearn.ensemble import RandomForestRegressor | |
| from sklearn.metrics import mean_absolute_error | |
| # Function for comparing different approaches | |
def score_dataset(X_train, X_valid, y_train, y_valid):
    """Return the validation mean absolute error of a small random forest.

    A ``RandomForestRegressor`` (10 trees, ``random_state=0``) is fitted on
    ``(X_train, y_train)`` and its predictions for ``X_valid`` are scored
    against ``y_valid`` with ``mean_absolute_error``.
    """
    forest = RandomForestRegressor(n_estimators=10, random_state=0)
    predictions = forest.fit(X_train, y_train).predict(X_valid)
    return mean_absolute_error(y_valid, predictions)
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | import lightgbm as lgb | |
# Wrap each split in a LightGBM Dataset, using 'is_attributed' as the label.
dtrain = lgb.Dataset(train[feature_cols], label=train['is_attributed'])
dvalid = lgb.Dataset(valid[feature_cols], label=valid['is_attributed'])
dtest = lgb.Dataset(test[feature_cols], label=test['is_attributed'])

# Binary objective, evaluated with AUC on the validation set.
param = {'num_leaves': 64, 'objective': 'binary', 'metric': 'auc'}
num_round = 1000

# Stop once the validation metric has not improved for 10 rounds. The
# callback form is used because lgb.train() no longer accepts the
# early_stopping_rounds= keyword in LightGBM 4.x.
bst = lgb.train(
    param,
    dtrain,
    num_round,
    valid_sets=[dvalid],
    callbacks=[lgb.early_stopping(stopping_rounds=10)],
)
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | function sleep(ms) { | |
| return new Promise(resolve => setTimeout(resolve, ms)); | |
| } | |
| var divs = document.querySelectorAll('#details'); | |
| console.log(divs.length) | |
| for (i = 0; i < divs.length; ++i) { | |
| divs[i].click(); | |
| await sleep(5000); | 
NewerOlder