Jan Trienes jantrienes

## README.md

      
              4 files
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                jantrienes
                / README.md
            
            
              Last active
              February 1, 2024 08:50
            
              
                Train a huggingface text classification model
              
          
    Transformers Text Classifier Example

This code was used to train the following classifier: https://huggingface.co/jantrienes/roberta-large-question-classifier

  
## duplicates.py
import itertools
from pprint import pprint
from typing import List, Tuple


def jaccard_similarity(list1: List[str], list2: List[str]):
    if not list1 or not list2:
        return 0
    s1 = set(list1)
    s2 = set(list2)

## plot_confusion_matrix.py
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

def plot_confusion_matrix(y_true, y_pred, class_names, normalize=None,
                          title='Confusion Matrix', plot_numbers=False, display_names=None,
                          figsize=(15, 11)):

    cm = confusion_matrix(y_true, y_pred, labels=class_names, normalize=normalize)

## matplotlib-config.py
# Notebook plotting setup. The two lines starting with `%` are IPython magics,
# so this snippet is meant to run in an IPython/Jupyter session, not as a plain
# Python script.
%matplotlib inline
# Request the 'retina' figure format from the inline backend.
%config InlineBackend.figure_format='retina'

import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import seaborn as sns
# Global seaborn styling, applied once at import time of the notebook cell.
sns.set_color_codes()
sns.set_theme()
# Use seaborn's "paper" plotting context.
sns.set_context("paper")

## eli5_pipeline.py
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler

import eli5

## timing.py
from functools import wraps
from timeit import default_timer as timer

def timing(f):
    @wraps(f)
    def wrap(*args, **kw):
        ts = timer()
        result = f(*args, **kw)
        te = timer()
        print('func: {} took: {:2.4f} sec'.format(f.__name__, te - ts))

## precision-vs-recall.md

      
              3 files
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                jantrienes
                / precision-vs-recall.md
            
            
              Last active
              June 4, 2019 14:51
            
              
                Precision/Recall Curve at Operating Levels
              
          
    Visualize Precision/Recall Tradeoff at Operating Levels

Sometimes, it can be interesting to visualize precision vs. recall at different operating levels. For example, in a binary classification problem one can adjust the default classification threshold of the positive class (i.e., T = 0.5) to be either more or less conservative. This in turn changes precision and recall.
This utility lets you visualize the precision/recall that could be achieved by setting the classification threshold to a desired level of precision/recall.
Example:
from matplotlib import rcParams

  
## interleaving.py
def interleave(list_a, list_b):
    """Public entry point for interleaving ``list_a`` and ``list_b``.

    Calls the ``_interleave`` helper with both positions starting at 0 and a
    fresh, empty accumulator list, and returns whatever the helper returns.
    """
    start_a, start_b = 0, 0
    merged = []
    return _interleave(list_a, list_b, start_a, start_b, merged)

def _interleave(list_a, list_b, k_a, k_b, combined):
    if k_a >= len(list_a) and k_b >= len(list_b):
        return combined

    if k_a == k_b:
        if list_a[k_a] not in combined:
            combined.append(list_a[k_a])

## tikz-neural-network.tex
\documentclass[tikz]{standalone}

\begin{document}

\begin{tikzpicture}
[   cnode/.style={draw=black,fill=#1,minimum width=3mm,circle},
]
	% output neuron
    \node[cnode=white, label=above:$\delta_1^{(2)}$, label=below:$y_1$] (s) at (6,-2) {};

## ChannelsListWithData.js
import React from 'react';
import {
    gql,
    graphql,
} from 'react-apollo';

import AddChannel from './AddChannel';

const ChannelsList = ({ data: {loading, error, channels }}) => {
  if (loading) {
	import itertools
	from pprint import pprint
	from typing import List, Tuple


	def jaccard_similarity(list1: List[str], list2: List[str]):
	if not list1 or not list2:
	return 0
	s1 = set(list1)
	s2 = set(list2)
	from sklearn.metrics import confusion_matrix
	import matplotlib.pyplot as plt
	import pandas as pd
	import seaborn as sns

	def plot_confusion_matrix(y_true, y_pred, class_names, normalize=None,
	title='Confusion Matrix', plot_numbers=False, display_names=None,
	figsize=(15, 11)):

	cm = confusion_matrix(y_true, y_pred, labels=class_names, normalize=normalize)
	%matplotlib inline
	%config InlineBackend.figure_format='retina'

	import matplotlib.pyplot as plt
	import matplotlib.ticker as mtick
	import seaborn as sns
	sns.set_color_codes()
	sns.set_theme()
	sns.set_context("paper")
	import pandas as pd
	from sklearn.compose import ColumnTransformer
	from sklearn.datasets import fetch_20newsgroups
	from sklearn.feature_extraction.text import CountVectorizer
	from sklearn.linear_model import LogisticRegression
	from sklearn.model_selection import train_test_split
	from sklearn.pipeline import Pipeline
	from sklearn.preprocessing import MinMaxScaler

	import eli5
	from functools import wraps
	from timeit import default_timer as timer

	def timing(f):
	@wraps(f)
	def wrap(args, *kw):
	ts = timer()
	result = f(args, *kw)
	te = timer()
	print('func: {} took: {:2.4f} sec'.format(f.__name__, te - ts))
	def interleave(list_a, list_b):
	return _interleave(list_a, list_b, 0, 0, list())

	def _interleave(list_a, list_b, k_a, k_b, combined):
	if k_a >= len(list_a) and k_b >= len(list_b):
	return combined

	if k_a == k_b:
	if list_a[k_a] not in combined:
	combined.append(list_a[k_a])
	\documentclass[tikz]{standalone}

	\begin{document}

	\begin{tikzpicture}
	[ cnode/.style={draw=black,fill=#1,minimum width=3mm,circle},
	]
	% output neuron
	\node[cnode=white, label=above:$\delta_1^{(2)}$, label=below:$y_1$] (s) at (6,-2) {};
	import React from 'react';
	import {
	gql,
	graphql,
	} from 'react-apollo';

	import AddChannel from './AddChannel';

	const ChannelsList = ({ data: {loading, error, channels }}) => {
	if (loading) {