Maximilian Strauss straussmaximilian

## 07_eda.py
import pandas as pd
import re
from collections import Counter
import spacy
from tqdm import tqdm as tqdm
from urllib.parse import urlparse
import matplotlib.pyplot as plt

# If the spaCy model en_core_web_sm is not installed, download it first
# (notebook cell syntax; from a shell, drop the leading "!"):
# !python -m spacy download en_core_web_sm

## 06_annotation.py
import streamlit as st
from PIL import Image
import os
import random

# Module-level alias for Streamlit's session state object.
state = st.session_state

# Path prefix of the image folder; presumably holds the dice images to
# annotate -- TODO confirm against the code that reads it.
BASE_PATH = "./DICE_IMAGES_100/"
# Fixed list of choices: the strings "1" through "6" plus "NA" (presumably
# the annotation labels offered in the UI -- TODO confirm).
OPTIONS = ["1", "2", "3", "4", "5", "6", "NA"]

## 04_perfplot2.py
def fixed_query(size):
    """
    Returns a random array of the given size together with a fixed number
    (10,000) of random query points.

    Both values are produced by ``random_array``, which is defined
    elsewhere; ``size`` is forwarded to it unchanged for the data array.
    The result is the tuple ``(array, query_points)``.
    """
    array = random_array(size)
    # Request the query set with an integer count instead of the float
    # literal 1e4: an integer is valid wherever the float was accepted,
    # while the reverse does not hold (e.g. NumPy shape arguments must be
    # integers).
    query_points = random_array(10_000)
    return (array, query_points)


## 04_kdtree.py
from scipy.spatial import cKDTree

def kdtree(data, delta=0.1):
    """
    Constructs a 2D k-d-tree from the input array and queries the points within a square around a given point.
    """
    array, query_points = data
    tree = cKDTree(array)
    count = 0
    for point in query_points:

## 04_multiple_01.py
@njit
def boolean_index_numba_multiple(array, xmin, xmax, ymin, ymax, zmin, zmax):
    """
    Takes a numpy array and isolates all points that lie strictly inside
    (xmin, xmax) for the first dimension, (ymin, ymax) for the second
    dimension and (zmin, zmax) for the third dimension by creating a
    boolean index. Points lying exactly on a bound are excluded.
    This function will be compiled with numba.

    Returns the rows of ``array`` selected by the boolean index.
    """
    index = ((array[:, 0] > xmin) & (array[:, 1] > ymin) & (array[:, 2] > zmin)
             & (array[:, 0] < xmax) & (array[:, 1] < ymax) & (array[:, 2] < zmax))

    # The index used to be computed and then discarded, so the function
    # returned None; hand back the filtered rows like boolean_index does.
    return array[index]

## 04_perfplot_1.py
import perfplot

plt.figure(figsize=(10, 10))
plt.title('Quantitative Comparison of Filtering Speeds')
perfplot.show(
    setup=random_array,
    kernels=[loop, boolean_index, loop_numba, boolean_index_numba],
    n_range=[2**k for k in range(2, 22)],
    logx=True,
    logy=True,

## 04_pandas.py
#Pandas

import pandas as pd
df = pd.DataFrame({'x': array[:, 0], 'y': array[:, 1], 'z': array[:, 2]})

# Pandas query (%timeit is an IPython magic, so the timing line only runs in IPython/Jupyter)
print('Pandas Query:\t\t', end='')
%timeit df.query('x >= 0.2 and x <= 0.4 and y >= 0.4 and y <= 0.6')

# Pandas eval

## 04_numba_functions.py
from numba.typed import List
from numba import njit

@njit
def boolean_index_numba(array):
    """
    Takes a numpy array and isolates all points that are within [0.2,0.4] for
    the first dimension and between [0.4,0.6] for the second dimension
    by creating a boolean index.
    This function will be compiled with numba.

## 04_boolean_index.py
def boolean_index(array):
    """
    Filters a numpy array down to the rows whose first column lies in
    [0.2, 0.4] and whose second column lies in [0.4, 0.6] (bounds
    inclusive), using a boolean mask.
    """
    first, second = array[:, 0], array[:, 1]

    # One mask per dimension, combined into the final row selector.
    first_in_range = (first >= 0.2) & (first <= 0.4)
    second_in_range = (second >= 0.4) & (second <= 0.6)

    return array[first_in_range & second_in_range]

## 04_python_functions.py
# List comprehension

def list_comprehension(tuple_list):
    """
    Filters a list of tuples with a list comprehension, keeping the points
    whose first entry lies in [0.2, 0.4] and whose second entry lies in
    [0.4, 0.6] (bounds inclusive). Input order is preserved.
    """
    return [
        point
        for point in tuple_list
        if point[0] >= 0.2 and point[1] >= 0.4 and point[0] <= 0.4 and point[1] <= 0.6
    ]
	import pandas as pd
	import re
	from collections import Counter
	import spacy
	from tqdm import tqdm as tqdm
	from urllib.parse import urlparse
	import matplotlib.pyplot as plt

	# if en_core_web_sm is not installed
	# !python -m spacy download en_core_web_sm
	import streamlit as st
	from PIL import Image
	import os
	import random

	# Module-level alias for Streamlit's session state object.
	state = st.session_state

	# Path prefix of the image folder; presumably holds the dice images to
	# annotate -- TODO confirm against the code that reads it.
	BASE_PATH = "./DICE_IMAGES_100/"
	# Fixed list of choices: the strings "1" through "6" plus "NA" (presumably
	# the annotation labels offered in the UI -- TODO confirm).
	OPTIONS = ["1", "2", "3", "4", "5", "6", "NA"]
	def fixed_query(size):
		"""
		Returns a random array of the given size together with a fixed number
		(10,000) of random query points.

		Both values are produced by ``random_array``, which is defined
		elsewhere; ``size`` is forwarded to it unchanged for the data array.
		The result is the tuple ``(array, query_points)``.
		"""
		array = random_array(size)
		# Request the query set with an integer count instead of the float
		# literal 1e4: an integer is valid wherever the float was accepted,
		# while the reverse does not hold (e.g. NumPy shape arguments must be
		# integers).
		query_points = random_array(10_000)
		return (array, query_points)
	from scipy.spatial import cKDTree

	def kdtree(data, delta=0.1):
	"""
	Constructs a 2D k-d-tree from the input array and queries the points within a square around a given point.
	"""
	array, query_points = data
	tree = cKDTree(array)
	count = 0
	for point in query_points:
	@njit
	def boolean_index_numba_multiple(array, xmin, xmax, ymin, ymax, zmin, zmax):
		"""
		Takes a numpy array and isolates all points that lie strictly inside
		(xmin, xmax) for the first dimension, (ymin, ymax) for the second
		dimension and (zmin, zmax) for the third dimension by creating a
		boolean index. Points lying exactly on a bound are excluded.
		This function will be compiled with numba.

		Returns the rows of ``array`` selected by the boolean index.
		"""
		index = ((array[:, 0] > xmin) & (array[:, 1] > ymin) & (array[:, 2] > zmin)
			& (array[:, 0] < xmax) & (array[:, 1] < ymax) & (array[:, 2] < zmax))

		# The index used to be computed and then discarded (and the body was
		# not indented under the def); return the filtered rows.
		return array[index]
	import perfplot

	plt.figure(figsize=(10, 10))
	plt.title('Quantitative Comparison of Filtering Speeds')
	perfplot.show(
	setup=random_array,
	kernels=[loop, boolean_index, loop_numba, boolean_index_numba],
	n_range=[2**k for k in range(2, 22)],
	logx=True,
	logy=True,
	#Pandas

	import pandas as pd
	df = pd.DataFrame({'x': array[:, 0], 'y': array[:, 1], 'z': array[:, 2]})

	# Pandas query
	print('Pandas Query:\t\t', end='')
	%timeit df.query('x >= 0.2 and x <= 0.4 and y >= 0.4 and y <= 0.6')

	# Pandas eval
	from numba.typed import List
	from numba import njit

	@njit
	def boolean_index_numba(array):
	"""
	Takes a numpy array and isolates all points that are within [0.2,0.4] for
	the first dimension and between [0.4,0.6] for the second dimension
	by creating a boolean index.
	This function will be compiled with numba.
	def boolean_index(array):
		"""
		Takes a numpy array and isolates all points that are within [0.2,0.4]
		for the first dimension and between [0.4,0.6] for the second
		dimension by creating a boolean index (bounds inclusive).

		Returns the rows of ``array`` selected by that index.
		"""
		# Body re-indented under the def; at the def's own indentation level
		# the function could not be parsed.
		index = (
			(array[:, 0] >= 0.2)
			& (array[:, 1] >= 0.4)
			& (array[:, 0] <= 0.4)
			& (array[:, 1] <= 0.6)
		)

		return array[index]
	# List comprehension

	def list_comprehension(tuple_list):
		"""
		Takes a list of tuples and isolates all points that are within
		[0.2,0.4] for the first dimension and between [0.4,0.6] for the
		second dimension using a list comprehension (bounds inclusive).

		Returns a new list with the matching tuples in input order.
		"""
		# Body re-indented under the def; at the def's own indentation level
		# the function could not be parsed.
		return [
			point
			for point in tuple_list
			if point[0] >= 0.2 and point[1] >= 0.4 and point[0] <= 0.4 and point[1] <= 0.6
		]