Anders Ohrn anderzzz

## article_abstracts.csv
Article Nr.,Article Abstract
1,"The text is an excerpt from the EU AI Act, outlining its purpose and key regulatory provisions for the use, implementation, and monitoring of artificial intelligence (AI) systems within the Union, with an emphasis on safety, fundamental rights, and innovation support."
2,"The text outlines the scope and applicability of the EU AI Act, detailing the types of entities and activities it applies to, as well as specific exceptions and conditions under which it does not apply. It also mentions how this regulation interacts with other Union laws and regulations."
3,"The text is a comprehensive list of definitions related to the EU AI Act, providing detailed explanations of terms and concepts associated with AI systems, their operation, regulation, and potential risks. It would be useful for anyone seeking to understand the legal and technical language used in the context of AI regulation within the European Union."
4,"This text is a provision from the EU AI Act, outlining the responsi

## metadata_2.py
import os
import pandas as pd

metadata_labels = {
    '# River:': 'river',
    '# Station:': 'station',
    '# Latitude (DD):': 'latitude',
    '# Longitude (DD):': 'longitude',
    '# Catchment area (km≤):': 'catchment_area',
    '# Altitude (m ASL):': 'altitude',

## get_metadata_1.py
metadata = {}

with open('your_file_path') as file:
    lines = file.readlines()
    for line in lines:
        if line.startswith('# River:'):
            metadata['river'] = line.split(':')[1].strip()
        elif line.startswith('# Station:'):
            metadata['station'] = line.split(':')[1].strip()
        elif line.startswith('# Latitude (DD):'):

## second_parsedata.py
import pandas as pd

# Find where the data table begins by scanning for the '# Data lines:' header
# entry. `start_row` stays None when the file contains no such line.
with open('your_file_path') as file:
    lines = list(file)  # every line of the file, newline characters kept
    start_row = None
    for i, line in enumerate(lines):
        if not line.startswith('# Data lines:'):
            continue
        # The data is taken to begin two lines below the marker line.
        start_row = i + 2
        break

## first_parse.py
import pandas as pd

# Load the ';'-separated table, ignoring the first 37 lines of the file
data = pd.read_csv(
    'your_file_path',
    sep=';',
    skiprows=37,
)

# Show the first rows of the loaded table
print(data.head())

## 6335150_Q_Day.Cmd.txt
# Title:                 GRDC STATION DATA FILE
#                        --------------
# Format:                DOS-ASCII
# Field delimiter:       ;
# missing values are indicated by -999.000
#
# file generation date:  2023-06-07
#
# GRDC-No.:              6335150
# River:                 RHINE RIVER

## vggmergedencoder.py
class EncoderVGGMerged(EncoderVGG):
    '''Special case of the VGG Encoder wherein the code is merged along the height/width dimension. This is a thin child
    class of `EncoderVGG`.

    Args:
        merger_type (str, optional): Defines how the code is merged.

    '''
    def __init__(self, merger_type='mean', pretrained_params=True):
        super(EncoderVGGMerged, self).__init__(pretrained_params=pretrained_params)

## runner_la.py
from torch.optim import SGD
from torch.utils.data import DataLoader

from sklearn.preprocessing import normalize

import fungidata
from ae_deep import EncoderVGGMerged
from cluster_utils import MemoryBank, LocalAggregationLoss

# Create fungi Dataset (details omitted)

## _prob_density.py
    def _prob_density(self, codes, indices):
        '''Compute the unnormalized probability density for the codes being in the sets defined by the indices

        Returns:
            prob_dens (Tensor): The unnormalized probability density of the vectors with given codes being part
                of the subset of codes specified by the indices. There is one dimension, the batch dimension

        '''
        ragged = len(set([np.count_nonzero(ind) for ind in indices])) != 1

## neighbour_sets.py
    def _nearest_neighbours(self, codes_data, indices):
        '''Ascertain indices in memory bank of the k-nearest neighbours to given codes

        Returns:
            indices_nearest (numpy.ndarray): Boolean array of k-nearest neighbours for the batch of codes

        '''
        self.neighbour_finder.fit(self.memory_bank.vectors)
        indices_nearest = self.neighbour_finder.kneighbors(codes_data, return_distance=False)
	Article Nr.,Article Abstract
	1,"The text is an excerpt from the EU AI Act, outlining its purpose and key regulatory provisions for the use, implementation, and monitoring of artificial intelligence (AI) systems within the Union, with an emphasis on safety, fundamental rights, and innovation support."
	2,"The text outlines the scope and applicability of the EU AI Act, detailing the types of entities and activities it applies to, as well as specific exceptions and conditions under which it does not apply. It also mentions how this regulation interacts with other Union laws and regulations."
	3,"The text is a comprehensive list of definitions related to the EU AI Act, providing detailed explanations of terms and concepts associated with AI systems, their operation, regulation, and potential risks. It would be useful for anyone seeking to understand the legal and technical language used in the context of AI regulation within the European Union."
	4,"This text is a provision from the EU AI Act, outlining the responsi
	import os
	import pandas as pd

	metadata_labels = {
	'# River:': 'river',
	'# Station:': 'station',
	'# Latitude (DD):': 'latitude',
	'# Longitude (DD):': 'longitude',
	'# Catchment area (km≤):': 'catchment_area',
	'# Altitude (m ASL):': 'altitude',
	metadata = {}

	with open('your_file_path') as file:
	lines = file.readlines()
	for line in lines:
	if line.startswith('# River:'):
	metadata['river'] = line.split(':')[1].strip()
	elif line.startswith('# Station:'):
	metadata['station'] = line.split(':')[1].strip()
	elif line.startswith('# Latitude (DD):'):
	import pandas as pd

	# Read the file and extract relevant lines based on the '#' symbol
	with open('your_file_path') as file:
	lines = file.readlines()
	start_row = None
	for i, line in enumerate(lines):
	if line.startswith('# Data lines:'):
	start_row = i + 2 # Skip two lines after the metadata line
	break
	import pandas as pd

	# Read the file, skip the initial lines
	data = pd.read_csv('your_file_path', delimiter=';', skiprows=37)

	# Preview the extracted data
	print(data.head())
	# Title: GRDC STATION DATA FILE
	# --------------
	# Format: DOS-ASCII
	# Field delimiter: ;
	# missing values are indicated by -999.000
	#
	# file generation date: 2023-06-07
	#
	# GRDC-No.: 6335150
	# River: RHINE RIVER
	class EncoderVGGMerged(EncoderVGG):
	'''Special case of the VGG Encoder wherein the code is merged along the height/width dimension. This is a thin child
	class of `EncoderVGG`.

	Args:
	merger_type (str, optional): Defines how the code is merged.

	'''
	def __init__(self, merger_type='mean', pretrained_params=True):
	super(EncoderVGGMerged, self).__init__(pretrained_params=pretrained_params)
	from torch.optim import SGD
	from torch.utils.data import DataLoader

	from sklearn.preprocessing import normalize

	import fungidata
	from ae_deep import EncoderVGGMerged
	from cluster_utils import MemoryBank, LocalAggregationLoss

	# Create fungi Dataset (details omitted)
	def _prob_density(self, codes, indices):
	'''Compute the unnormalized probability density for the codes being in the sets defined by the indices

	Returns:
	prob_dens (Tensor): The unnormalized probability density of the vectors with given codes being part
	of the subset of codes specified by the indices. There is one dimension, the batch dimension

	'''
	ragged = len(set([np.count_nonzero(ind) for ind in indices])) != 1
	def _nearest_neighbours(self, codes_data, indices):
	'''Ascertain indices in memory bank of the k-nearest neighbours to given codes

	Returns:
	indices_nearest (numpy.ndarray): Boolean array of k-nearest neighbours for the batch of codes

	'''
	self.neighbour_finder.fit(self.memory_bank.vectors)
	indices_nearest = self.neighbour_finder.kneighbors(codes_data, return_distance=False)