# Severin Perez (sevperez)

## search_relevance_6.py
def search_tfidf_df(tfidf_df, text_df, query_string: str):
    """
    - Purpose: Search text_df for the terms in query_string, using the
      weights in tfidf_df to decide which rows match.
    - Parameters: tfidf_df (Pandas DataFrame) representing a tf-idf
      matrix, text_df (Pandas DataFrame) with a "text" column and rows
      that correspond to the tfidf_df, and query_string (string).
    - Returns: A new dataframe that only contains rows from text_df where
      the corresponding tf-idf value was greater than zero for each of
      the terms in query_string. Additional columns are added to show the
      tf-idf value for each term and the sum of the tf-idf values.
    """
    # NOTE(review): only the signature and docstring are visible in this
    # excerpt; the contract above is the documented intent -- confirm it
    # against the full implementation.

## search_relevance_5.py
def document_frequency(td_df, term: str):
    """
    - Parameters: td_df (Pandas DataFrame) representing a term-document
      matrix that has a column named after term, and term (string).
    - Returns: The document frequency of term, i.e. how many rows
      (documents) of td_df hold a value greater than zero for it.
    """
    has_term = td_df[term] > 0
    docs_with_term = td_df[has_term]
    return len(docs_with_term)

def inverse_document_frequency(td_df, term: str):

## search_relevance_4.py
def search_td_df(td_df, text_df, query_string: str):
    """
    - Purpose: Search text_df for the terms in query_string, using the
      term counts in td_df to decide which rows match.
    - Parameters: td_df (Pandas DataFrame) representing a term-document
      matrix, text_df (Pandas DataFrame) with a "text" column and rows
      that correspond to the td_df, and query_string (string).
    - Returns: A new dataframe that only contains rows from text_df where
      the "text" column had at least one occurrence of each term in
      query_string. Additional columns are added to show the count of
      each term and the total count of all terms.
    """
    # NOTE(review): only the signature and docstring are visible in this
    # excerpt; the contract above is the documented intent -- confirm it
    # against the full implementation.

## search_relevance_3.py
def build_corpus(doc_list, dictionary):
    """
    - Parameters: doc_list (list of spaCy Document objects), dictionary
      (Gensim Dictionary object).
    - Returns: A list with one bag-of-words entry per document, in the
      same order as doc_list. Each entry is whatever dictionary.doc2bow
      yields for that document's token texts, i.e. tuples of
      (token_id, token_count).
    """
    corpus = []
    for doc in doc_list:
        token_texts = get_token_texts(doc)
        corpus.append(dictionary.doc2bow(token_texts))
    return corpus

def build_td_matrix(doc_list, dictionary):

## search_relevance_2.py
# Load the spaCy "en_core_web_md" model once, when this module is first
# executed, and keep it in the module-level name `nlp`.
nlp = spacy.load("en_core_web_md")

# tokenize documents
def spacy_doc(model, text, lower=True):
    """
    - Purpose: Turn a raw string into a spaCy Document.
    - Parameters: model (spaCy model), text (string), lower (bool,
      defaults to True).
    - Returns: A spaCy Document object processed using the provided
      model. Document is all lowercase if lower is True.
    """
    # NOTE(review): only the signature and docstring are visible in this
    # excerpt; the contract above is the documented intent -- confirm it
    # against the full implementation.

## search_relevance_1.py
def search_df_texts(df, query_string: str):
    """
    - Parameters: df (Pandas DataFrame), query_string (string). df must
      contain a "text" column.
    - Returns: A subset of df containing only rows where each
      whitespace-separated term in query_string appears as a literal,
      case-insensitive substring of df["text"]. Rows whose text is
      missing never match a term; a query with no terms returns every
      row.
    """
    # split() with no argument drops the empty strings that repeated or
    # leading/trailing spaces would otherwise produce.
    terms = query_string.lower().split()

    # Lowercase the column once rather than once per term.
    lowered = df["text"].str.lower()

    matches = np.ones(len(df), dtype=bool)
    for term in terms:
        # regex=False: user input such as "c++" or "(3)" must be matched
        # literally, not compiled as a regular expression.
        # na=False: missing text counts as "no match" instead of putting
        # NaN into the boolean mask.
        found = lowered.str.contains(term, regex=False, na=False)
        matches &= found.to_numpy(dtype=bool)

    return df[matches]

## writing_5.py
def binary_search(items, target):
    """
    - Parameters: items (sequence sorted in ascending order), target
      (value to look for).
    - Returns: True if target occurs in items, False otherwise
      (including when items is empty).
    """
    left = 0
    right = len(items) - 1

    while left <= right:
        mid = (left + right) // 2

        if items[mid] == target:
            return True

        # Discard the half that cannot contain target. Without this step
        # the window never shrinks and a miss would loop forever.
        if items[mid] < target:
            left = mid + 1
        else:
            right = mid - 1

    return False


## writing_4.py
class Receipt:
    """
    - Purpose: Pair a purchased item with what it cost.
    - Attributes: item (label shown on the receipt), cost (numeric
      price).
    """

    def __init__(self, item, cost):
        self.item = item
        self.cost = cost

    def receipt_msg(self):
        """
        - Returns: A string of the form "<item>, $<cost>", with cost
          always shown to two decimal places (3.5 is rendered as "$3.50",
          not "$3.5").
        """
        # The .2f format rounds like round(cost, 2) but keeps trailing
        # zeros, which round() followed by str() drops.
        return f"{self.item}, ${self.cost:.2f}"

    def deliver(self):
        """
        - Builds the receipt message.
        - NOTE(review): the visible code does nothing further with the
          message; the delivery step appears to be cut off in this
          excerpt -- confirm against the full source.
        """
        msg = self.receipt_msg()

## writing_3.py
class Car:
    """
    - Purpose: Model a car whose speed rises in fixed steps up to a cap.
    - Attributes: max_speed (upper bound for current_speed),
      current_speed (starts at 0), acceleration_rate (amount added per
      accelerate() call, starts at 1).
    """

    def __init__(self, max_speed):
        self.max_speed = max_speed
        self.current_speed = 0
        self.acceleration_rate = 1

    def accelerate(self):
        """
        - Raises current_speed by acceleration_rate, never going past
          max_speed. Does nothing once max_speed has been reached.
        """
        if self.current_speed < self.max_speed:
            # Clamp the step: when the remaining gap is smaller than
            # acceleration_rate, a plain += would overshoot max_speed.
            self.current_speed = min(
                self.current_speed + self.acceleration_rate,
                self.max_speed,
            )


## writing_2.py
def double(num):
    """
    - Parameters: num (any value that supports multiplication by an
      int).
    - Returns: num multiplied by 2.
    """
    doubled = num * 2
    return doubled

my_numbers = [1, 2, 3, 4, 5]

# Apply double to every element, collecting the results in a new list.
doubled_numbers = [double(value) for value in my_numbers]
print(doubled_numbers)              # [2, 4, 6, 8, 10]
	def search_tfidf_df(tfidf_df, text_df, query_string: str):
		"""
		- Parameters: tfidf_df (Pandas DataFrame) representing a tf-idf
		  matrix, text_df (Pandas DataFrame) with a "text" column and rows
		  that correspond to the tfidf_df, and query_string (string).
		- Returns: A new dataframe that only contains rows from text_df where
		  the corresponding tf-idf value was greater than zero for each of
		  the terms in query_string. Additional columns are added to show the
		  tf-idf value for each term and the sum of the tf-idf values.
		"""
		# NOTE(review): only the signature and docstring are visible in
		# this excerpt; confirm the contract against the full source.
	def document_frequency(td_df, term: str):
		"""
		- Parameters: td_df (Pandas DataFrame) representing a term-document
		  matrix that has a column named after term, and term (string).
		- Returns: The document frequency of term, i.e. how many rows
		  (documents) of td_df hold a value greater than zero for it.
		"""
		has_term = td_df[term] > 0
		return len(td_df[has_term])

	def inverse_document_frequency(td_df, term: str):
	def search_td_df(td_df, text_df, query_string: str):
		"""
		- Parameters: td_df (Pandas DataFrame) representing a term-document
		  matrix, text_df (Pandas DataFrame) with a "text" column and rows
		  that correspond to the td_df, and query_string (string).
		- Returns: A new dataframe that only contains rows from text_df where
		  the "text" column had at least one occurrence of each term in
		  query_string. Additional columns are added to show the count of
		  each term and the total count of all terms.
		"""
		# NOTE(review): only the signature and docstring are visible in
		# this excerpt; confirm the contract against the full source.
	def build_corpus(doc_list, dictionary):
		"""
		- Parameters: doc_list (list of spaCy Document objects), dictionary
		  (Gensim Dictionary object).
		- Returns: A list with one bag-of-words entry per document, in the
		  same order as doc_list. Each entry is whatever dictionary.doc2bow
		  yields for that document's token texts, i.e. tuples of
		  (token_id, token_count).
		"""
		return [dictionary.doc2bow(get_token_texts(doc)) for doc in doc_list]

	def build_td_matrix(doc_list, dictionary):
	# Load the spaCy "en_core_web_md" model and keep it in the name `nlp`.
	nlp = spacy.load("en_core_web_md")

	# tokenize documents
	def spacy_doc(model, text, lower=True):
		"""
		- Parameters: model (spaCy model), text (string), lower (bool,
		  defaults to True).
		- Returns: A spaCy Document object processed using the provided
		  model. Document is all lowercase if lower is True.
		"""
		# NOTE(review): only the signature and docstring are visible in
		# this excerpt; confirm the contract against the full source.
	def search_df_texts(df, query_string: str):
		"""
		- Parameters: df (Pandas DataFrame), query_string (string). df must
		  contain a "text" column.
		- Returns: A subset of df containing only rows where each
		  whitespace-separated term in query_string appears as a literal,
		  case-insensitive substring of df["text"]. Rows whose text is
		  missing never match a term; a query with no terms returns every
		  row.
		"""
		# split() with no argument drops the empty strings that repeated or
		# leading/trailing spaces would otherwise produce.
		terms = query_string.lower().split()

		# Lowercase the column once rather than once per term.
		lowered = df["text"].str.lower()

		matches = np.ones(len(df), dtype=bool)
		for term in terms:
			# regex=False: user input such as "c++" or "(3)" must be matched
			# literally, not compiled as a regular expression.
			# na=False: missing text counts as "no match" instead of putting
			# NaN into the boolean mask.
			found = lowered.str.contains(term, regex=False, na=False)
			matches &= found.to_numpy(dtype=bool)

		return df[matches]
	def binary_search(items, target):
		"""
		- Parameters: items (sequence sorted in ascending order), target
		  (value to look for).
		- Returns: True if target occurs in items, False otherwise
		  (including when items is empty).
		"""
		left = 0
		right = len(items) - 1

		while left <= right:
			mid = (left + right) // 2

			if items[mid] == target:
				return True

			# Discard the half that cannot contain target. Without this
			# step the window never shrinks and a miss would loop forever.
			if items[mid] < target:
				left = mid + 1
			else:
				right = mid - 1

		return False
	class Receipt:
		"""
		- Purpose: Pair a purchased item with what it cost.
		- Attributes: item (label shown on the receipt), cost (numeric
		  price).
		"""

		def __init__(self, item, cost):
			self.item = item
			self.cost = cost

		def receipt_msg(self):
			"""
			- Returns: A string of the form "<item>, $<cost>", with cost
			  always shown to two decimal places (3.5 is rendered as
			  "$3.50", not "$3.5").
			"""
			# The .2f format rounds like round(cost, 2) but keeps trailing
			# zeros, which round() followed by str() drops.
			return f"{self.item}, ${self.cost:.2f}"

		def deliver(self):
			"""
			- Builds the receipt message.
			- NOTE(review): the visible code does nothing further with the
			  message; the delivery step appears to be cut off in this
			  excerpt -- confirm against the full source.
			"""
			msg = self.receipt_msg()
	class Car:
		"""
		- Purpose: Model a car whose speed rises in fixed steps up to a cap.
		- Attributes: max_speed (upper bound for current_speed),
		  current_speed (starts at 0), acceleration_rate (amount added per
		  accelerate() call, starts at 1).
		"""

		def __init__(self, max_speed):
			self.max_speed = max_speed
			self.current_speed = 0
			self.acceleration_rate = 1

		def accelerate(self):
			"""
			- Raises current_speed by acceleration_rate, never going past
			  max_speed. Does nothing once max_speed has been reached.
			"""
			if self.current_speed < self.max_speed:
				# Clamp the step: when the remaining gap is smaller than
				# acceleration_rate, a plain += would overshoot max_speed.
				self.current_speed = min(
					self.current_speed + self.acceleration_rate,
					self.max_speed,
				)
	def double(num):
		"""
		- Parameters: num (any value that supports multiplication by an
		  int).
		- Returns: num multiplied by 2.
		"""
		return num * 2

	my_numbers = [1, 2, 3, 4, 5]

	# Apply double to every element, collecting the results in a new list.
	doubled_numbers = [double(value) for value in my_numbers]
	print(doubled_numbers) # [2, 4, 6, 8, 10]