# sevperez/search_relevance_4.py

## search_relevance_4.py
def search_td_df(td_df, text_df, query_string: str):
    """
    Find the rows that contain every term of a query and rank them.

    - Parameters: td_df (Pandas DataFrame) representing a term-document
      matrix whose columns are looked up by term, text_df (Pandas
      DataFrame) with a "text" column and rows that correspond to the
      td_df by index, and query_string (string) of whitespace-separated
      terms. The query is lower-cased before lookup and repeated terms
      are only counted once.
    - Returns: A new dataframe that only contains rows from text_df whose
      td_df row has a count above zero for each term in query_string,
      sorted by "terms_sum" from highest to lowest. Additional columns
      are added to show the count of each term and the total count of
      all terms ("terms_sum"). The result has no rows when the query is
      empty or when any term has no column in td_df.
    """
    # split() with no argument collapses runs of whitespace, so padded or
    # double-spaced queries do not produce empty-string "terms";
    # dict.fromkeys drops repeats while keeping first-seen order.
    terms = list(dict.fromkeys(query_string.lower().split()))

    if terms and all(term in td_df.columns for term in terms):
        term_counts = td_df[terms]
        has_every_term = (term_counts > 0).all(axis=1)
        # Copy so adding "terms_sum" never writes into a slice of td_df.
        filtered_td_df = term_counts[has_every_term].copy()
    else:
        # Empty query or a term with no column: nothing can match, so
        # build a zero-row frame that still carries the expected columns.
        filtered_td_df = td_df.iloc[:0].reindex(columns=terms)

    filtered_td_df["terms_sum"] = filtered_td_df.sum(axis=1).astype("int64")
    full_df = text_df.merge(filtered_td_df, left_index=True, right_index=True)

    return full_df.sort_values("terms_sum", ascending=False)
	def search_td_df(td_df, text_df, query_string: str):
		"""
		Find the rows that contain every term of a query and rank them.

		- Parameters: td_df (Pandas DataFrame) representing a term-document
		matrix whose columns are looked up by term, text_df (Pandas
		DataFrame) with a "text" column and rows that correspond to the
		td_df by index, and query_string (string) of whitespace-separated
		terms. The query is lower-cased before lookup and repeated terms
		are only counted once.
		- Returns: A new dataframe that only contains rows from text_df whose
		td_df row has a count above zero for each term in query_string,
		sorted by "terms_sum" from highest to lowest. Additional columns
		are added to show the count of each term and the total count of
		all terms ("terms_sum"). The result has no rows when the query is
		empty or when any term has no column in td_df.
		"""
		# NOTE(review): a function with this same name is defined earlier in
		# the file; confirm which copy is meant to be kept.

		# split() with no argument collapses runs of whitespace, so padded or
		# double-spaced queries do not produce empty-string "terms";
		# dict.fromkeys drops repeats while keeping first-seen order.
		terms = list(dict.fromkeys(query_string.lower().split()))

		if terms and all(term in td_df.columns for term in terms):
			term_counts = td_df[terms]
			has_every_term = (term_counts > 0).all(axis=1)
			# Copy so adding "terms_sum" never writes into a slice of td_df.
			filtered_td_df = term_counts[has_every_term].copy()
		else:
			# Empty query or a term with no column: nothing can match, so
			# build a zero-row frame that still carries the expected columns.
			filtered_td_df = td_df.iloc[:0].reindex(columns=terms)

		filtered_td_df["terms_sum"] = filtered_td_df.sum(axis=1).astype("int64")
		full_df = text_df.merge(filtered_td_df, left_index=True, right_index=True)

		return full_df.sort_values("terms_sum", ascending=False)