1. Generate and save the candidate-set metadata; it should contain the `from_date` and `to_date` fields.
2. Send `from_date` and `to_date` along with the recommendation messages to lemmy.
3. Store `from_date` and `to_date` in the recommendation metadata in lemmy.
4. If a user has empty recommendations, fetch `from_date` and `to_date`, use them to check whether the user submitted any listens in the given time frame, and display an error message accordingly (see the sketch below).

Note: `from_date` and `to_date` define the one-week (or longer) window of user history from which the recommendations are generated.
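A minimal sketch of step 4; the helper functions here are hypothetical stand-ins for the real lookups in the ListenBrainz codebase:

```python
from datetime import datetime
from typing import Tuple

# Hypothetical helpers -- the real lookups live elsewhere in ListenBrainz.
def fetch_recommendation_window(user_id: int) -> Tuple[datetime, datetime]:
    """Return the stored (from_date, to_date) for this user's recommendations."""
    raise NotImplementedError

def listen_count_in_window(user_id: int, from_date: datetime, to_date: datetime) -> int:
    """Count listens the user submitted inside [from_date, to_date]."""
    raise NotImplementedError

def empty_recs_error_message(user_id: int) -> str:
    """Pick an error message for a user whose recommendations came back empty."""
    from_date, to_date = fetch_recommendation_window(user_id)
    if listen_count_in_window(user_id, from_date, to_date) == 0:
        return (
            f"You submitted no listens between {from_date:%Y-%m-%d} and "
            f"{to_date:%Y-%m-%d}, so we could not generate recommendations."
        )
    return "We could not generate recommendations for you; please try again later."
```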
```tsx
import * as _ from "lodash";

// Rendered in the recommendations list as: {getArtistLink(recommendation)}
const getArtistLink = (listen: Listen) => {
  const artistName = _.get(listen, "track_metadata.artist_name");
  const firstArtist = _.first(
    _.get(listen, "track_metadata.additional_info.artist_mbids")
  );
  // Link to the MusicBrainz artist page when an MBID is available,
  // otherwise fall back to the plain artist name.
  if (firstArtist) {
    return (
      <a href={`https://musicbrainz.org/artist/${firstArtist}`}>{artistName}</a>
    );
  }
  return artistName;
};
```
```sql
-- The enum type must be created before the table that references it.
CREATE TYPE rating AS ENUM (
    'I like this',
    'I really like this',
    'I don''t like this',
    'I never want to hear this again'
);

CREATE TABLE recommendation_feedback (
    id             SERIAL, -- PK
    user_id        INTEGER NOT NULL, -- FK to "user".id
    recording_mbid UUID NOT NULL,
    rating         rating NOT NULL,
    created        TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
);
```
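A hedged usage sketch against the schema above; the DSN is a placeholder and the recording MBID is reused from the example later in these notes:

```python
import psycopg2

# Placeholder DSN; substitute real connection details.
conn = psycopg2.connect("dbname=listenbrainz user=listenbrainz")

with conn, conn.cursor() as cur:
    # Parameterized insert; the rating string must match one of the
    # labels declared in CREATE TYPE rating above.
    cur.execute(
        """
        INSERT INTO recommendation_feedback (user_id, recording_mbid, rating)
        VALUES (%s, %s, %s)
        """,
        (1, "4a419c47-baf0-46c1-9f57-1bad1bfa2a47", "I like this"),
    )
```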
[2020-09-29 13:36:03,565] INFO in request_consumer: Received a request!
[2020-09-29 13:36:03,581] INFO in create_dataframes: Fetching listens to create dataframes...
[2020-09-29 13:36:15,481] INFO in create_dataframes: Listen count from 2020-04-01 17:59:43 to 2020-09-28 17:59:43: 11034222
[2020-09-29 13:36:15,481] INFO in create_dataframes: Loading mapping from HDFS...
[2020-09-29 13:36:41,385] INFO in create_dataframes: Number of distinct rows in the mapping: 4510905
[2020-09-29 13:36:41,385] INFO in create_dataframes: Mapping listens...
[2020-09-29 13:38:21,343] INFO in create_dataframes: Listen count after mapping: 6473515
[2020-09-29 13:38:21,343] INFO in create_dataframes: Preparing users data and saving to HDFS...
[2020-09-29 13:39:49,879] INFO in create_dataframes: Preparing recordings data and saving to HDFS...
[2020-09-29 13:41:21,994] INFO in create_dataframes: Preparing listen data dump and playcounts, saving playcounts to HDFS...
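The last log line covers the playcounts step. A minimal sketch of a plausible shape for that aggregation; the HDFS paths and column names are assumptions, not the real ones written by create_dataframes:

```python
from pyspark.sql import SparkSession, functions as F

spark = SparkSession.builder.appName("playcounts").getOrCreate()

# Assumed path and schema; create_dataframes writes the real ones to HDFS.
mapped_listens_df = spark.read.parquet("/data/mapped_listens")

# One row per (user, recording) with the number of plays.
playcounts_df = (
    mapped_listens_df
    .groupBy("user_id", "recording_id")
    .agg(F.count("*").alias("count"))
)
playcounts_df.write.mode("overwrite").parquet("/data/playcounts")
```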
+-------------------+----------------------+---------------------+--------------------+--------------------+--------------------+--------------------+--------------------------------+--------------------+--------------------+----------------------------+--------------------+--------------------------+------+
|mb_artist_credit_id|mb_artist_credit_mbids|mb_artist_credit_name| mb_recording_mbid| mb_recording_name| mb_release_mbid| mb_release_name|msb_artist_credit_name_matchable| msb_artist_msid| msb_recording_msid|msb_recording_name_matchable| msb_release_msid|msb_release_name_matchable|unique|
+-------------------+----------------------+---------------------+--------------------+--------------------+--------------------+--------------------+--------------------------------+--------------------+--------------------+----------------------------+--------------------+--------------------------+------+
| 1444| [8385f26a-f374-4a...| VNV Nation|0c9fb744-4ed2-44d...|
[2020-09-29 09:50:23,675] INFO in create_dataframes: Fetching listens to create dataframes...
[2020-09-29 09:50:28,873] INFO in create_dataframes: Listen count from 2020-04-01 17:59:43 to 2020-09-28 17:59:43: 11034222
[2020-09-29 09:50:30,195] INFO in create_dataframes: Mapping listens...
[2020-09-29 09:52:19,645] INFO in create_dataframes: Listen count after mapping: 220424882
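Note the jump from 11,034,222 listens before mapping to 220,424,882 after: that is the signature of duplicate join keys in the mapping, where each duplicate row multiplies the listens it matches. A minimal sketch of one fix, deduplicating the mapping on its join key before joining; the paths and the `recording_msid` column name on the listens side are assumptions:

```python
from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("map_listens").getOrCreate()

# Assumed paths; the real dataframes come from the listenbrainz spark jobs.
listens_df = spark.read.parquet("/data/listens")
mapping_df = spark.read.parquet("/data/msid_mbid_mapping")

# Keep a single mapping row per join key so the join cannot multiply
# each listen by the number of duplicate mapping rows.
mapping_deduped = mapping_df.dropDuplicates(["msb_recording_msid"])

mapped_listens = listens_df.join(
    mapping_deduped,
    listens_df["recording_msid"] == mapping_deduped["msb_recording_msid"],
    "inner",
)
print(mapped_listens.count())  # should stay <= the pre-mapping listen count
```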
20/09/26 00:06:29 ERROR TaskSetManager: Task 5 in stage 4022.0 failed 4 times; aborting job
Exception in thread "serve toLocalIterator" org.apache.spark.SparkException: Job aborted due to stage failure: Task 5 in stage 4022.0 failed 4 times, most recent failure: Lost task 5.3 in stage 4022.0 (TID 23237, 10.0.1.24, executor 2): org.apache.spark.api.python.PythonException: Traceback (most recent call last):
  File "/usr/local/spark/python/lib/pyspark.zip/pyspark/worker.py", line 366, in main
    func, profiler, deserializer, serializer = read_udfs(pickleSer, infile, eval_type)
  File "/usr/local/spark/python/lib/pyspark.zip/pyspark/worker.py", line 241, in read_udfs
    arg_offsets, udf = read_single_udf(pickleSer, infile, eval_type, runner_conf)
  File "/usr/local/spark/python/lib/pyspark.zip/pyspark/worker.py", line 168, in read_single_udf
    f, return_type = read_command(pickleSer, infile)
  File "/usr/local/spark/python/lib/pyspark.zip/pyspark/worker.py", line 69, in read_command
    command = serializer._
result = [
    ["ecf36230-f81e-4825-99c1-947cbe1bd809", 0.598],
    ["77328074-a4a9-4d34-8357-052ffe236371", 0.56],
    ["02bbe3fc-9834-43f9-9a98-42b15be56d54", 0.557],
    ["e37ea86c-3b30-41c0-bcee-4099d896e9ed", 0.523],
    ["28655fdc-ffac-467a-a285-aad3456f483c", 0.52],
    ["574c3d3d-f89d-43e7-9927-f3e38f1fe938", 0.516],
    ["9254f8dc-bf3f-4179-ba86-4a6ffcf76df6", 0.512],
    ["bfdb989d-92a2-411a-84fb-dff8f7d586aa", 0.51],
    ["70b71efe-d26b-4da3-82ad-4140b7c35ff4", 0.51],
    ["03bc40e4-5688-42e9-b0c4-63a3a45ca247", 0.508],
    ["ed719c5c-7f08-4272-92fd-1e3ef2539ed0", 0.506],
    ["fe7640bd-e721-4827-8a7b-8e9569317298", 0.505],
    ["3e5bc764-dfa4-454d-a2a9-0ee84ae35db2", 0.504],
    ["6e29af7a-dc0f-4486-9681-0dadefdb3509", 0.504],
    ["75739f06-8499-4159-997e-f91cd57c3398", 0.504],
    ["47e3dc92-8068-4312-906d-e9b7f2231e0b", 0.502],
    ["9d151cec-c1e2-4f81-86ec-76ba29f91bf2", 0.502],
    ["9db4df99-c4a7-4bcd-a64f-b94953405e01", 0.5],
    ["20295bc6-491d-4381-a02d-fbd3574a504b", 0.5],
    ["e6a38991-9a4e-4d72-bd5b-833407731667", 0.5]
]
Before:
[
{
"recording_mbid": "4a419c47-baf0-46c1-9f57-1bad1bfa2a47"
}
]
After:
[
{
Example of duplicate mapping rows: the same artist_credit_id appears with two different artist_mbids lists.

+-----------------+-------------+
| artist_credit_id| artist_mbids|
+-----------------+-------------+
|                1|       [a, b]|
|                1|       [c, d]|
+-----------------+-------------+