@ololobus
Last active August 25, 2017 15:08
From 22045780b95db74a99dc6e13e57413401da92c81 Mon Sep 17 00:00:00 2001
From: Alex K <alex.lumir@gmail.com>
Date: Mon, 10 Jul 2017 17:48:26 +0300
Subject: [PATCH 01/13] Dummy COPY FROM BGWorker v0.1
---
src/backend/commands/copy.c | 42 +++++++++++++++++++++++++++++++++++++++
src/backend/postmaster/bgworker.c | 4 ++++
src/include/commands/copy.h | 1 +
3 files changed, 47 insertions(+)
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 29c5376b2d..964469d01e 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -38,8 +38,10 @@
#include "optimizer/planner.h"
#include "nodes/makefuncs.h"
#include "parser/parse_relation.h"
+#include "postmaster/bgworker.h"
#include "rewrite/rewriteHandler.h"
#include "storage/fd.h"
+#include "storage/lwlock.h"
#include "tcop/tcopprot.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
@@ -2336,6 +2338,20 @@ limit_printout_length(const char *str)
}
/*
+ * Copy FROM Background Worker main loop.
+ */
+void
+CopyFromBgwMainLoop(Datum main_arg)
+{
+ int i = DatumGetInt32(main_arg);
+ int j;
+ j = i + 1;
+
+ elog(LOG, "BGWorker process: Result: %d", j);
+}
+
+
+/*
* Copy FROM file to relation.
*/
uint64
@@ -2367,6 +2383,32 @@ CopyFrom(CopyState cstate)
Size bufferedTuplesSize = 0;
int firstBufferedLineNo = 0;
+ // BG Worker setup start
+ BackgroundWorker worker;
+ BackgroundWorkerHandle *bgwhandle = NULL;
+ BgwHandleStatus bgwstatus;
+ pid_t bgwpid;
+
+ int bgwarg = 3;
+
+ MemSet(&worker, 0, sizeof(BackgroundWorker));
+
+ worker.bgw_flags = BGWORKER_SHMEM_ACCESS |
+ BGWORKER_BACKEND_DATABASE_CONNECTION;
+ worker.bgw_start_time = BgWorkerStart_ConsistentState;
+ worker.bgw_restart_time = BGW_NEVER_RESTART;
+ worker.bgw_notify_pid = MyProcPid;
+ sprintf(worker.bgw_library_name, "postgres");
+ sprintf(worker.bgw_function_name, "CopyFromBgwMainLoop");
+
+ snprintf(worker.bgw_name, BGW_MAXLEN, "copy_from_bgw_pool_worker_%d", 1);
+ worker.bgw_main_arg = Int32GetDatum(bgwarg);
+ RegisterDynamicBackgroundWorker(&worker, &bgwhandle);
+ // BG Worker setup end
+
+ bgwstatus = WaitForBackgroundWorkerStartup(bgwhandle, &bgwpid);
+ elog(LOG, "Main COPY process (pid %d): BGWorker started (pid %d).", MyProcPid, bgwpid);
+
Assert(cstate->rel);
/*
diff --git a/src/backend/postmaster/bgworker.c b/src/backend/postmaster/bgworker.c
index 28af6f0f07..b41db252ce 100644
--- a/src/backend/postmaster/bgworker.c
+++ b/src/backend/postmaster/bgworker.c
@@ -16,6 +16,7 @@
#include "libpq/pqsignal.h"
#include "access/parallel.h"
+#include "commands/copy.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "port/atomics.h"
@@ -129,6 +130,9 @@ static const struct
},
{
"ApplyWorkerMain", ApplyWorkerMain
+ },
+ {
+ "CopyFromBgwMainLoop", CopyFromBgwMainLoop
}
};
diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h
index bba64fc3d7..0b690b39ca 100644
--- a/src/include/commands/copy.h
+++ b/src/include/commands/copy.h
@@ -37,6 +37,7 @@ extern bool NextCopyFromRawFields(CopyState cstate,
char ***fields, int *nfields);
extern void CopyFromErrorCallback(void *arg);
+extern void CopyFromBgwMainLoop(Datum main_arg);
extern uint64 CopyFrom(CopyState cstate);
extern DestReceiver *CreateCopyDestReceiver(void);
--
2.11.0
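Patch 01 stores the result of WaitForBackgroundWorkerStartup() in bgwstatus but never inspects it, so a worker that fails to launch goes unnoticed. A minimal sketch (separate from the patches themselves) of checking the status before trusting bgwpid, using the same handle as above; the error wording is invented for illustration:

    BgwHandleStatus bgwstatus;
    pid_t           bgwpid;

    bgwstatus = WaitForBackgroundWorkerStartup(bgwhandle, &bgwpid);
    if (bgwstatus != BGWH_STARTED)
        ereport(ERROR,
                (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
                 errmsg("could not start COPY FROM background worker")));
    elog(LOG, "Main COPY process (pid %d): BGWorker started (pid %d).",
         MyProcPid, bgwpid);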
From 7289acd9a367e134c96ed66b1b3ac0c88e22e3ad Mon Sep 17 00:00:00 2001
From: Alex K <alex.lumir@gmail.com>
Date: Fri, 14 Jul 2017 15:08:31 +0300
Subject: [PATCH 02/13] Do not wait for worker startup, to prevent a segfault during initdb
---
src/backend/commands/copy.c | 53 ++++++++++++++++++++++++---------------------
1 file changed, 28 insertions(+), 25 deletions(-)
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 964469d01e..81af534d4d 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -2383,31 +2383,34 @@ CopyFrom(CopyState cstate)
Size bufferedTuplesSize = 0;
int firstBufferedLineNo = 0;
- // BG Worker setup start
- BackgroundWorker worker;
- BackgroundWorkerHandle *bgwhandle = NULL;
- BgwHandleStatus bgwstatus;
- pid_t bgwpid;
-
- int bgwarg = 3;
-
- MemSet(&worker, 0, sizeof(BackgroundWorker));
-
- worker.bgw_flags = BGWORKER_SHMEM_ACCESS |
- BGWORKER_BACKEND_DATABASE_CONNECTION;
- worker.bgw_start_time = BgWorkerStart_ConsistentState;
- worker.bgw_restart_time = BGW_NEVER_RESTART;
- worker.bgw_notify_pid = MyProcPid;
- sprintf(worker.bgw_library_name, "postgres");
- sprintf(worker.bgw_function_name, "CopyFromBgwMainLoop");
-
- snprintf(worker.bgw_name, BGW_MAXLEN, "copy_from_bgw_pool_worker_%d", 1);
- worker.bgw_main_arg = Int32GetDatum(bgwarg);
- RegisterDynamicBackgroundWorker(&worker, &bgwhandle);
- // BG Worker setup end
-
- bgwstatus = WaitForBackgroundWorkerStartup(bgwhandle, &bgwpid);
- elog(LOG, "Main COPY process (pid %d): BGWorker started (pid %d).", MyProcPid, bgwpid);
+ if (!IsBootstrapProcessingMode())
+ {
+ // BG Worker setup start
+ BackgroundWorker worker;
+ BackgroundWorkerHandle *bgwhandle = NULL;
+ BgwHandleStatus bgwstatus;
+ pid_t bgwpid;
+
+ int bgwarg = 3;
+
+ MemSet(&worker, 0, sizeof(BackgroundWorker));
+
+ worker.bgw_flags = BGWORKER_SHMEM_ACCESS |
+ BGWORKER_BACKEND_DATABASE_CONNECTION;
+ worker.bgw_start_time = BgWorkerStart_ConsistentState;
+ worker.bgw_restart_time = BGW_NEVER_RESTART;
+ worker.bgw_notify_pid = MyProcPid;
+ sprintf(worker.bgw_library_name, "postgres");
+ sprintf(worker.bgw_function_name, "CopyFromBgwMainLoop");
+
+ snprintf(worker.bgw_name, BGW_MAXLEN, "copy_from_bgw_pool_worker_%d", 1);
+ worker.bgw_main_arg = Int32GetDatum(bgwarg);
+ RegisterDynamicBackgroundWorker(&worker, &bgwhandle);
+ // BG Worker setup end
+
+ // bgwstatus = WaitForBackgroundWorkerStartup(bgwhandle, &bgwpid);
+ elog(LOG, "Main COPY process (pid %d): BGWorker started (pid %d).", MyProcPid, bgwpid);
+ }
Assert(cstate->rel);
--
2.11.0
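Patch 02 comments out the startup wait, but the elog() that follows still prints bgwpid, which is now never assigned; patch 03 restores the wait. A hedged sketch of how the log line could avoid reading the uninitialized variable in the meantime (InvalidPid is PostgreSQL's pid sentinel from miscadmin.h):

    pid_t bgwpid = InvalidPid;  /* never filled in: the startup wait is skipped */

    RegisterDynamicBackgroundWorker(&worker, &bgwhandle);

    /* The worker pid is unknown at this point, so do not report one. */
    elog(LOG, "Main COPY process (pid %d): BGWorker registered.", MyProcPid);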
From d04ec3310a343436ee95ad02ff3b2ede66547193 Mon Sep 17 00:00:00 2001
From: Alex K <alex.lumir@gmail.com>
Date: Fri, 14 Jul 2017 16:57:57 +0300
Subject: [PATCH 03/13] Check IsUnderPostmaster before BGWorker setup
---
src/backend/commands/copy.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 81af534d4d..9fb2c0bfee 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -224,6 +224,8 @@ typedef struct CopyStateData
char *raw_buf;
int raw_buf_index; /* next byte to process */
int raw_buf_len; /* total # of bytes stored */
+
+ bool allow_parallel;
} CopyStateData;
/* DestReceiver for COPY (query) TO */
@@ -1409,6 +1411,8 @@ BeginCopy(ParseState *pstate,
/* Allocate workspace and zero all fields */
cstate = (CopyStateData *) palloc0(sizeof(CopyStateData));
+ cstate->allow_parallel = IsNormalProcessingMode() && IsUnderPostmaster;
+
/*
* We allocate everything used by a cstate in a new memory context. This
* avoids memory leaks during repeated use of COPY in a query.
@@ -2383,7 +2387,7 @@ CopyFrom(CopyState cstate)
Size bufferedTuplesSize = 0;
int firstBufferedLineNo = 0;
- if (!IsBootstrapProcessingMode())
+ if (cstate->allow_parallel)
{
// BG Worker setup start
BackgroundWorker worker;
@@ -2408,7 +2412,7 @@ CopyFrom(CopyState cstate)
RegisterDynamicBackgroundWorker(&worker, &bgwhandle);
// BG Worker setup end
- // bgwstatus = WaitForBackgroundWorkerStartup(bgwhandle, &bgwpid);
+ bgwstatus = WaitForBackgroundWorkerStartup(bgwhandle, &bgwpid);
elog(LOG, "Main COPY process (pid %d): BGWorker started (pid %d).", MyProcPid, bgwpid);
}
--
2.11.0
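Patch 03 centralizes the gating in BeginCopy(): allow_parallel is true only under a live postmaster in normal processing mode, which rules out the bootstrap and standalone backends (initdb, postgres --single) that crashed patch 01. Restated as a hedged standalone helper (the function name is invented for illustration):

    /*
     * Dynamic background workers require a running postmaster and normal
     * processing mode; bootstrap/standalone backends have neither.
     */
    static bool
    copy_from_allow_parallel(void)
    {
        return IsNormalProcessingMode() && IsUnderPostmaster;
    }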
From cdb3846ecf54c04298af937136625c0e1ceebf4a Mon Sep 17 00:00:00 2001
From: Alex K <alex.lumir@gmail.com>
Date: Fri, 21 Jul 2017 15:25:02 +0300
Subject: [PATCH 04/13] Very, very dummy workers v0.2, but with proper shared memory usage
---
src/backend/commands/copy.c | 1155 ++++++++++++++++++++++++++++--
src/backend/storage/lmgr/lwlock.c | 3 +-
src/backend/storage/lmgr/lwlocknames.txt | 1 +
src/include/commands/copy.h | 2 +
src/include/storage/lwlock.h | 1 +
5 files changed, 1117 insertions(+), 45 deletions(-)
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 9fb2c0bfee..5a087f33c7 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -36,18 +36,29 @@
#include "miscadmin.h"
#include "optimizer/clauses.h"
#include "optimizer/planner.h"
+#include "optimizer/cost.h"
#include "nodes/makefuncs.h"
#include "parser/parse_relation.h"
+#include "pgstat.h"
#include "postmaster/bgworker.h"
#include "rewrite/rewriteHandler.h"
+#include "storage/dsm.h"
#include "storage/fd.h"
#include "storage/lwlock.h"
+#include "storage/spin.h"
+#include "storage/shm_mq.h"
+#include "storage/shm_toc.h"
+#include "storage/ipc.h"
+#include "storage/procarray.h"
+#include "storage/procsignal.h"
+#include "storage/procarray.h"
#include "tcop/tcopprot.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/portal.h"
#include "utils/rel.h"
+#include "utils/resowner.h"
#include "utils/rls.h"
#include "utils/snapmgr.h"
@@ -236,6 +247,51 @@ typedef struct
uint64 processed; /* # of tuples processed */
} DR_copy;
+typedef struct CopyFromStateData
+{
+ CopyState cstate;
+ HeapTuple tuple;
+ TupleDesc tupDesc;
+ Datum *values;
+ bool *nulls;
+ ResultRelInfo *resultRelInfo;
+ ResultRelInfo *saved_resultRelInfo;
+ EState *estate;
+ ExprContext *econtext;
+ TupleTableSlot *myslot;
+ MemoryContext oldcontext;
+
+ ErrorContextCallback errcallback;
+ CommandId mycid;
+ int hi_options;
+ BulkInsertState bistate;
+ uint64 processed;
+ bool useHeapMultiInsert;
+ int nBufferedTuples;
+ int prev_leaf_part_index;
+
+#define MAX_BUFFERED_TUPLES 1000
+ HeapTuple *bufferedTuples;
+ Size bufferedTuplesSize;
+ int firstBufferedLineNo;
+} CopyFromStateData;
+
+typedef struct
+{
+ int nworkers;
+ BackgroundWorkerHandle *handle[FLEXIBLE_ARRAY_MEMBER];
+} WorkerState;
+
+typedef struct
+{
+ slock_t mutex;
+ int curr_line;
+ int workers_total;
+ int workers_attached;
+ int workers_ready;
+} ParallelState;
+
+#define PG_COPY_FROM_SHM_MQ_MAGIC 0x79fb2447
/*
* These macros centralize code used to process line_buf and raw_buf buffers.
@@ -302,6 +358,7 @@ if (1) \
goto not_end_of_copy; \
} else ((void) 0)
+
static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0";
@@ -356,6 +413,23 @@ static void CopySendInt16(CopyState cstate, int16 val);
static bool CopyGetInt16(CopyState cstate, int16 *val);
+static ParallelState* shm_mq_setup(int64 queue_size, int32 nworkers,
+ dsm_segment **segp, shm_mq_handle **output,
+ shm_mq_handle **input);
+static void setup_dynamic_shared_memory(int64 queue_size, int nworkers,
+ dsm_segment **segp,
+ ParallelState **hdrp,
+ shm_mq **outp, shm_mq **inp);
+static WorkerState *setup_background_workers(int nworkers,
+ dsm_segment *seg);
+static void cleanup_background_workers(dsm_segment *seg, Datum arg);
+static void wait_for_workers_to_become_ready(WorkerState *wstate,
+ volatile ParallelState *pst);
+static bool check_worker_status(WorkerState *wstate);
+
+static void handle_sigterm(SIGNAL_ARGS);
+
+
/*
* Send copy start/stop messages for frontend copies. These have changed
* in past protocol redesigns.
@@ -989,7 +1063,16 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt,
cstate = BeginCopyFrom(pstate, rel, stmt->filename, stmt->is_program,
NULL, stmt->attlist, stmt->options);
- *processed = CopyFrom(cstate); /* copy from file to database */
+
+ if (cstate->allow_parallel) /* copy from file to database */
+ {
+ *processed = ParallelCopyFrom(cstate);
+ }
+ else
+ {
+ *processed = CopyFrom(cstate);
+ }
+
EndCopyFrom(cstate);
}
else
@@ -2342,20 +2425,6 @@ limit_printout_length(const char *str)
}
/*
- * Copy FROM Background Worker main loop.
- */
-void
-CopyFromBgwMainLoop(Datum main_arg)
-{
- int i = DatumGetInt32(main_arg);
- int j;
- j = i + 1;
-
- elog(LOG, "BGWorker process: Result: %d", j);
-}
-
-
-/*
* Copy FROM file to relation.
*/
uint64
@@ -2387,35 +2456,6 @@ CopyFrom(CopyState cstate)
Size bufferedTuplesSize = 0;
int firstBufferedLineNo = 0;
- if (cstate->allow_parallel)
- {
- // BG Worker setup start
- BackgroundWorker worker;
- BackgroundWorkerHandle *bgwhandle = NULL;
- BgwHandleStatus bgwstatus;
- pid_t bgwpid;
-
- int bgwarg = 3;
-
- MemSet(&worker, 0, sizeof(BackgroundWorker));
-
- worker.bgw_flags = BGWORKER_SHMEM_ACCESS |
- BGWORKER_BACKEND_DATABASE_CONNECTION;
- worker.bgw_start_time = BgWorkerStart_ConsistentState;
- worker.bgw_restart_time = BGW_NEVER_RESTART;
- worker.bgw_notify_pid = MyProcPid;
- sprintf(worker.bgw_library_name, "postgres");
- sprintf(worker.bgw_function_name, "CopyFromBgwMainLoop");
-
- snprintf(worker.bgw_name, BGW_MAXLEN, "copy_from_bgw_pool_worker_%d", 1);
- worker.bgw_main_arg = Int32GetDatum(bgwarg);
- RegisterDynamicBackgroundWorker(&worker, &bgwhandle);
- // BG Worker setup end
-
- bgwstatus = WaitForBackgroundWorkerStartup(bgwhandle, &bgwpid);
- elog(LOG, "Main COPY process (pid %d): BGWorker started (pid %d).", MyProcPid, bgwpid);
- }
-
Assert(cstate->rel);
/*
@@ -4976,3 +5016,1030 @@ CreateCopyDestReceiver(void)
return (DestReceiver *) self;
}
+
+
+
+/*
+ * Copy FROM Background Worker main loop.
+ */
+void
+CopyFromBgwMainLoop(Datum main_arg)
+{
+ volatile ParallelState *hdr;
+ dsm_segment *seg;
+ shm_toc *toc;
+ shm_mq_handle *inqh;
+ shm_mq_handle *outqh;
+ int myworkernumber;
+ PGPROC *registrant;
+ int prev = 0;
+
+ /*
+ * Establish signal handlers.
+ *
+ * We want CHECK_FOR_INTERRUPTS() to kill off this worker process just as
+ * it would a normal user backend. To make that happen, we establish a
+ * signal handler that is a stripped-down version of die().
+ */
+ pqsignal(SIGTERM, handle_sigterm);
+ BackgroundWorkerUnblockSignals();
+
+ /*
+ * Connect to the dynamic shared memory segment.
+ *
+ * The backend that registered this worker passed us the ID of a shared
+ * memory segment to which we must attach for further instructions. In
+ * order to attach to dynamic shared memory, we need a resource owner.
+ * Once we've mapped the segment in our address space, attach to the table
+ * of contents so we can locate the various data structures we'll need to
+ * find within the segment.
+ */
+ CurrentResourceOwner = ResourceOwnerCreate(NULL, "copy_from_bgw worker");
+ seg = dsm_attach(DatumGetInt32(main_arg));
+ if (seg == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("unable to map dynamic shared memory segment")));
+ toc = shm_toc_attach(PG_COPY_FROM_SHM_MQ_MAGIC, dsm_segment_address(seg));
+ if (toc == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("bad magic number in dynamic shared memory segment")));
+
+ /*
+ * Acquire a worker number.
+ *
+ * By convention, the process registering this background worker should
+ * have stored the control structure at key 0. We look up that key to
+ * find it. Our worker number gives our identity: there may be just one
+ * worker involved in this parallel operation, or there may be many.
+ */
+ hdr = shm_toc_lookup(toc, 0, false);
+ LWLockAcquire(CopyFromBgwLock, LW_EXCLUSIVE);
+ // SpinLockAcquire(&pst->mutex);
+ myworkernumber = ++hdr->workers_attached;
+ // SpinLockRelease(&pst->mutex);
+ LWLockRelease(CopyFromBgwLock);
+ if (myworkernumber > hdr->workers_total)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("too many message queue testing workers already")));
+
+ /*
+ * Attach to the appropriate message queues.
+ */
+ // attach_to_queues(seg, toc, myworkernumber, &inqh, &outqh);
+
+ /*
+ * Indicate that we're fully initialized and ready to begin the main part
+ * of the parallel operation.
+ *
+ * Once we signal that we're ready, the user backend is entitled to assume
+ * that our on_dsm_detach callbacks will fire before we disconnect from
+ * the shared memory segment and exit. Generally, that means we must have
+ * attached to all relevant dynamic shared memory data structures by now.
+ */
+ LWLockAcquire(CopyFromBgwLock, LW_EXCLUSIVE);
+ // SpinLockAcquire(&pst->mutex);
+ ++hdr->workers_ready;
+ elog(LOG, "BGWorker #%d started with curr_line=%d", myworkernumber, hdr->curr_line);
+ // SpinLockRelease(&pst->mutex);
+ LWLockRelease(CopyFromBgwLock);
+ registrant = BackendPidGetProc(MyBgworkerEntry->bgw_notify_pid);
+ if (registrant == NULL)
+ {
+ elog(DEBUG1, "registrant backend has exited prematurely");
+ proc_exit(1);
+ }
+ SetLatch(&registrant->procLatch);
+
+ /* Do the work. */
+ // copy_messages(inqh, outqh);
+ for (;;)
+ {
+ CHECK_FOR_INTERRUPTS();
+
+ LWLockAcquire(CopyFromBgwLock, LW_EXCLUSIVE);
+ // SpinLockAcquire(&hdr->mutex);
+ if (hdr->curr_line != prev)
+ {
+ elog(LOG, "BGWorker #%d dummy processing line #%d", myworkernumber, hdr->curr_line);
+ }
+ prev = hdr->curr_line;
+ // SpinLockRelease(&hdr->mutex);
+ LWLockRelease(CopyFromBgwLock);
+ }
+
+ /*
+ * We're done. Explicitly detach the shared memory segment so that we
+ * don't get a resource leak warning at commit time. This will fire any
+ * on_dsm_detach callbacks we've registered, as well. Once that's done,
+ * we can go ahead and exit.
+ */
+ dsm_detach(seg);
+ // proc_exit(1);
+}
+
+/*
+ * When we receive a SIGTERM, we set InterruptPending and ProcDiePending just
+ * like a normal backend. The next CHECK_FOR_INTERRUPTS() will do the right
+ * thing.
+ */
+static void
+handle_sigterm(SIGNAL_ARGS)
+{
+ int save_errno = errno;
+
+ SetLatch(MyLatch);
+
+ if (!proc_exit_inprogress)
+ {
+ InterruptPending = true;
+ ProcDiePending = true;
+ }
+
+ errno = save_errno;
+}
+
+/*
+ * Set up a dynamic shared memory segment and zero or more background workers
+ * for a parallel COPY FROM run.
+ */
+static ParallelState*
+shm_mq_setup(int64 queue_size, int32 nworkers, dsm_segment **segp,
+ shm_mq_handle **output, shm_mq_handle **input)
+{
+ dsm_segment *seg;
+ ParallelState *hdr;
+ shm_mq *outq = NULL; /* placate compiler */
+ shm_mq *inq = NULL; /* placate compiler */
+ WorkerState *wstate;
+
+ /* Set up a dynamic shared memory segment. */
+ setup_dynamic_shared_memory(queue_size, nworkers, &seg, &hdr, &outq, &inq);
+ *segp = seg;
+
+ /* Register background workers. */
+ wstate = setup_background_workers(nworkers, seg);
+
+ /* Attach the queues. */
+ *output = shm_mq_attach(outq, seg, wstate->handle[0]);
+ *input = shm_mq_attach(inq, seg, wstate->handle[nworkers - 1]);
+
+ /* Wait for workers to become ready. */
+ wait_for_workers_to_become_ready(wstate, hdr);
+
+ /*
+ * Once we reach this point, all workers are ready. We no longer need to
+ * kill them if we die; they'll die on their own as the message queues
+ * shut down.
+ */
+ // cancel_on_dsm_detach(seg, cleanup_background_workers,
+ // PointerGetDatum(wstate));
+ // pfree(wstate);
+
+ return hdr;
+}
+
+/*
+ * Set up a dynamic shared memory segment.
+ *
+ * We set up a small control region that contains only a ParallelState,
+ * plus one region per message queue. There are as many message queues as
+ * the number of workers, plus one.
+ */
+static void
+setup_dynamic_shared_memory(int64 queue_size, int nworkers,
+ dsm_segment **segp, ParallelState **hdrp,
+ shm_mq **outp, shm_mq **inp)
+{
+ shm_toc_estimator e;
+ int i;
+ Size segsize;
+ dsm_segment *seg;
+ shm_toc *toc;
+ ParallelState *hdr;
+
+ /* Ensure a valid queue size. */
+ if (queue_size < 0 || ((uint64) queue_size) < shm_mq_minimum_size)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("queue size must be at least %zu bytes",
+ shm_mq_minimum_size)));
+ if (queue_size != ((Size) queue_size))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("queue size overflows size_t")));
+
+ /*
+ * Estimate how much shared memory we need.
+ *
+ * Because the TOC machinery may choose to insert padding of oddly-sized
+ * requests, we must estimate each chunk separately.
+ *
+ * We need one key to register the location of the header, and we need
+ * nworkers + 1 keys to track the locations of the message queues.
+ */
+ shm_toc_initialize_estimator(&e);
+ shm_toc_estimate_chunk(&e, sizeof(ParallelState));
+ for (i = 0; i <= nworkers; ++i)
+ shm_toc_estimate_chunk(&e, (Size) queue_size);
+ shm_toc_estimate_keys(&e, 2 + nworkers);
+ segsize = shm_toc_estimate(&e);
+
+ /* Create the shared memory segment and establish a table of contents. */
+ seg = dsm_create(shm_toc_estimate(&e), 0);
+ toc = shm_toc_create(PG_COPY_FROM_SHM_MQ_MAGIC, dsm_segment_address(seg),
+ segsize);
+
+ /* Set up the header region. */
+ hdr = shm_toc_allocate(toc, sizeof(ParallelState));
+ SpinLockInit(&hdr->mutex);
+ hdr->workers_total = nworkers;
+ hdr->workers_attached = 0;
+ hdr->workers_ready = 0;
+ shm_toc_insert(toc, 0, hdr);
+
+ /* Set up one message queue per worker, plus one. */
+ for (i = 0; i <= nworkers; ++i)
+ {
+ shm_mq *mq;
+
+ mq = shm_mq_create(shm_toc_allocate(toc, (Size) queue_size),
+ (Size) queue_size);
+ shm_toc_insert(toc, i + 1, mq);
+
+ if (i == 0)
+ {
+ /* We send messages to the first queue. */
+ shm_mq_set_sender(mq, MyProc);
+ *outp = mq;
+ }
+ if (i == nworkers)
+ {
+ /* We receive messages from the last queue. */
+ shm_mq_set_receiver(mq, MyProc);
+ *inp = mq;
+ }
+ }
+
+ /* Return results to caller. */
+ *segp = seg;
+ *hdrp = hdr;
+}
+
+/*
+ * Register background workers.
+ */
+static WorkerState *
+setup_background_workers(int nworkers, dsm_segment *seg)
+{
+ MemoryContext oldcontext;
+ BackgroundWorker worker;
+ WorkerState *wstate;
+ int i;
+
+ /*
+ * We need the worker_state object and the background worker handles to
+ * which it points to be allocated in CurTransactionContext rather than
+ * ExprContext; otherwise, they'll be destroyed before the on_dsm_detach
+ * hooks run.
+ */
+ oldcontext = MemoryContextSwitchTo(CurTransactionContext);
+
+ /* Create worker state object. */
+ wstate = MemoryContextAlloc(TopTransactionContext,
+ offsetof(WorkerState, handle) +
+ sizeof(BackgroundWorkerHandle *) * nworkers);
+ wstate->nworkers = 0;
+
+ /*
+ * Arrange to kill all the workers if we abort before all workers are
+ * finished hooking themselves up to the dynamic shared memory segment.
+ *
+ * If we die after all the workers have finished hooking themselves up to
+ * the dynamic shared memory segment, we'll mark the two queues to which
+ * we're directly connected as detached, and the worker(s) connected to
+ * those queues will exit, marking any other queues to which they are
+ * connected as detached. This will cause any as-yet-unaware workers
+ * connected to those queues to exit in their turn, and so on, until
+ * everybody exits.
+ *
+ * But suppose the workers which are supposed to connect to the queues to
+ * which we're directly attached exit due to some error before they
+ * actually attach the queues. The remaining workers will have no way of
+ * knowing this. From their perspective, they're still waiting for those
+ * workers to start, when in fact they've already died.
+ */
+ on_dsm_detach(seg, cleanup_background_workers,
+ PointerGetDatum(wstate));
+
+ /* Configure a worker. */
+ MemSet(&worker, 0, sizeof(BackgroundWorker));
+
+ worker.bgw_flags = BGWORKER_SHMEM_ACCESS |
+ BGWORKER_BACKEND_DATABASE_CONNECTION;
+ worker.bgw_start_time = BgWorkerStart_ConsistentState;
+ worker.bgw_restart_time = BGW_NEVER_RESTART;
+ worker.bgw_notify_pid = MyProcPid;
+ sprintf(worker.bgw_library_name, "postgres");
+ sprintf(worker.bgw_function_name, "CopyFromBgwMainLoop");
+
+ worker.bgw_main_arg = UInt32GetDatum(dsm_segment_handle(seg));
+
+ /* Register the workers. */
+ for (i = 0; i < nworkers; ++i)
+ {
+ snprintf(worker.bgw_name, BGW_MAXLEN, "copy_from_bgw_pool_worker_%d", i);
+ if (!RegisterDynamicBackgroundWorker(&worker, &wstate->handle[i]))
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
+ errmsg("could not register background process"),
+ errhint("You may need to increase max_worker_processes.")));
+ ++wstate->nworkers;
+ }
+
+ /* All done. */
+ MemoryContextSwitchTo(oldcontext);
+ return wstate;
+}
+
+static void
+cleanup_background_workers(dsm_segment *seg, Datum arg)
+{
+ WorkerState *wstate = (WorkerState *) DatumGetPointer(arg);
+
+ while (wstate->nworkers > 0)
+ {
+ --wstate->nworkers;
+ TerminateBackgroundWorker(wstate->handle[wstate->nworkers]);
+ }
+}
+
+static void
+wait_for_workers_to_become_ready(WorkerState *wstate,
+ volatile ParallelState *pst)
+{
+ bool result = false;
+
+ for (;;)
+ {
+ int workers_ready;
+
+ /* If all the workers are ready, we have succeeded. */
+ LWLockAcquire(CopyFromBgwLock, LW_EXCLUSIVE);
+ // SpinLockAcquire(&pst->mutex);
+ workers_ready = pst->workers_ready;
+ // SpinLockRelease(&pst->mutex);
+ LWLockRelease(CopyFromBgwLock);
+ // LWLockAcquire(CopyFromBgwLock, LW_EXCLUSIVE);
+ // workers_ready = pst->workers_ready;
+ // LWLockRelease(CopyFromBgwLock);
+ if (workers_ready >= wstate->nworkers)
+ {
+ result = true;
+ break;
+ }
+
+ /* If any workers (or the postmaster) have died, we have failed. */
+ if (!check_worker_status(wstate))
+ {
+ result = false;
+ break;
+ }
+
+ /* Wait to be signalled. */
+ WaitLatch(MyLatch, WL_LATCH_SET, 0, PG_WAIT_EXTENSION);
+
+ /* Reset the latch so we don't spin. */
+ ResetLatch(MyLatch);
+
+ /* An interrupt may have occurred while we were waiting. */
+ CHECK_FOR_INTERRUPTS();
+ }
+
+ if (!result)
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
+ errmsg("one or more background workers failed to start")));
+}
+
+static bool
+check_worker_status(WorkerState *wstate)
+{
+ int n;
+
+ /* If any workers (or the postmaster) have died, we have failed. */
+ for (n = 0; n < wstate->nworkers; ++n)
+ {
+ BgwHandleStatus status;
+ pid_t pid;
+
+ status = GetBackgroundWorkerPid(wstate->handle[n], &pid);
+ if (status == BGWH_STOPPED || status == BGWH_POSTMASTER_DIED)
+ return false;
+ }
+
+ /* Otherwise, things still look OK. */
+ return true;
+}
+
+
+/*
+ * Parallel Copy FROM file to relation.
+ */
+extern uint64
+ParallelCopyFrom(CopyState cstate)
+{
+ CopyFromState cfstate;
+ ParallelState *pst;
+ dsm_segment *seg;
+ shm_mq_handle *outqh;
+ shm_mq_handle *inqh;
+ shm_mq_result res;
+ int64 queue_size = 64;
+ int32 nworkers = max_parallel_workers_per_gather;
+
+ cfstate = (CopyFromStateData *) palloc0(sizeof(CopyFromStateData));
+
+ cfstate->cstate = cstate;
+ cfstate->estate = CreateExecutorState(); /* for ExecConstraints() */
+ cfstate->oldcontext = CurrentMemoryContext;
+ cfstate->mycid = GetCurrentCommandId(true);
+
+ cfstate->saved_resultRelInfo = NULL;
+ cfstate->hi_options = 0; /* start with default heap_insert options */
+ cfstate->processed = 0;
+ cfstate->nBufferedTuples = 0;
+ cfstate->prev_leaf_part_index = -1;
+ cfstate->bufferedTuples = NULL; /* initialize to silence warning */
+ cfstate->bufferedTuplesSize = 0;
+ cfstate->firstBufferedLineNo = 0;
+
+
+ // BG Worker setup start
+ // BackgroundWorker worker;
+ // BackgroundWorkerHandle *bgwhandle = NULL;
+ // BgwHandleStatus bgwstatus;
+ // pid_t bgwpid;
+ // int bgwarg = 5;
+ //
+ // elog(LOG, "Main COPY process (pid %d): starting BGWorker", MyProcPid);
+ // MemSet(&worker, 0, sizeof(BackgroundWorker));
+ //
+ // worker.bgw_flags = BGWORKER_SHMEM_ACCESS |
+ // BGWORKER_BACKEND_DATABASE_CONNECTION;
+ // worker.bgw_start_time = BgWorkerStart_ConsistentState;
+ // worker.bgw_restart_time = BGW_NEVER_RESTART;
+ // worker.bgw_notify_pid = MyProcPid;
+ // sprintf(worker.bgw_library_name, "postgres");
+ // sprintf(worker.bgw_function_name, "CopyFromBgwMainLoop");
+ //
+ // snprintf(worker.bgw_name, BGW_MAXLEN, "copy_from_bgw_pool_worker_%d", 1);
+ // // worker.bgw_main_arg = PointerGetDatum(cfstate);
+ // worker.bgw_main_arg = PointerGetDatum(&bgwarg);
+ // BG Worker setup end
+
+ Assert(cfstate->cstate->rel);
+
+ pst = shm_mq_setup(queue_size, nworkers, &seg, &outqh, &inqh);
+
+ /*
+ * The target must be a plain relation or have an INSTEAD OF INSERT row
+ * trigger. (Currently, such triggers are only allowed on views, so we
+ * only hint about them in the view case.)
+ */
+ if (cfstate->cstate->rel->rd_rel->relkind != RELKIND_RELATION &&
+ cfstate->cstate->rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE &&
+ !(cfstate->cstate->rel->trigdesc &&
+ cfstate->cstate->rel->trigdesc->trig_insert_instead_row))
+ {
+ if (cfstate->cstate->rel->rd_rel->relkind == RELKIND_VIEW)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot copy to view \"%s\"",
+ RelationGetRelationName(cfstate->cstate->rel)),
+ errhint("To enable copying to a view, provide an INSTEAD OF INSERT trigger.")));
+ else if (cfstate->cstate->rel->rd_rel->relkind == RELKIND_MATVIEW)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot copy to materialized view \"%s\"",
+ RelationGetRelationName(cfstate->cstate->rel))));
+ else if (cfstate->cstate->rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot copy to foreign table \"%s\"",
+ RelationGetRelationName(cfstate->cstate->rel))));
+ else if (cfstate->cstate->rel->rd_rel->relkind == RELKIND_SEQUENCE)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot copy to sequence \"%s\"",
+ RelationGetRelationName(cfstate->cstate->rel))));
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot copy to non-table relation \"%s\"",
+ RelationGetRelationName(cfstate->cstate->rel))));
+ }
+
+ cfstate->tupDesc = RelationGetDescr(cfstate->cstate->rel);
+
+ /*----------
+ * Check to see if we can avoid writing WAL
+ *
+ * If archive logging/streaming is not enabled *and* either
+ * - table was created in same transaction as this COPY
+ * - data is being written to relfilenode created in this transaction
+ * then we can skip writing WAL. It's safe because if the transaction
+ * doesn't commit, we'll discard the table (or the new relfilenode file).
+ * If it does commit, we'll have done the heap_sync at the bottom of this
+ * routine first.
+ *
+ * As mentioned in comments in utils/rel.h, the in-same-transaction test
+ * is not always set correctly, since in rare cases rd_newRelfilenodeSubid
+ * can be cleared before the end of the transaction. The exact case is
+ * when a relation sets a new relfilenode twice in same transaction, yet
+ * the second one fails in an aborted subtransaction, e.g.
+ *
+ * BEGIN;
+ * TRUNCATE t;
+ * SAVEPOINT save;
+ * TRUNCATE t;
+ * ROLLBACK TO save;
+ * COPY ...
+ *
+ * Also, if the target file is new-in-transaction, we assume that checking
+ * FSM for free space is a waste of time, even if we must use WAL because
+ * of archiving. This could possibly be wrong, but it's unlikely.
+ *
+ * The comments for heap_insert and RelationGetBufferForTuple specify that
+ * skipping WAL logging is only safe if we ensure that our tuples do not
+ * go into pages containing tuples from any other transactions --- but this
+ * must be the case if we have a new table or new relfilenode, so we need
+ * no additional work to enforce that.
+ *----------
+ */
+ /* createSubid is creation check, newRelfilenodeSubid is truncation check */
+ if (cfstate->cstate->rel->rd_createSubid != InvalidSubTransactionId ||
+ cfstate->cstate->rel->rd_newRelfilenodeSubid != InvalidSubTransactionId)
+ {
+ cfstate->hi_options |= HEAP_INSERT_SKIP_FSM;
+ if (!XLogIsNeeded())
+ cfstate->hi_options |= HEAP_INSERT_SKIP_WAL;
+ }
+
+ /*
+ * Optimize if new relfilenode was created in this subxact or one of its
+ * committed children and we won't see those rows later as part of an
+ * earlier scan or command. This ensures that if this subtransaction
+ * aborts then the frozen rows won't be visible after xact cleanup. Note
+ * that the stronger test of exactly which subtransaction created it is
+ * crucial for correctness of this optimization.
+ */
+ if (cfstate->cstate->freeze)
+ {
+ if (!ThereAreNoPriorRegisteredSnapshots() || !ThereAreNoReadyPortals())
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
+ errmsg("cannot perform FREEZE because of prior transaction activity")));
+
+ if (cfstate->cstate->rel->rd_createSubid != GetCurrentSubTransactionId() &&
+ cfstate->cstate->rel->rd_newRelfilenodeSubid != GetCurrentSubTransactionId())
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("cannot perform FREEZE because the table was not created or truncated in the current subtransaction")));
+
+ cfstate->hi_options |= HEAP_INSERT_FROZEN;
+ }
+
+ /*
+ * We need a ResultRelInfo so we can use the regular executor's
+ * index-entry-making machinery. (There used to be a huge amount of code
+ * here that basically duplicated execUtils.c ...)
+ */
+ cfstate->resultRelInfo = makeNode(ResultRelInfo);
+ InitResultRelInfo(cfstate->resultRelInfo,
+ cfstate->cstate->rel,
+ 1, /* dummy rangetable index */
+ NULL,
+ 0);
+
+ ExecOpenIndices(cfstate->resultRelInfo, false);
+
+ cfstate->estate->es_result_relations = cfstate->resultRelInfo;
+ cfstate->estate->es_num_result_relations = 1;
+ cfstate->estate->es_result_relation_info = cfstate->resultRelInfo;
+ cfstate->estate->es_range_table = cfstate->cstate->range_table;
+
+ /* Set up a tuple slot too */
+ cfstate->myslot = ExecInitExtraTupleSlot(cfstate->estate);
+ ExecSetSlotDescriptor(cfstate->myslot, cfstate->tupDesc);
+ /* Triggers might need a slot as well */
+ cfstate->estate->es_trig_tuple_slot = ExecInitExtraTupleSlot(cfstate->estate);
+
+ /*
+ * It's more efficient to prepare a bunch of tuples for insertion, and
+ * insert them in one heap_multi_insert() call, than call heap_insert()
+ * separately for every tuple. However, we can't do that if there are
+ * BEFORE/INSTEAD OF triggers, or we need to evaluate volatile default
+ * expressions. Such triggers or expressions might query the table we're
+ * inserting to, and act differently if the tuples that have already been
+ * processed and prepared for insertion are not there. We also can't do
+ * it if the table is partitioned.
+ */
+ if ((cfstate->resultRelInfo->ri_TrigDesc != NULL &&
+ (cfstate->resultRelInfo->ri_TrigDesc->trig_insert_before_row ||
+ cfstate->resultRelInfo->ri_TrigDesc->trig_insert_instead_row)) ||
+ cfstate->cstate->partition_dispatch_info != NULL ||
+ cfstate->cstate->volatile_defexprs)
+ {
+ cfstate->useHeapMultiInsert = false;
+ }
+ else
+ {
+ cfstate->useHeapMultiInsert = true;
+ cfstate->bufferedTuples = palloc(MAX_BUFFERED_TUPLES * sizeof(HeapTuple));
+ }
+
+ /* Prepare to catch AFTER triggers. */
+ AfterTriggerBeginQuery();
+
+ /*
+ * Check BEFORE STATEMENT insertion triggers. It's debatable whether we
+ * should do this for COPY, since it's not really an "INSERT" statement as
+ * such. However, executing these triggers maintains consistency with the
+ * EACH ROW triggers that we already fire on COPY.
+ */
+ ExecBSInsertTriggers(cfstate->estate, cfstate->resultRelInfo);
+
+ cfstate->values = (Datum *) palloc(cfstate->tupDesc->natts * sizeof(Datum));
+ cfstate->nulls = (bool *) palloc(cfstate->tupDesc->natts * sizeof(bool));
+
+ cfstate->bistate = GetBulkInsertState();
+ cfstate->econtext = GetPerTupleExprContext(cfstate->estate);
+
+ /* Set up callback to identify error line number */
+ cfstate->errcallback.callback = CopyFromErrorCallback;
+ cfstate->errcallback.arg = (void *) cfstate->cstate;
+ cfstate->errcallback.previous = error_context_stack;
+ error_context_stack = &cfstate->errcallback;
+
+ // // BG Worker startup
+ // RegisterDynamicBackgroundWorker(&worker, &bgwhandle);
+ // bgwstatus = WaitForBackgroundWorkerStartup(bgwhandle, &bgwpid);
+ // elog(LOG, "Main COPY process (pid %d): BGWorker started (pid %d)", MyProcPid, bgwpid);
+ // // BG Worker startup
+
+ for (;;)
+ {
+ TupleTableSlot *slot;
+ bool skip_tuple;
+ Oid loaded_oid = InvalidOid;
+ int next_cf_state; /* NextCopyFrom return state */
+
+ CHECK_FOR_INTERRUPTS();
+
+ if (cfstate->nBufferedTuples == 0)
+ {
+ /*
+ * Reset the per-tuple exprcontext. We can only do this if the
+ * tuple buffer is empty. (Calling the context the per-tuple
+ * memory context is a bit of a misnomer now.)
+ */
+ ResetPerTupleExprContext(cfstate->estate);
+ }
+
+ /* Switch into its memory context */
+ MemoryContextSwitchTo(GetPerTupleMemoryContext(cfstate->estate));
+
+ next_cf_state = NextCopyFrom(cfstate->cstate, cfstate->econtext, cfstate->values, cfstate->nulls, &loaded_oid);
+
+
+ LWLockAcquire(CopyFromBgwLock, LW_EXCLUSIVE);
+ // SpinLockAcquire(&pst->mutex);
+ pst->curr_line++;
+ // SpinLockRelease(&pst->mutex);
+ LWLockRelease(CopyFromBgwLock);
+
+ if (!next_cf_state) {
+ break;
+ }
+ else if (next_cf_state == NCF_SUCCESS)
+ {
+ /* And now we can form the input tuple. */
+ cfstate->tuple = heap_form_tuple(cfstate->tupDesc, cfstate->values, cfstate->nulls);
+
+ if (loaded_oid != InvalidOid)
+ HeapTupleSetOid(cfstate->tuple, loaded_oid);
+
+ /*
+ * Constraints might reference the tableoid column, so initialize
+ * t_tableOid before evaluating them.
+ */
+ cfstate->tuple->t_tableOid = RelationGetRelid(cfstate->resultRelInfo->ri_RelationDesc);
+
+ /* Triggers and stuff need to be invoked in query context. */
+ MemoryContextSwitchTo(cfstate->oldcontext);
+
+ /* Place tuple in tuple slot --- but slot shouldn't free it */
+ slot = cfstate->myslot;
+ ExecStoreTuple(cfstate->tuple, slot, InvalidBuffer, false);
+
+ /* Determine the partition to heap_insert the tuple into */
+ if (cfstate->cstate->partition_dispatch_info)
+ {
+ int leaf_part_index;
+ TupleConversionMap *map;
+
+ /*
+ * Away we go ... If we end up not finding a partition after all,
+ * ExecFindPartition() does not return and errors out instead.
+ * Otherwise, the returned value is to be used as an index into
+ * arrays mt_partitions[] and mt_partition_tupconv_maps[] that
+ * will get us the ResultRelInfo and TupleConversionMap for the
+ * partition, respectively.
+ */
+ leaf_part_index = ExecFindPartition(cfstate->resultRelInfo,
+ cfstate->cstate->partition_dispatch_info,
+ slot,
+ cfstate->estate);
+ Assert(leaf_part_index >= 0 &&
+ leaf_part_index < cfstate->cstate->num_partitions);
+
+ /*
+ * If this tuple is mapped to a partition that is not the same as the
+ * previous one, we'd better make sure the bulk insert mechanism gets a
+ * new buffer.
+ */
+ if (cfstate->prev_leaf_part_index != leaf_part_index)
+ {
+ ReleaseBulkInsertStatePin(cfstate->bistate);
+ cfstate->prev_leaf_part_index = leaf_part_index;
+ }
+
+ /*
+ * Save the old ResultRelInfo and switch to the one corresponding
+ * to the selected partition.
+ */
+ cfstate->saved_resultRelInfo = cfstate->resultRelInfo;
+ cfstate->resultRelInfo = cfstate->cstate->partitions + leaf_part_index;
+
+ /* We do not yet have a way to insert into a foreign partition */
+ if (cfstate->resultRelInfo->ri_FdwRoutine)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot route inserted tuples to a foreign table")));
+
+ /*
+ * For ExecInsertIndexTuples() to work on the partition's indexes
+ */
+ cfstate->estate->es_result_relation_info = cfstate->resultRelInfo;
+
+ /*
+ * If we're capturing transition tuples, we might need to convert
+ * from the partition rowtype to parent rowtype.
+ */
+ if (cfstate->cstate->transition_capture != NULL)
+ {
+ if (cfstate->resultRelInfo->ri_TrigDesc &&
+ (cfstate->resultRelInfo->ri_TrigDesc->trig_insert_before_row ||
+ cfstate->resultRelInfo->ri_TrigDesc->trig_insert_instead_row))
+ {
+ /*
+ * If there are any BEFORE or INSTEAD triggers on the
+ * partition, we'll have to be ready to convert their
+ * result back to tuplestore format.
+ */
+ cfstate->cstate->transition_capture->tcs_original_insert_tuple = NULL;
+ cfstate->cstate->transition_capture->tcs_map =
+ cfstate->cstate->transition_tupconv_maps[leaf_part_index];
+ }
+ else
+ {
+ /*
+ * Otherwise, just remember the original unconverted
+ * tuple, to avoid a needless round trip conversion.
+ */
+ cfstate->cstate->transition_capture->tcs_original_insert_tuple = cfstate->tuple;
+ cfstate->cstate->transition_capture->tcs_map = NULL;
+ }
+ }
+ /*
+ * We might need to convert from the parent rowtype to the
+ * partition rowtype.
+ */
+ map = cfstate->cstate->partition_tupconv_maps[leaf_part_index];
+ if (map)
+ {
+ Relation partrel = cfstate->resultRelInfo->ri_RelationDesc;
+
+ cfstate->tuple = do_convert_tuple(cfstate->tuple, map);
+
+ /*
+ * We must use the partition's tuple descriptor from this
+ * point on. Use a dedicated slot from this point on until
+ * we're finished dealing with the partition.
+ */
+ slot = cfstate->cstate->partition_tuple_slot;
+ Assert(slot != NULL);
+ ExecSetSlotDescriptor(slot, RelationGetDescr(partrel));
+ ExecStoreTuple(cfstate->tuple, slot, InvalidBuffer, true);
+ }
+
+ cfstate->tuple->t_tableOid = RelationGetRelid(cfstate->resultRelInfo->ri_RelationDesc);
+ }
+
+ skip_tuple = false;
+
+ /* BEFORE ROW INSERT Triggers */
+ if (cfstate->resultRelInfo->ri_TrigDesc &&
+ cfstate->resultRelInfo->ri_TrigDesc->trig_insert_before_row)
+ {
+ slot = ExecBRInsertTriggers(cfstate->estate, cfstate->resultRelInfo, slot);
+
+ if (slot == NULL) /* "do nothing" */
+ skip_tuple = true;
+ else /* trigger might have changed tuple */
+ cfstate->tuple = ExecMaterializeSlot(slot);
+ }
+ }
+ else
+ {
+ skip_tuple = true;
+ }
+
+ if (!skip_tuple)
+ {
+ if (cfstate->resultRelInfo->ri_TrigDesc &&
+ cfstate->resultRelInfo->ri_TrigDesc->trig_insert_instead_row)
+ {
+ /* Pass the data to the INSTEAD ROW INSERT trigger */
+ ExecIRInsertTriggers(cfstate->estate, cfstate->resultRelInfo, slot);
+ }
+ else
+ {
+ /*
+ * We always check the partition constraint, including when
+ * the tuple got here via tuple-routing. However we don't
+ * need to in the latter case if no BR trigger is defined on
+ * the partition. Note that a BR trigger might modify the
+ * tuple such that the partition constraint is no longer
+ * satisfied, so we need to check in that case.
+ */
+ bool check_partition_constr =
+ (cfstate->resultRelInfo->ri_PartitionCheck != NIL);
+
+ if (cfstate->saved_resultRelInfo != NULL &&
+ !(cfstate->resultRelInfo->ri_TrigDesc &&
+ cfstate->resultRelInfo->ri_TrigDesc->trig_insert_before_row))
+ check_partition_constr = false;
+
+ /* Check the constraints of the tuple */
+ if (cfstate->cstate->rel->rd_att->constr || check_partition_constr)
+ ExecConstraints(cfstate->resultRelInfo, slot, cfstate->estate);
+
+ if (cfstate->useHeapMultiInsert)
+ {
+ /* Add this tuple to the tuple buffer */
+ if (cfstate->nBufferedTuples == 0)
+ cfstate->firstBufferedLineNo = cfstate->cstate->cur_lineno;
+ cfstate->bufferedTuples[cfstate->nBufferedTuples++] = cfstate->tuple;
+ cfstate->bufferedTuplesSize += cfstate->tuple->t_len;
+
+ /*
+ * If the buffer filled up, flush it. Also flush if the
+ * total size of all the tuples in the buffer becomes
+ * large, to avoid using large amounts of memory for the
+ * buffer when the tuples are exceptionally wide.
+ */
+ if (cfstate->nBufferedTuples == MAX_BUFFERED_TUPLES ||
+ cfstate->bufferedTuplesSize > 65535)
+ {
+ CopyFromInsertBatch(cfstate->cstate, cfstate->estate, cfstate->mycid, cfstate->hi_options,
+ cfstate->resultRelInfo, cfstate->myslot, cfstate->bistate,
+ cfstate->nBufferedTuples, cfstate->bufferedTuples,
+ cfstate->firstBufferedLineNo);
+ cfstate->nBufferedTuples = 0;
+ cfstate->bufferedTuplesSize = 0;
+ }
+ }
+ else
+ {
+ List *recheckIndexes = NIL;
+
+ /* OK, store the tuple and create index entries for it */
+ heap_insert(cfstate->resultRelInfo->ri_RelationDesc, cfstate->tuple, cfstate->mycid,
+ cfstate->hi_options, cfstate->bistate);
+
+ if (cfstate->resultRelInfo->ri_NumIndices > 0)
+ recheckIndexes = ExecInsertIndexTuples(slot,
+ &(cfstate->tuple->t_self),
+ cfstate->estate,
+ false,
+ NULL,
+ NIL);
+
+ /* AFTER ROW INSERT Triggers */
+ ExecARInsertTriggers(cfstate->estate, cfstate->resultRelInfo, cfstate->tuple,
+ recheckIndexes, cfstate->cstate->transition_capture);
+
+ list_free(recheckIndexes);
+ }
+ }
+
+ /*
+ * We count only tuples not suppressed by a BEFORE INSERT trigger;
+ * this is the same definition used by execMain.c for counting
+ * tuples inserted by an INSERT command.
+ */
+ cfstate->processed++;
+
+ if (cfstate->saved_resultRelInfo)
+ {
+ cfstate->resultRelInfo = cfstate->saved_resultRelInfo;
+ cfstate->estate->es_result_relation_info = cfstate->resultRelInfo;
+ }
+ }
+ }
+
+ /* Flush any remaining buffered tuples */
+ if (cfstate->nBufferedTuples > 0)
+ CopyFromInsertBatch(cfstate->cstate, cfstate->estate, cfstate->mycid, cfstate->hi_options,
+ cfstate->resultRelInfo, cfstate->myslot, cfstate->bistate,
+ cfstate->nBufferedTuples, cfstate->bufferedTuples,
+ cfstate->firstBufferedLineNo);
+
+ /* Done, clean up */
+ error_context_stack = cfstate->errcallback.previous;
+
+ FreeBulkInsertState(cfstate->bistate);
+
+ MemoryContextSwitchTo(cfstate->oldcontext);
+
+ /*
+ * In the old protocol, tell pqcomm that we can process normal protocol
+ * messages again.
+ */
+ if (cfstate->cstate->copy_dest == COPY_OLD_FE)
+ pq_endmsgread();
+
+ /* Execute AFTER STATEMENT insertion triggers */
+ ExecASInsertTriggers(cfstate->estate, cfstate->resultRelInfo, cfstate->cstate->transition_capture);
+
+ /* Handle queued AFTER triggers */
+ AfterTriggerEndQuery(cfstate->estate);
+
+ pfree(cfstate->values);
+ pfree(cfstate->nulls);
+
+ ExecResetTupleTable(cfstate->estate->es_tupleTable, false);
+
+ ExecCloseIndices(cfstate->resultRelInfo);
+
+ /* Close all the partitioned tables, leaf partitions, and their indices */
+ if (cfstate->cstate->partition_dispatch_info)
+ {
+ int i;
+
+ /*
+ * Remember cfstate->cstate->partition_dispatch_info[0] corresponds to the root
+ * partitioned table, which we must not try to close, because it is
+ * the main target table of COPY that will be closed eventually by
+ * DoCopy(). Also, tupslot is NULL for the root partitioned table.
+ */
+ for (i = 1; i < cfstate->cstate->num_dispatch; i++)
+ {
+ PartitionDispatch pd = cfstate->cstate->partition_dispatch_info[i];
+
+ heap_close(pd->reldesc, NoLock);
+ ExecDropSingleTupleTableSlot(pd->tupslot);
+ }
+ for (i = 0; i < cfstate->cstate->num_partitions; i++)
+ {
+ ResultRelInfo *resultRelInfo = cfstate->cstate->partitions + i;
+
+ ExecCloseIndices(resultRelInfo);
+ heap_close(resultRelInfo->ri_RelationDesc, NoLock);
+ }
+
+ /* Release the standalone partition tuple descriptor */
+ ExecDropSingleTupleTableSlot(cfstate->cstate->partition_tuple_slot);
+ }
+
+ /* Close any trigger target relations */
+ ExecCleanUpTriggerState(cfstate->estate);
+
+ FreeExecutorState(cfstate->estate);
+
+ /*
+ * If we skipped writing WAL, then we need to sync the heap (but not
+ * indexes since those use WAL anyway)
+ */
+ if (cfstate->hi_options & HEAP_INSERT_SKIP_WAL)
+ heap_sync(cfstate->cstate->rel);
+
+ /* Clean up. */
+ dsm_detach(seg);
+
+ return cfstate->processed;
+}
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index 82a1cf5150..b89e17698b 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -494,7 +494,7 @@ RegisterLWLockTranches(void)
if (LWLockTrancheArray == NULL)
{
- LWLockTranchesAllocated = 64;
+ LWLockTranchesAllocated = 128;
LWLockTrancheArray = (char **)
MemoryContextAllocZero(TopMemoryContext,
LWLockTranchesAllocated * sizeof(char *));
@@ -511,6 +511,7 @@ RegisterLWLockTranches(void)
LWLockRegisterTranche(LWTRANCHE_PARALLEL_QUERY_DSA,
"parallel_query_dsa");
LWLockRegisterTranche(LWTRANCHE_TBM, "tbm");
+ LWLockRegisterTranche(LWTRANCHE_COPY_FROM, "copy_from");
/* Register named tranches. */
for (i = 0; i < NamedLWLockTrancheRequests; i++)
diff --git a/src/backend/storage/lmgr/lwlocknames.txt b/src/backend/storage/lmgr/lwlocknames.txt
index e6025ecedb..91ab6fd711 100644
--- a/src/backend/storage/lmgr/lwlocknames.txt
+++ b/src/backend/storage/lmgr/lwlocknames.txt
@@ -50,3 +50,4 @@ OldSnapshotTimeMapLock 42
BackendRandomLock 43
LogicalRepWorkerLock 44
CLogTruncationLock 45
+CopyFromBgwLock 46
diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h
index 0b690b39ca..2935e567ae 100644
--- a/src/include/commands/copy.h
+++ b/src/include/commands/copy.h
@@ -21,6 +21,7 @@
/* CopyStateData is private in commands/copy.c */
typedef struct CopyStateData *CopyState;
+typedef struct CopyFromStateData *CopyFromState;
typedef int (*copy_data_source_cb) (void *outbuf, int minread, int maxread);
extern void DoCopy(ParseState *state, const CopyStmt *stmt,
@@ -39,6 +40,7 @@ extern void CopyFromErrorCallback(void *arg);
extern void CopyFromBgwMainLoop(Datum main_arg);
extern uint64 CopyFrom(CopyState cstate);
+extern uint64 ParallelCopyFrom(CopyState cstate);
extern DestReceiver *CreateCopyDestReceiver(void);
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index 3d16132c88..beb636936f 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -213,6 +213,7 @@ typedef enum BuiltinTrancheIds
LWTRANCHE_PREDICATE_LOCK_MANAGER,
LWTRANCHE_PARALLEL_QUERY_DSA,
LWTRANCHE_TBM,
+ LWTRANCHE_COPY_FROM,
LWTRANCHE_FIRST_USER_DEFINED
} BuiltinTrancheIds;
--
2.11.0
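Patch 04's leader/worker handshake hinges on the dsm + shm_toc round trip: the leader estimates and creates a segment, publishes structures under TOC keys, and hands the segment handle to each worker via bgw_main_arg; the worker attaches under the same magic number and looks the keys back up. A condensed sketch of the two sides (error handling elided; names as in the patch):

    /* Leader: create the segment and publish the control struct at key 0. */
    shm_toc_estimator e;
    dsm_segment   *seg;
    shm_toc       *toc;
    ParallelState *pst;

    shm_toc_initialize_estimator(&e);
    shm_toc_estimate_chunk(&e, sizeof(ParallelState));
    shm_toc_estimate_keys(&e, 1);

    seg = dsm_create(shm_toc_estimate(&e), 0);
    toc = shm_toc_create(PG_COPY_FROM_SHM_MQ_MAGIC,
                         dsm_segment_address(seg), shm_toc_estimate(&e));
    pst = shm_toc_allocate(toc, sizeof(ParallelState));
    shm_toc_insert(toc, 0, pst);
    worker.bgw_main_arg = UInt32GetDatum(dsm_segment_handle(seg));

    /* Worker (in CopyFromBgwMainLoop): attach and look the key back up. */
    seg = dsm_attach(DatumGetUInt32(main_arg));
    toc = shm_toc_attach(PG_COPY_FROM_SHM_MQ_MAGIC, dsm_segment_address(seg));
    pst = shm_toc_lookup(toc, 0, false);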
From bbb5d120c9164876ac9c30db801c334390920920 Mon Sep 17 00:00:00 2001
From: Alex K <alex.lumir@gmail.com>
Date: Mon, 24 Jul 2017 15:18:21 +0300
Subject: [PATCH 05/13] Pool of dummy BGWorkers v1, each receiving line numbers via its own shm_mq and printing them out
---
src/backend/commands/copy.c | 157 +++++++++++++++++++++++++-------------------
1 file changed, 88 insertions(+), 69 deletions(-)
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 5a087f33c7..7ce94a12db 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -414,12 +414,11 @@ static bool CopyGetInt16(CopyState cstate, int16 *val);
static ParallelState* shm_mq_setup(int64 queue_size, int32 nworkers,
- dsm_segment **segp, shm_mq_handle **output,
- shm_mq_handle **input);
+ dsm_segment **segp, shm_mq_handle **mq_handles[]);
static void setup_dynamic_shared_memory(int64 queue_size, int nworkers,
dsm_segment **segp,
- ParallelState **hdrp,
- shm_mq **outp, shm_mq **inp);
+ ParallelState **pstp,
+ shm_mq **mqs[]);
static WorkerState *setup_background_workers(int nworkers,
dsm_segment *seg);
static void cleanup_background_workers(dsm_segment *seg, Datum arg);
@@ -5025,14 +5024,17 @@ CreateCopyDestReceiver(void)
void
CopyFromBgwMainLoop(Datum main_arg)
{
- volatile ParallelState *hdr;
+ volatile ParallelState *pst;
dsm_segment *seg;
shm_toc *toc;
- shm_mq_handle *inqh;
- shm_mq_handle *outqh;
int myworkernumber;
PGPROC *registrant;
- int prev = 0;
+ // int prev = 0;
+ shm_mq *mq;
+ shm_mq_handle *mqh;
+ shm_mq_result shmq_res;
+ Size len;
+ void *data;
/*
* Establish signal handlers.
@@ -5074,13 +5076,13 @@ CopyFromBgwMainLoop(Datum main_arg)
* find it. Our worker number gives our identity: there may be just one
* worker involved in this parallel operation, or there may be many.
*/
- hdr = shm_toc_lookup(toc, 0, false);
+ pst = shm_toc_lookup(toc, 0, false);
LWLockAcquire(CopyFromBgwLock, LW_EXCLUSIVE);
// SpinLockAcquire(&pst->mutex);
- myworkernumber = ++hdr->workers_attached;
+ myworkernumber = ++pst->workers_attached;
// SpinLockRelease(&pst->mutex);
LWLockRelease(CopyFromBgwLock);
- if (myworkernumber > hdr->workers_total)
+ if (myworkernumber > pst->workers_total)
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("too many message queue testing workers already")));
@@ -5088,7 +5090,9 @@ CopyFromBgwMainLoop(Datum main_arg)
/*
* Attach to the appropriate message queues.
*/
- // attach_to_queues(seg, toc, myworkernumber, &inqh, &outqh);
+ mq = shm_toc_lookup(toc, myworkernumber, false);
+ shm_mq_set_receiver(mq, MyProc);
+ mqh = shm_mq_attach(mq, seg, NULL);
/*
* Indicate that we're fully initialized and ready to begin the main part
@@ -5101,8 +5105,8 @@ CopyFromBgwMainLoop(Datum main_arg)
*/
LWLockAcquire(CopyFromBgwLock, LW_EXCLUSIVE);
// SpinLockAcquire(&pst->mutex);
- ++hdr->workers_ready;
- elog(LOG, "BGWorker #%d started with curr_line=%d", myworkernumber, hdr->curr_line);
+ ++pst->workers_ready;
+ elog(LOG, "BGWorker #%d started", myworkernumber);
// SpinLockRelease(&pst->mutex);
LWLockRelease(CopyFromBgwLock);
registrant = BackendPidGetProc(MyBgworkerEntry->bgw_notify_pid);
@@ -5119,15 +5123,20 @@ CopyFromBgwMainLoop(Datum main_arg)
{
CHECK_FOR_INTERRUPTS();
- LWLockAcquire(CopyFromBgwLock, LW_EXCLUSIVE);
- // SpinLockAcquire(&hdr->mutex);
- if (hdr->curr_line != prev)
- {
- elog(LOG, "BGWorker #%d dummy processing line #%d", myworkernumber, hdr->curr_line);
- }
- prev = hdr->curr_line;
- // SpinLockRelease(&hdr->mutex);
- LWLockRelease(CopyFromBgwLock);
+ // LWLockAcquire(CopyFromBgwLock, LW_EXCLUSIVE);
+ // // SpinLockAcquire(&pst->mutex);
+ // if (pst->curr_line != prev)
+ // {
+ // elog(LOG, "BGWorker #%d dummy processing line #%d", myworkernumber, pst->curr_line);
+ // }
+ // prev = pst->curr_line;
+ // // SpinLockRelease(&pst->mutex);
+ // LWLockRelease(CopyFromBgwLock);
+
+ shmq_res = shm_mq_receive(mqh, &len, &data, false);
+ if (shmq_res != SHM_MQ_SUCCESS)
+ break;
+ elog(LOG, "BGWorker #%d dummy processing line #%ld", myworkernumber, *(int64 *) data);
}
/*
@@ -5167,27 +5176,31 @@ handle_sigterm(SIGNAL_ARGS)
*/
static ParallelState*
shm_mq_setup(int64 queue_size, int32 nworkers, dsm_segment **segp,
- shm_mq_handle **output, shm_mq_handle **input)
+ shm_mq_handle ***mq_handles)
{
- dsm_segment *seg;
- ParallelState *hdr;
- shm_mq *outq = NULL; /* placate compiler */
- shm_mq *inq = NULL; /* placate compiler */
- WorkerState *wstate;
+ int i;
+ dsm_segment *seg;
+ ParallelState *pst;
+ shm_mq **mqs;
+ WorkerState *wstate;
+
+ mqs = palloc0(sizeof(shm_mq *) * nworkers);
/* Set up a dynamic shared memory segment. */
- setup_dynamic_shared_memory(queue_size, nworkers, &seg, &hdr, &outq, &inq);
+ setup_dynamic_shared_memory(queue_size, nworkers, &seg, &pst, &mqs);
*segp = seg;
/* Register background workers. */
wstate = setup_background_workers(nworkers, seg);
/* Attach the queues. */
- *output = shm_mq_attach(outq, seg, wstate->handle[0]);
- *input = shm_mq_attach(inq, seg, wstate->handle[nworkers - 1]);
+ for (i = 0; i < nworkers; ++i)
+ {
+ (*mq_handles)[i] = shm_mq_attach(mqs[i], seg, wstate->handle[i]);
+ }
/* Wait for workers to become ready. */
- wait_for_workers_to_become_ready(wstate, hdr);
+ wait_for_workers_to_become_ready(wstate, pst);
/*
* Once we reach this point, all workers are ready. We no longer need to
@@ -5198,7 +5211,7 @@ shm_mq_setup(int64 queue_size, int32 nworkers, dsm_segment **segp,
// PointerGetDatum(wstate));
// pfree(wstate);
- return hdr;
+ return pst;
}
/*
@@ -5210,15 +5223,15 @@ shm_mq_setup(int64 queue_size, int32 nworkers, dsm_segment **segp,
*/
static void
setup_dynamic_shared_memory(int64 queue_size, int nworkers,
- dsm_segment **segp, ParallelState **hdrp,
- shm_mq **outp, shm_mq **inp)
+ dsm_segment **segp, ParallelState **pstp,
+ shm_mq ***mqs)
{
shm_toc_estimator e;
- int i;
- Size segsize;
- dsm_segment *seg;
- shm_toc *toc;
- ParallelState *hdr;
+ int i;
+ Size segsize;
+ dsm_segment *seg;
+ shm_toc *toc;
+ ParallelState *pst;
/* Ensure a valid queue size. */
if (queue_size < 0 || ((uint64) queue_size) < shm_mq_minimum_size)
@@ -5244,7 +5257,7 @@ setup_dynamic_shared_memory(int64 queue_size, int nworkers,
shm_toc_estimate_chunk(&e, sizeof(ParallelState));
for (i = 0; i <= nworkers; ++i)
shm_toc_estimate_chunk(&e, (Size) queue_size);
- shm_toc_estimate_keys(&e, 2 + nworkers);
+ shm_toc_estimate_keys(&e, 1 + nworkers);
segsize = shm_toc_estimate(&e);
/* Create the shared memory segment and establish a table of contents. */
@@ -5253,39 +5266,28 @@ setup_dynamic_shared_memory(int64 queue_size, int nworkers,
segsize);
/* Set up the header region. */
- hdr = shm_toc_allocate(toc, sizeof(ParallelState));
- SpinLockInit(&hdr->mutex);
- hdr->workers_total = nworkers;
- hdr->workers_attached = 0;
- hdr->workers_ready = 0;
- shm_toc_insert(toc, 0, hdr);
+ pst = shm_toc_allocate(toc, sizeof(ParallelState));
+ SpinLockInit(&pst->mutex);
+ pst->workers_total = nworkers;
+ pst->workers_attached = 0;
+ pst->workers_ready = 0;
+ shm_toc_insert(toc, 0, pst);
/* Set up one message queue per worker, plus one. */
- for (i = 0; i <= nworkers; ++i)
+ for (i = 0; i < nworkers; ++i)
{
- shm_mq *mq;
+ shm_mq *mq;
mq = shm_mq_create(shm_toc_allocate(toc, (Size) queue_size),
(Size) queue_size);
shm_toc_insert(toc, i + 1, mq);
-
- if (i == 0)
- {
- /* We send messages to the first queue. */
- shm_mq_set_sender(mq, MyProc);
- *outp = mq;
- }
- if (i == nworkers)
- {
- /* We receive messages from the last queue. */
- shm_mq_set_receiver(mq, MyProc);
- *inp = mq;
- }
+ shm_mq_set_sender(mq, MyProc);
+ (*mqs)[i] = mq;
}
/* Return results to caller. */
*segp = seg;
- *hdrp = hdr;
+ *pstp = pst;
}
/*
@@ -5454,11 +5456,17 @@ ParallelCopyFrom(CopyState cstate)
CopyFromState cfstate;
ParallelState *pst;
dsm_segment *seg;
- shm_mq_handle *outqh;
- shm_mq_handle *inqh;
- shm_mq_result res;
- int64 queue_size = 64;
int32 nworkers = max_parallel_workers_per_gather;
+ int64 queue_size = 8;
+ shm_mq_handle **mq_handles;
+ shm_mq_result shmq_res;
+ int last_worker_used = 0;
+ int64 message = 0;
+ // char *message_contents = VARDATA_ANY(message);
+ // int message_size = VARSIZE_ANY_EXHDR(message);
+ int message_size = sizeof(message);
+
+ mq_handles = palloc0(sizeof(shm_mq_handle *) * nworkers);
cfstate = (CopyFromStateData *) palloc0(sizeof(CopyFromStateData));
@@ -5502,7 +5510,7 @@ ParallelCopyFrom(CopyState cstate)
Assert(cfstate->cstate->rel);
- pst = shm_mq_setup(queue_size, nworkers, &seg, &outqh, &inqh);
+ pst = shm_mq_setup(queue_size * message_size, nworkers, &seg, &mq_handles);
/*
* The target must be a plain relation or have an INSTEAD OF INSERT row
@@ -5721,6 +5729,17 @@ ParallelCopyFrom(CopyState cstate)
// SpinLockRelease(&pst->mutex);
LWLockRelease(CopyFromBgwLock);
+ if (next_cf_state) {
+ message = pst->curr_line;
+ shmq_res = shm_mq_send(mq_handles[++last_worker_used - 1], message_size, &message, false);
+ if (shmq_res != SHM_MQ_SUCCESS)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("could not send message")));
+ if (last_worker_used == nworkers)
+ last_worker_used = 0;
+ }
+
if (!next_cf_state) {
break;
}
--
2.11.0
From 34d1f670fa0876fd33b5a5595eb52d98b7a386e1 Mon Sep 17 00:00:00 2001
From: Alex K <alex.lumir@gmail.com>
Date: Mon, 31 Jul 2017 21:54:27 +0300
Subject: [PATCH 06/13] Sending raw line to BGWorkers
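Instead of a dummy line counter, the leader now copies the raw bytes of
cstate->line_buf for each input line and ships them to one of the workers
over its shared memory queue, round-robin. A minimal sketch of the
dispatch this patch implements (dispatch_line and next_worker are
illustrative names only; the patch inlines the same logic in
ParallelCopyFrom() using last_worker_used, and error paths are shortened):

    /* Send the current line to the next worker in turn. */
    static void
    dispatch_line(CopyState cstate, shm_mq_handle **mq_handles,
                  int nworkers, int *next_worker)
    {
        shm_mq_result res;

        res = shm_mq_send(mq_handles[*next_worker],
                          cstate->line_buf.len,   /* payload length */
                          cstate->line_buf.data,  /* raw line bytes */
                          false);                 /* block if the queue is full */
        if (res != SHM_MQ_SUCCESS)
            ereport(ERROR,
                    (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                     errmsg("could not send message")));
        if (++(*next_worker) == nworkers)
            *next_worker = 0;
    }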
---
src/backend/commands/copy.c | 470 +++++++++++++++++++++-----------------------
1 file changed, 221 insertions(+), 249 deletions(-)
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 7ce94a12db..b03fbacfa1 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -247,35 +247,6 @@ typedef struct
uint64 processed; /* # of tuples processed */
} DR_copy;
-typedef struct CopyFromStateData
-{
- CopyState cstate;
- HeapTuple tuple;
- TupleDesc tupDesc;
- Datum *values;
- bool *nulls;
- ResultRelInfo *resultRelInfo;
- ResultRelInfo *saved_resultRelInfo;
- EState *estate;
- ExprContext *econtext;
- TupleTableSlot *myslot;
- MemoryContext oldcontext;
-
- ErrorContextCallback errcallback;
- CommandId mycid;
- int hi_options;
- BulkInsertState bistate;
- uint64 processed;
- bool useHeapMultiInsert;
- int nBufferedTuples;
- int prev_leaf_part_index;
-
-#define MAX_BUFFERED_TUPLES 1000
- HeapTuple *bufferedTuples;
- Size bufferedTuplesSize;
- int firstBufferedLineNo;
-} CopyFromStateData;
-
typedef struct
{
int nworkers;
@@ -289,8 +260,11 @@ typedef struct
int workers_total;
int workers_attached;
int workers_ready;
+ Oid database_id;
+ Oid authenticated_user_id;
} ParallelState;
+// TODO: choose a dedicated magic number for this use
#define PG_COPY_FROM_SHM_MQ_MAGIC 0x79fb2447
/*
@@ -413,7 +387,7 @@ static void CopySendInt16(CopyState cstate, int16 val);
static bool CopyGetInt16(CopyState cstate, int16 *val);
-static ParallelState* shm_mq_setup(int64 queue_size, int32 nworkers,
+static ParallelState* shm_mq_setup(int64 queue_size, int32 nworkers,
dsm_segment **segp, shm_mq_handle **mq_handles[]);
static void setup_dynamic_shared_memory(int64 queue_size, int nworkers,
dsm_segment **segp,
@@ -3407,7 +3381,7 @@ NextCopyFrom(CopyState cstate, ExprContext *econtext,
num_phys_attrs = tupDesc->natts;
attr_count = list_length(cstate->attnumlist);
nfields = file_has_oids ? (attr_count + 1) : attr_count;
-
+
/* Set error level to WARNING, if errors handling is turned on */
if (cstate->ignore_errors)
{
@@ -5029,7 +5003,6 @@ CopyFromBgwMainLoop(Datum main_arg)
shm_toc *toc;
int myworkernumber;
PGPROC *registrant;
- // int prev = 0;
shm_mq *mq;
shm_mq_handle *mqh;
shm_mq_result shmq_res;
@@ -5094,6 +5067,16 @@ CopyFromBgwMainLoop(Datum main_arg)
shm_mq_set_receiver(mq, MyProc);
mqh = shm_mq_attach(mq, seg, NULL);
+ /* Restore database connection. */
+ BackgroundWorkerInitializeConnectionByOid(pst->database_id,
+ pst->authenticated_user_id);
+
+ /*
+ * Set the client encoding to the database encoding, since that is what
+ * the leader will expect.
+ */
+ SetClientEncoding(GetDatabaseEncoding());
+
/*
* Indicate that we're fully initialized and ready to begin the main part
* of the parallel operation.
@@ -5121,22 +5104,17 @@ CopyFromBgwMainLoop(Datum main_arg)
// copy_messages(inqh, outqh);
for (;;)
{
- CHECK_FOR_INTERRUPTS();
+ char *msg;
- // LWLockAcquire(CopyFromBgwLock, LW_EXCLUSIVE);
- // // SpinLockAcquire(&pst->mutex);
- // if (pst->curr_line != prev)
- // {
- // elog(LOG, "BGWorker #%d dummy processing line #%d", myworkernumber, pst->curr_line);
- // }
- // prev = pst->curr_line;
- // // SpinLockRelease(&pst->mutex);
- // LWLockRelease(CopyFromBgwLock);
+ CHECK_FOR_INTERRUPTS();
shmq_res = shm_mq_receive(mqh, &len, &data, false);
if (shmq_res != SHM_MQ_SUCCESS)
break;
+ msg = (char *) palloc(len + 1);
+ memcpy(msg, data, len);
+ msg[len] = '\0'; /* queue payloads are not NUL-terminated */
- elog(LOG, "BGWorker #%d dummy processing line #%ld", myworkernumber, *(int64 *) data);
+ // elog(LOG, "BGWorker #%d dummy processing line #%ld", myworkernumber, *(int64 *) data);
+ elog(LOG, "BGWorker #%d dummy processing line: %s", myworkernumber, msg);
}
/*
@@ -5271,6 +5249,8 @@ setup_dynamic_shared_memory(int64 queue_size, int nworkers,
pst->workers_total = nworkers;
pst->workers_attached = 0;
pst->workers_ready = 0;
+ pst->database_id = MyDatabaseId;
+ pst->authenticated_user_id = GetAuthenticatedUserId();
shm_toc_insert(toc, 0, pst);
/* Set up one message queue per worker. */
@@ -5453,104 +5433,90 @@ check_worker_status(WorkerState *wstate)
extern uint64
ParallelCopyFrom(CopyState cstate)
{
- CopyFromState cfstate;
ParallelState *pst;
dsm_segment *seg;
int32 nworkers = max_parallel_workers_per_gather;
- int64 queue_size = 8;
+ int64 queue_size = 100;
shm_mq_handle **mq_handles;
shm_mq_result shmq_res;
int last_worker_used = 0;
- int64 message = 0;
+ // int64 message = 0;
+ char *message;
// char *message_contents = VARDATA_ANY(message);
// int message_size = VARSIZE_ANY_EXHDR(message);
- int message_size = sizeof(message);
+ int message_size = sizeof(char);
mq_handles = palloc0(sizeof(shm_mq_handle *) * nworkers);
- cfstate = (CopyFromStateData *) palloc0(sizeof(CopyFromStateData));
-
- cfstate->cstate = cstate;
- cfstate->estate = CreateExecutorState(); /* for ExecConstraints() */
- cfstate->oldcontext = CurrentMemoryContext;
- cfstate->mycid = GetCurrentCommandId(true);
-
- cfstate->saved_resultRelInfo = NULL;
- cfstate->hi_options = 0; /* start with default heap_insert options */
- cfstate->processed = 0;
- cfstate->nBufferedTuples = 0;
- cfstate->prev_leaf_part_index = -1;
- cfstate->bufferedTuples = NULL; /* initialize to silence warning */
- cfstate->bufferedTuplesSize = 0;
- cfstate->firstBufferedLineNo = 0;
-
-
- // BG Worker setup start
- // BackgroundWorker worker;
- // BackgroundWorkerHandle *bgwhandle = NULL;
- // BgwHandleStatus bgwstatus;
- // pid_t bgwpid;
- // int bgwarg = 5;
- //
- // elog(LOG, "Main COPY process (pid %d): starting BGWorker", MyProcPid);
- // MemSet(&worker, 0, sizeof(BackgroundWorker));
- //
- // worker.bgw_flags = BGWORKER_SHMEM_ACCESS |
- // BGWORKER_BACKEND_DATABASE_CONNECTION;
- // worker.bgw_start_time = BgWorkerStart_ConsistentState;
- // worker.bgw_restart_time = BGW_NEVER_RESTART;
- // worker.bgw_notify_pid = MyProcPid;
- // sprintf(worker.bgw_library_name, "postgres");
- // sprintf(worker.bgw_function_name, "CopyFromBgwMainLoop");
- //
- // snprintf(worker.bgw_name, BGW_MAXLEN, "copy_from_bgw_pool_worker_%d", 1);
- // // worker.bgw_main_arg = PointerGetDatum(cfstate);
- // worker.bgw_main_arg = PointerGetDatum(&bgwarg);
- // BG Worker setup end
-
- Assert(cfstate->cstate->rel);
-
- pst = shm_mq_setup(queue_size * message_size, nworkers, &seg, &mq_handles);
+ HeapTuple tuple;
+ TupleDesc tupDesc;
+ Datum *values;
+ bool *nulls;
+ ResultRelInfo *resultRelInfo;
+ ResultRelInfo *saved_resultRelInfo = NULL;
+ EState *estate = CreateExecutorState(); /* for ExecConstraints() */
+ ExprContext *econtext;
+ TupleTableSlot *myslot;
+ MemoryContext oldcontext = CurrentMemoryContext;
+
+ ErrorContextCallback errcallback;
+ CommandId mycid = GetCurrentCommandId(true);
+ int hi_options = 0; /* start with default heap_insert options */
+ BulkInsertState bistate;
+ uint64 processed = 0;
+ bool useHeapMultiInsert;
+ int nBufferedTuples = 0;
+ int prev_leaf_part_index = -1;
+
+#define MAX_BUFFERED_TUPLES 1000
+ HeapTuple *bufferedTuples = NULL; /* initialize to silence warning */
+ Size bufferedTuplesSize = 0;
+ int firstBufferedLineNo = 0;
+
+ Assert(cstate->rel);
+
+ // MQ size = 100 messages x 80 chars each
+ pst = shm_mq_setup(message_size * 80 * queue_size, nworkers, &seg, &mq_handles);
/*
* The target must be a plain relation or have an INSTEAD OF INSERT row
* trigger. (Currently, such triggers are only allowed on views, so we
* only hint about them in the view case.)
*/
- if (cfstate->cstate->rel->rd_rel->relkind != RELKIND_RELATION &&
- cfstate->cstate->rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE &&
- !(cfstate->cstate->rel->trigdesc &&
- cfstate->cstate->rel->trigdesc->trig_insert_instead_row))
+ if (cstate->rel->rd_rel->relkind != RELKIND_RELATION &&
+ cstate->rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE &&
+ !(cstate->rel->trigdesc &&
+ cstate->rel->trigdesc->trig_insert_instead_row))
{
- if (cfstate->cstate->rel->rd_rel->relkind == RELKIND_VIEW)
+ if (cstate->rel->rd_rel->relkind == RELKIND_VIEW)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("cannot copy to view \"%s\"",
- RelationGetRelationName(cfstate->cstate->rel)),
+ RelationGetRelationName(cstate->rel)),
errhint("To enable copying to a view, provide an INSTEAD OF INSERT trigger.")));
- else if (cfstate->cstate->rel->rd_rel->relkind == RELKIND_MATVIEW)
+ else if (cstate->rel->rd_rel->relkind == RELKIND_MATVIEW)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("cannot copy to materialized view \"%s\"",
- RelationGetRelationName(cfstate->cstate->rel))));
- else if (cfstate->cstate->rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+ RelationGetRelationName(cstate->rel))));
+ else if (cstate->rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("cannot copy to foreign table \"%s\"",
- RelationGetRelationName(cfstate->cstate->rel))));
- else if (cfstate->cstate->rel->rd_rel->relkind == RELKIND_SEQUENCE)
+ RelationGetRelationName(cstate->rel))));
+ else if (cstate->rel->rd_rel->relkind == RELKIND_SEQUENCE)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("cannot copy to sequence \"%s\"",
- RelationGetRelationName(cfstate->cstate->rel))));
+ RelationGetRelationName(cstate->rel))));
else
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("cannot copy to non-table relation \"%s\"",
- RelationGetRelationName(cfstate->cstate->rel))));
+ RelationGetRelationName(cstate->rel))));
}
- cfstate->tupDesc = RelationGetDescr(cfstate->cstate->rel);
+ tupDesc = RelationGetDescr(cstate->rel);
/*----------
* Check to see if we can avoid writing WAL
@@ -5588,12 +5554,12 @@ ParallelCopyFrom(CopyState cstate)
*----------
*/
/* createSubid is creation check, newRelfilenodeSubid is truncation check */
- if (cfstate->cstate->rel->rd_createSubid != InvalidSubTransactionId ||
- cfstate->cstate->rel->rd_newRelfilenodeSubid != InvalidSubTransactionId)
+ if (cstate->rel->rd_createSubid != InvalidSubTransactionId ||
+ cstate->rel->rd_newRelfilenodeSubid != InvalidSubTransactionId)
{
- cfstate->hi_options |= HEAP_INSERT_SKIP_FSM;
+ hi_options |= HEAP_INSERT_SKIP_FSM;
if (!XLogIsNeeded())
- cfstate->hi_options |= HEAP_INSERT_SKIP_WAL;
+ hi_options |= HEAP_INSERT_SKIP_WAL;
}
/*
@@ -5604,20 +5570,20 @@ ParallelCopyFrom(CopyState cstate)
* that the stronger test of exactly which subtransaction created it is
* crucial for correctness of this optimization.
*/
- if (cfstate->cstate->freeze)
+ if (cstate->freeze)
{
if (!ThereAreNoPriorRegisteredSnapshots() || !ThereAreNoReadyPortals())
ereport(ERROR,
(errcode(ERRCODE_INVALID_TRANSACTION_STATE),
errmsg("cannot perform FREEZE because of prior transaction activity")));
- if (cfstate->cstate->rel->rd_createSubid != GetCurrentSubTransactionId() &&
- cfstate->cstate->rel->rd_newRelfilenodeSubid != GetCurrentSubTransactionId())
+ if (cstate->rel->rd_createSubid != GetCurrentSubTransactionId() &&
+ cstate->rel->rd_newRelfilenodeSubid != GetCurrentSubTransactionId())
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("cannot perform FREEZE because the table was not created or truncated in the current subtransaction")));
- cfstate->hi_options |= HEAP_INSERT_FROZEN;
+ hi_options |= HEAP_INSERT_FROZEN;
}
/*
@@ -5625,25 +5591,25 @@ ParallelCopyFrom(CopyState cstate)
* index-entry-making machinery. (There used to be a huge amount of code
* here that basically duplicated execUtils.c ...)
*/
- cfstate->resultRelInfo = makeNode(ResultRelInfo);
- InitResultRelInfo(cfstate->resultRelInfo,
- cfstate->cstate->rel,
+ resultRelInfo = makeNode(ResultRelInfo);
+ InitResultRelInfo(resultRelInfo,
+ cstate->rel,
1, /* dummy rangetable index */
NULL,
0);
- ExecOpenIndices(cfstate->resultRelInfo, false);
+ ExecOpenIndices(resultRelInfo, false);
- cfstate->estate->es_result_relations = cfstate->resultRelInfo;
- cfstate->estate->es_num_result_relations = 1;
- cfstate->estate->es_result_relation_info = cfstate->resultRelInfo;
- cfstate->estate->es_range_table = cfstate->cstate->range_table;
+ estate->es_result_relations = resultRelInfo;
+ estate->es_num_result_relations = 1;
+ estate->es_result_relation_info = resultRelInfo;
+ estate->es_range_table = cstate->range_table;
/* Set up a tuple slot too */
- cfstate->myslot = ExecInitExtraTupleSlot(cfstate->estate);
- ExecSetSlotDescriptor(cfstate->myslot, cfstate->tupDesc);
+ myslot = ExecInitExtraTupleSlot(estate);
+ ExecSetSlotDescriptor(myslot, tupDesc);
/* Triggers might need a slot as well */
- cfstate->estate->es_trig_tuple_slot = ExecInitExtraTupleSlot(cfstate->estate);
+ estate->es_trig_tuple_slot = ExecInitExtraTupleSlot(estate);
/*
* It's more efficient to prepare a bunch of tuples for insertion, and
@@ -5652,21 +5618,21 @@ ParallelCopyFrom(CopyState cstate)
* BEFORE/INSTEAD OF triggers, or we need to evaluate volatile default
* expressions. Such triggers or expressions might query the table we're
* inserting to, and act differently if the tuples that have already been
- * cfstate->processed and prepared for insertion are not there. We also can't do
+ * processed and prepared for insertion are not there. We also can't do
* it if the table is partitioned.
*/
- if ((cfstate->resultRelInfo->ri_TrigDesc != NULL &&
- (cfstate->resultRelInfo->ri_TrigDesc->trig_insert_before_row ||
- cfstate->resultRelInfo->ri_TrigDesc->trig_insert_instead_row)) ||
- cfstate->cstate->partition_dispatch_info != NULL ||
- cfstate->cstate->volatile_defexprs)
+ if ((resultRelInfo->ri_TrigDesc != NULL &&
+ (resultRelInfo->ri_TrigDesc->trig_insert_before_row ||
+ resultRelInfo->ri_TrigDesc->trig_insert_instead_row)) ||
+ cstate->partition_dispatch_info != NULL ||
+ cstate->volatile_defexprs)
{
- cfstate->useHeapMultiInsert = false;
+ useHeapMultiInsert = false;
}
else
{
- cfstate->useHeapMultiInsert = true;
- cfstate->bufferedTuples = palloc(MAX_BUFFERED_TUPLES * sizeof(HeapTuple));
+ useHeapMultiInsert = true;
+ bufferedTuples = palloc(MAX_BUFFERED_TUPLES * sizeof(HeapTuple));
}
/* Prepare to catch AFTER triggers. */
@@ -5678,19 +5644,19 @@ ParallelCopyFrom(CopyState cstate)
* such. However, executing these triggers maintains consistency with the
* EACH ROW triggers that we already fire on COPY.
*/
- ExecBSInsertTriggers(cfstate->estate, cfstate->resultRelInfo);
+ ExecBSInsertTriggers(estate, resultRelInfo);
- cfstate->values = (Datum *) palloc(cfstate->tupDesc->natts * sizeof(Datum));
- cfstate->nulls = (bool *) palloc(cfstate->tupDesc->natts * sizeof(bool));
+ values = (Datum *) palloc(tupDesc->natts * sizeof(Datum));
+ nulls = (bool *) palloc(tupDesc->natts * sizeof(bool));
- cfstate->bistate = GetBulkInsertState();
- cfstate->econtext = GetPerTupleExprContext(cfstate->estate);
+ bistate = GetBulkInsertState();
+ econtext = GetPerTupleExprContext(estate);
/* Set up callback to identify error line number */
- cfstate->errcallback.callback = CopyFromErrorCallback;
- cfstate->errcallback.arg = (void *) cfstate->cstate;
- cfstate->errcallback.previous = error_context_stack;
- error_context_stack = &cfstate->errcallback;
+ errcallback.callback = CopyFromErrorCallback;
+ errcallback.arg = (void *) cstate;
+ errcallback.previous = error_context_stack;
+ error_context_stack = &errcallback;
// // BG Worker startup
// RegisterDynamicBackgroundWorker(&worker, &bgwhandle);
@@ -5707,31 +5673,37 @@ ParallelCopyFrom(CopyState cstate)
CHECK_FOR_INTERRUPTS();
- if (cfstate->nBufferedTuples == 0)
+ if (nBufferedTuples == 0)
{
/*
* Reset the per-tuple exprcontext. We can only do this if the
* tuple buffer is empty. (Calling the context the per-tuple
* memory context is a bit of a misnomer now.)
*/
- ResetPerTupleExprContext(cfstate->estate);
+ ResetPerTupleExprContext(estate);
}
/* Switch into its memory context */
- MemoryContextSwitchTo(GetPerTupleMemoryContext(cfstate->estate));
+ MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
- next_cf_state = NextCopyFrom(cfstate->cstate, cfstate->econtext, cfstate->values, cfstate->nulls, &loaded_oid);
+ next_cf_state = NextCopyFrom(cstate, econtext, values, nulls, &loaded_oid);
- LWLockAcquire(CopyFromBgwLock, LW_EXCLUSIVE);
- // SpinLockAcquire(&pst->mutex);
- pst->curr_line++;
- // SpinLockRelease(&pst->mutex);
- LWLockRelease(CopyFromBgwLock);
+ // LWLockAcquire(CopyFromBgwLock, LW_EXCLUSIVE);
+ // // SpinLockAcquire(&pst->mutex);
+ // pst->curr_line++;
+ // // SpinLockRelease(&pst->mutex);
+ // LWLockRelease(CopyFromBgwLock);
if (next_cf_state) {
- message = pst->curr_line;
- shmq_res = shm_mq_send(mq_handles[++last_worker_used - 1], message_size, &message, false);
+ // message = pst->curr_line;
+ // cur_ptr = cstate->line_buf.data;
+ // line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
+ message = (char *) palloc(cstate->line_buf.len + 1);
+ memcpy(message, cstate->line_buf.data, cstate->line_buf.len);
+ message[cstate->line_buf.len] = '\0'; /* so it is safe to print with %s */
+ // shmq_res = shm_mq_send(mq_handles[++last_worker_used - 1], message_size, &message, false);
+ elog(LOG, "Sending line #%d '%s' to BGWorker #%d", cstate->cur_lineno, message, last_worker_used + 1);
+ shmq_res = shm_mq_send(mq_handles[++last_worker_used - 1], cstate->line_buf.len, message, false);
if (shmq_res != SHM_MQ_SUCCESS)
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
@@ -5746,26 +5718,26 @@ ParallelCopyFrom(CopyState cstate)
else if (next_cf_state == NCF_SUCCESS)
{
/* And now we can form the input tuple. */
- cfstate->tuple = heap_form_tuple(cfstate->tupDesc, cfstate->values, cfstate->nulls);
+ tuple = heap_form_tuple(tupDesc, values, nulls);
if (loaded_oid != InvalidOid)
- HeapTupleSetOid(cfstate->tuple, loaded_oid);
+ HeapTupleSetOid(tuple, loaded_oid);
/*
* Constraints might reference the tableoid column, so initialize
* t_tableOid before evaluating them.
*/
- cfstate->tuple->t_tableOid = RelationGetRelid(cfstate->resultRelInfo->ri_RelationDesc);
+ tuple->t_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
/* Triggers and stuff need to be invoked in query context. */
- MemoryContextSwitchTo(cfstate->oldcontext);
+ MemoryContextSwitchTo(oldcontext);
/* Place tuple in tuple slot --- but slot shouldn't free it */
- slot = cfstate->myslot;
- ExecStoreTuple(cfstate->tuple, slot, InvalidBuffer, false);
+ slot = myslot;
+ ExecStoreTuple(tuple, slot, InvalidBuffer, false);
/* Determine the partition to heap_insert the tuple into */
- if (cfstate->cstate->partition_dispatch_info)
+ if (cstate->partition_dispatch_info)
{
int leaf_part_index;
TupleConversionMap *map;
@@ -5778,33 +5750,33 @@ ParallelCopyFrom(CopyState cstate)
* will get us the ResultRelInfo and TupleConversionMap for the
* partition, respectively.
*/
- leaf_part_index = ExecFindPartition(cfstate->resultRelInfo,
- cfstate->cstate->partition_dispatch_info,
+ leaf_part_index = ExecFindPartition(resultRelInfo,
+ cstate->partition_dispatch_info,
slot,
- cfstate->estate);
+ estate);
Assert(leaf_part_index >= 0 &&
- leaf_part_index < cfstate->cstate->num_partitions);
+ leaf_part_index < cstate->num_partitions);
/*
* If this tuple is mapped to a partition that is not same as the
* previous one, we'd better make the bulk insert mechanism gets a
* new buffer.
*/
- if (cfstate->prev_leaf_part_index != leaf_part_index)
+ if (prev_leaf_part_index != leaf_part_index)
{
- ReleaseBulkInsertStatePin(cfstate->bistate);
- cfstate->prev_leaf_part_index = leaf_part_index;
+ ReleaseBulkInsertStatePin(bistate);
+ prev_leaf_part_index = leaf_part_index;
}
/*
* Save the old ResultRelInfo and switch to the one corresponding
* to the selected partition.
*/
- cfstate->saved_resultRelInfo = cfstate->resultRelInfo;
- cfstate->resultRelInfo = cfstate->cstate->partitions + leaf_part_index;
+ saved_resultRelInfo = resultRelInfo;
+ resultRelInfo = cstate->partitions + leaf_part_index;
/* We do not yet have a way to insert into a foreign partition */
- if (cfstate->resultRelInfo->ri_FdwRoutine)
+ if (resultRelInfo->ri_FdwRoutine)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot route inserted tuples to a foreign table")));
@@ -5812,26 +5784,26 @@ ParallelCopyFrom(CopyState cstate)
/*
* For ExecInsertIndexTuples() to work on the partition's indexes
*/
- cfstate->estate->es_result_relation_info = cfstate->resultRelInfo;
+ estate->es_result_relation_info = resultRelInfo;
/*
* If we're capturing transition tuples, we might need to convert
* from the partition rowtype to parent rowtype.
*/
- if (cfstate->cstate->transition_capture != NULL)
+ if (cstate->transition_capture != NULL)
{
- if (cfstate->resultRelInfo->ri_TrigDesc &&
- (cfstate->resultRelInfo->ri_TrigDesc->trig_insert_before_row ||
- cfstate->resultRelInfo->ri_TrigDesc->trig_insert_instead_row))
+ if (resultRelInfo->ri_TrigDesc &&
+ (resultRelInfo->ri_TrigDesc->trig_insert_before_row ||
+ resultRelInfo->ri_TrigDesc->trig_insert_instead_row))
{
/*
* If there are any BEFORE or INSTEAD triggers on the
* partition, we'll have to be ready to convert their
* result back to tuplestore format.
*/
- cfstate->cstate->transition_capture->tcs_original_insert_tuple = NULL;
- cfstate->cstate->transition_capture->tcs_map =
- cfstate->cstate->transition_tupconv_maps[leaf_part_index];
+ cstate->transition_capture->tcs_original_insert_tuple = NULL;
+ cstate->transition_capture->tcs_map =
+ cstate->transition_tupconv_maps[leaf_part_index];
}
else
{
@@ -5839,47 +5811,47 @@ ParallelCopyFrom(CopyState cstate)
* Otherwise, just remember the original unconverted
* tuple, to avoid a needless round trip conversion.
*/
- cfstate->cstate->transition_capture->tcs_original_insert_tuple = cfstate->tuple;
- cfstate->cstate->transition_capture->tcs_map = NULL;
+ cstate->transition_capture->tcs_original_insert_tuple = tuple;
+ cstate->transition_capture->tcs_map = NULL;
}
}
/*
* We might need to convert from the parent rowtype to the
* partition rowtype.
*/
- map = cfstate->cstate->partition_tupconv_maps[leaf_part_index];
+ map = cstate->partition_tupconv_maps[leaf_part_index];
if (map)
{
- Relation partrel = cfstate->resultRelInfo->ri_RelationDesc;
+ Relation partrel = resultRelInfo->ri_RelationDesc;
- cfstate->tuple = do_convert_tuple(cfstate->tuple, map);
+ tuple = do_convert_tuple(tuple, map);
/*
* We must use the partition's tuple descriptor from this
* point on. Use a dedicated slot from this point on until
* we're finished dealing with the partition.
*/
- slot = cfstate->cstate->partition_tuple_slot;
+ slot = cstate->partition_tuple_slot;
Assert(slot != NULL);
ExecSetSlotDescriptor(slot, RelationGetDescr(partrel));
- ExecStoreTuple(cfstate->tuple, slot, InvalidBuffer, true);
+ ExecStoreTuple(tuple, slot, InvalidBuffer, true);
}
- cfstate->tuple->t_tableOid = RelationGetRelid(cfstate->resultRelInfo->ri_RelationDesc);
+ tuple->t_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
}
skip_tuple = false;
/* BEFORE ROW INSERT Triggers */
- if (cfstate->resultRelInfo->ri_TrigDesc &&
- cfstate->resultRelInfo->ri_TrigDesc->trig_insert_before_row)
+ if (resultRelInfo->ri_TrigDesc &&
+ resultRelInfo->ri_TrigDesc->trig_insert_before_row)
{
- slot = ExecBRInsertTriggers(cfstate->estate, cfstate->resultRelInfo, slot);
+ slot = ExecBRInsertTriggers(estate, resultRelInfo, slot);
if (slot == NULL) /* "do nothing" */
skip_tuple = true;
else /* trigger might have changed tuple */
- cfstate->tuple = ExecMaterializeSlot(slot);
+ tuple = ExecMaterializeSlot(slot);
}
}
else
@@ -5889,11 +5861,11 @@ ParallelCopyFrom(CopyState cstate)
if (!skip_tuple)
{
- if (cfstate->resultRelInfo->ri_TrigDesc &&
- cfstate->resultRelInfo->ri_TrigDesc->trig_insert_instead_row)
+ if (resultRelInfo->ri_TrigDesc &&
+ resultRelInfo->ri_TrigDesc->trig_insert_instead_row)
{
/* Pass the data to the INSTEAD ROW INSERT trigger */
- ExecIRInsertTriggers(cfstate->estate, cfstate->resultRelInfo, slot);
+ ExecIRInsertTriggers(estate, resultRelInfo, slot);
}
else
{
@@ -5906,24 +5878,24 @@ ParallelCopyFrom(CopyState cstate)
* satisfied, so we need to check in that case.
*/
bool check_partition_constr =
- (cfstate->resultRelInfo->ri_PartitionCheck != NIL);
+ (resultRelInfo->ri_PartitionCheck != NIL);
- if (cfstate->saved_resultRelInfo != NULL &&
- !(cfstate->resultRelInfo->ri_TrigDesc &&
- cfstate->resultRelInfo->ri_TrigDesc->trig_insert_before_row))
+ if (saved_resultRelInfo != NULL &&
+ !(resultRelInfo->ri_TrigDesc &&
+ resultRelInfo->ri_TrigDesc->trig_insert_before_row))
check_partition_constr = false;
/* Check the constraints of the tuple */
- if (cfstate->cstate->rel->rd_att->constr || check_partition_constr)
- ExecConstraints(cfstate->resultRelInfo, slot, cfstate->estate);
+ if (cstate->rel->rd_att->constr || check_partition_constr)
+ ExecConstraints(resultRelInfo, slot, estate);
- if (cfstate->useHeapMultiInsert)
+ if (useHeapMultiInsert)
{
/* Add this tuple to the tuple buffer */
- if (cfstate->nBufferedTuples == 0)
- cfstate->firstBufferedLineNo = cfstate->cstate->cur_lineno;
- cfstate->bufferedTuples[cfstate->nBufferedTuples++] = cfstate->tuple;
- cfstate->bufferedTuplesSize += cfstate->tuple->t_len;
+ if (nBufferedTuples == 0)
+ firstBufferedLineNo = cstate->cur_lineno;
+ bufferedTuples[nBufferedTuples++] = tuple;
+ bufferedTuplesSize += tuple->t_len;
/*
* If the buffer filled up, flush it. Also flush if the
@@ -5931,15 +5903,15 @@ ParallelCopyFrom(CopyState cstate)
* large, to avoid using large amounts of memory for the
* buffer when the tuples are exceptionally wide.
*/
- if (cfstate->nBufferedTuples == MAX_BUFFERED_TUPLES ||
- cfstate->bufferedTuplesSize > 65535)
+ if (nBufferedTuples == MAX_BUFFERED_TUPLES ||
+ bufferedTuplesSize > 65535)
{
- CopyFromInsertBatch(cfstate->cstate, cfstate->estate, cfstate->mycid, cfstate->hi_options,
- cfstate->resultRelInfo, cfstate->myslot, cfstate->bistate,
- cfstate->nBufferedTuples, cfstate->bufferedTuples,
- cfstate->firstBufferedLineNo);
- cfstate->nBufferedTuples = 0;
- cfstate->bufferedTuplesSize = 0;
+ CopyFromInsertBatch(cstate, estate, mycid, hi_options,
+ resultRelInfo, myslot, bistate,
+ nBufferedTuples, bufferedTuples,
+ firstBufferedLineNo);
+ nBufferedTuples = 0;
+ bufferedTuplesSize = 0;
}
}
else
@@ -5947,20 +5919,20 @@ ParallelCopyFrom(CopyState cstate)
List *recheckIndexes = NIL;
/* OK, store the tuple and create index entries for it */
- heap_insert(cfstate->resultRelInfo->ri_RelationDesc, cfstate->tuple, cfstate->mycid,
- cfstate->hi_options, cfstate->bistate);
+ heap_insert(resultRelInfo->ri_RelationDesc, tuple, mycid,
+ hi_options, bistate);
- if (cfstate->resultRelInfo->ri_NumIndices > 0)
+ if (resultRelInfo->ri_NumIndices > 0)
recheckIndexes = ExecInsertIndexTuples(slot,
- &(cfstate->tuple->t_self),
- cfstate->estate,
+ &(tuple->t_self),
+ estate,
false,
NULL,
NIL);
/* AFTER ROW INSERT Triggers */
- ExecARInsertTriggers(cfstate->estate, cfstate->resultRelInfo, cfstate->tuple,
- recheckIndexes, cfstate->cstate->transition_capture);
+ ExecARInsertTriggers(estate, resultRelInfo, tuple,
+ recheckIndexes, cstate->transition_capture);
list_free(recheckIndexes);
}
@@ -5971,94 +5943,94 @@ ParallelCopyFrom(CopyState cstate)
* this is the same definition used by execMain.c for counting
* tuples inserted by an INSERT command.
*/
- cfstate->processed++;
+ processed++;
- if (cfstate->saved_resultRelInfo)
+ if (saved_resultRelInfo)
{
- cfstate->resultRelInfo = cfstate->saved_resultRelInfo;
- cfstate->estate->es_result_relation_info = cfstate->resultRelInfo;
+ resultRelInfo = saved_resultRelInfo;
+ estate->es_result_relation_info = resultRelInfo;
}
}
}
/* Flush any remaining buffered tuples */
- if (cfstate->nBufferedTuples > 0)
- CopyFromInsertBatch(cfstate->cstate, cfstate->estate, cfstate->mycid, cfstate->hi_options,
- cfstate->resultRelInfo, cfstate->myslot, cfstate->bistate,
- cfstate->nBufferedTuples, cfstate->bufferedTuples,
- cfstate->firstBufferedLineNo);
+ if (nBufferedTuples > 0)
+ CopyFromInsertBatch(cstate, estate, mycid, hi_options,
+ resultRelInfo, myslot, bistate,
+ nBufferedTuples, bufferedTuples,
+ firstBufferedLineNo);
/* Done, clean up */
- error_context_stack = cfstate->errcallback.previous;
+ error_context_stack = errcallback.previous;
- FreeBulkInsertState(cfstate->bistate);
+ FreeBulkInsertState(bistate);
- MemoryContextSwitchTo(cfstate->oldcontext);
+ MemoryContextSwitchTo(oldcontext);
/*
* In the old protocol, tell pqcomm that we can process normal protocol
* messages again.
*/
- if (cfstate->cstate->copy_dest == COPY_OLD_FE)
+ if (cstate->copy_dest == COPY_OLD_FE)
pq_endmsgread();
/* Execute AFTER STATEMENT insertion triggers */
- ExecASInsertTriggers(cfstate->estate, cfstate->resultRelInfo, cfstate->cstate->transition_capture);
+ ExecASInsertTriggers(estate, resultRelInfo, cstate->transition_capture);
/* Handle queued AFTER triggers */
- AfterTriggerEndQuery(cfstate->estate);
+ AfterTriggerEndQuery(estate);
- pfree(cfstate->values);
- pfree(cfstate->nulls);
+ pfree(values);
+ pfree(nulls);
- ExecResetTupleTable(cfstate->estate->es_tupleTable, false);
+ ExecResetTupleTable(estate->es_tupleTable, false);
- ExecCloseIndices(cfstate->resultRelInfo);
+ ExecCloseIndices(resultRelInfo);
/* Close all the partitioned tables, leaf partitions, and their indices */
- if (cfstate->cstate->partition_dispatch_info)
+ if (cstate->partition_dispatch_info)
{
int i;
/*
- * Remember cfstate->cstate->partition_dispatch_info[0] corresponds to the root
+ * Remember cstate->partition_dispatch_info[0] corresponds to the root
* partitioned table, which we must not try to close, because it is
* the main target table of COPY that will be closed eventually by
* DoCopy(). Also, tupslot is NULL for the root partitioned table.
*/
- for (i = 1; i < cfstate->cstate->num_dispatch; i++)
+ for (i = 1; i < cstate->num_dispatch; i++)
{
- PartitionDispatch pd = cfstate->cstate->partition_dispatch_info[i];
+ PartitionDispatch pd = cstate->partition_dispatch_info[i];
heap_close(pd->reldesc, NoLock);
ExecDropSingleTupleTableSlot(pd->tupslot);
}
- for (i = 0; i < cfstate->cstate->num_partitions; i++)
+ for (i = 0; i < cstate->num_partitions; i++)
{
- ResultRelInfo *resultRelInfo = cfstate->cstate->partitions + i;
+ ResultRelInfo *resultRelInfo = cstate->partitions + i;
ExecCloseIndices(resultRelInfo);
heap_close(resultRelInfo->ri_RelationDesc, NoLock);
}
/* Release the standalone partition tuple descriptor */
- ExecDropSingleTupleTableSlot(cfstate->cstate->partition_tuple_slot);
+ ExecDropSingleTupleTableSlot(cstate->partition_tuple_slot);
}
/* Close any trigger target relations */
- ExecCleanUpTriggerState(cfstate->estate);
+ ExecCleanUpTriggerState(estate);
- FreeExecutorState(cfstate->estate);
+ FreeExecutorState(estate);
/*
* If we skipped writing WAL, then we need to sync the heap (but not
* indexes since those use WAL anyway)
*/
- if (cfstate->hi_options & HEAP_INSERT_SKIP_WAL)
- heap_sync(cfstate->cstate->rel);
+ if (hi_options & HEAP_INSERT_SKIP_WAL)
+ heap_sync(cstate->rel);
/* Clean up. */
dsm_detach(seg);
- return cfstate->processed;
+ return processed;
}
--
2.11.0
From fbd7dcfd7e46e0f58f2b197d3b5f9e89a5cd449f Mon Sep 17 00:00:00 2001
From: Alex K <alex.lumir@gmail.com>
Date: Tue, 1 Aug 2017 15:09:37 +0300
Subject: [PATCH 07/13] Stop workers on EOF instead of on_dsm_detach callback
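Rather than relying on the on_dsm_detach callback to clean workers up,
the leader now drives the read loop itself with CopyReadLine() and tells
every worker explicitly when the input is exhausted. The end-of-stream
marker is simply a zero-length message. A sketch of both sides of the
protocol as implemented here (illustrative, error handling elided):

    /* Leader, once CopyReadLine() reports EOF with an empty line_buf: */
    for (i = 0; i < nworkers; i++)
        shm_mq_send(mq_handles[i], 0, cstate->line_buf.data, false);

    /* Worker receive loop: a zero-length payload means "no more lines". */
    for (;;)
    {
        CHECK_FOR_INTERRUPTS();
        res = shm_mq_receive(mqh, &len, &data, false);
        if (res != SHM_MQ_SUCCESS || len == 0)
            break;
        /* ... process one raw line ... */
    }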
---
src/backend/commands/copy.c | 595 ++++++++++++++++++++++++--------------------
1 file changed, 319 insertions(+), 276 deletions(-)
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index b03fbacfa1..22446d75e8 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -226,7 +226,7 @@ typedef struct CopyStateData
/*
* Finally, raw_buf holds raw data read from the data source (file or
- * client connection). CopyReadLine parses this data sufficiently to
+ * client connection). CopyReadLine parses this data sufficiently to
* locate line boundaries, then transfers the data to line_buf and
* converts it. Note: we guarantee that there is a \0 at
* raw_buf[raw_buf_len].
@@ -5113,6 +5113,11 @@ CopyFromBgwMainLoop(Datum main_arg)
if (shmq_res != SHM_MQ_SUCCESS)
break;
+ if (len == 0)
+ {
+ elog(LOG, "BGWorker #%d: got zero-length message, stopping", myworkernumber);
+ break;
+ }
// elog(LOG, "BGWorker #%d dummy processing line #%ld", myworkernumber, *(int64 *) data);
elog(LOG, "BGWorker #%d dummy processing line: %s", myworkernumber, msg);
}
@@ -5185,9 +5190,9 @@ shm_mq_setup(int64 queue_size, int32 nworkers, dsm_segment **segp,
* kill them if we die; they'll die on their own as the message queues
* shut down.
*/
- // cancel_on_dsm_detach(seg, cleanup_background_workers,
- // PointerGetDatum(wstate));
- // pfree(wstate);
+ cancel_on_dsm_detach(seg, cleanup_background_workers,
+ PointerGetDatum(wstate));
+ pfree(wstate);
return pst;
}
@@ -5666,27 +5671,28 @@ ParallelCopyFrom(CopyState cstate)
for (;;)
{
- TupleTableSlot *slot;
- bool skip_tuple;
- Oid loaded_oid = InvalidOid;
- int next_cf_state; /* NextCopyFrom return state */
+ // TupleTableSlot *slot;
+ // bool skip_tuple;
+ // Oid loaded_oid = InvalidOid;
+ // int next_cf_state; /* NextCopyFrom return state */
+ bool done;
CHECK_FOR_INTERRUPTS();
- if (nBufferedTuples == 0)
- {
- /*
- * Reset the per-tuple exprcontext. We can only do this if the
- * tuple buffer is empty. (Calling the context the per-tuple
- * memory context is a bit of a misnomer now.)
- */
- ResetPerTupleExprContext(estate);
- }
+ // if (nBufferedTuples == 0)
+ // {
+ // /*
+ // * Reset the per-tuple exprcontext. We can only do this if the
+ // * tuple buffer is empty. (Calling the context the per-tuple
+ // * memory context is a bit of a misnomer now.)
+ // */
+ // ResetPerTupleExprContext(estate);
+ // }
/* Switch into its memory context */
- MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
+ // MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
- next_cf_state = NextCopyFrom(cstate, econtext, values, nulls, &loaded_oid);
+ // next_cf_state = NextCopyFrom(cstate, econtext, values, nulls, &loaded_oid);
// LWLockAcquire(CopyFromBgwLock, LW_EXCLUSIVE);
@@ -5695,15 +5701,52 @@ ParallelCopyFrom(CopyState cstate)
// // SpinLockRelease(&pst->mutex);
// LWLockRelease(CopyFromBgwLock);
- if (next_cf_state) {
+ /* on input just throw the header line away */
+ if (cstate->cur_lineno == 0 && cstate->header_line)
+ {
+ cstate->cur_lineno++;
+ if (CopyReadLine(cstate))
+ continue; /* EOF; the next iteration will see it and finish */
+ }
+
+ cstate->cur_lineno++;
+
+ /* Actually read the line into memory here */
+ done = CopyReadLine(cstate);
+ // elog(LOG, "Reading line #%d with status %d", cstate->cur_lineno, done);
+
+ /*
+ * EOF at start of line means we're done. If we see EOF after some
+ * characters, we act as though it was newline followed by EOF, ie,
+ * process the line and then exit loop on next iteration.
+ */
+ if (done && cstate->line_buf.len == 0)
+ {
+ int i;
+
+ elog(LOG, "EOF reached, ending up queries");
+ for (i = 0; i < nworkers; i++)
+ {
+ /*
+ * Sending zero-length data to workers in order to stop them.
+ */
+ shm_mq_send(mq_handles[i], 0, cstate->line_buf.data, false);
+ }
+
+ break;
+ }
+ else
+ {
// message = pst->curr_line;
// cur_ptr = cstate->line_buf.data;
// line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
- message = (char *) palloc(cstate->line_buf.len + 1);
- memcpy(message, cstate->line_buf.data, cstate->line_buf.len);
- message[cstate->line_buf.len] = '\0'; /* so it is safe to print with %s */
+ // message = (char *) palloc(cstate->line_buf.len + 1);
+ // memcpy(message, cstate->line_buf.data, cstate->line_buf.len);
+ // message[cstate->line_buf.len] = '\0';
// shmq_res = shm_mq_send(mq_handles[++last_worker_used - 1], message_size, &message, false);
- elog(LOG, "Sending line #%d '%s' to BGWorker #%d", cstate->cur_lineno, message, last_worker_used + 1);
- shmq_res = shm_mq_send(mq_handles[++last_worker_used - 1], cstate->line_buf.len, message, false);
+ // elog(LOG, "Sending line #%d '%s' to BGWorker #%d", cstate->cur_lineno, message, last_worker_used + 1);
+ elog(LOG, "Sending line #%d to BGWorker #%d", cstate->cur_lineno, last_worker_used + 1);
+ // shmq_res = shm_mq_send(mq_handles[++last_worker_used - 1], cstate->line_buf.len, message, false);
+ shmq_res = shm_mq_send(mq_handles[++last_worker_used - 1], cstate->line_buf.len, cstate->line_buf.data, false);
if (shmq_res != SHM_MQ_SUCCESS)
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
@@ -5712,258 +5755,258 @@ ParallelCopyFrom(CopyState cstate)
last_worker_used = 0;
}
- if (!next_cf_state) {
- break;
- }
- else if (next_cf_state == NCF_SUCCESS)
- {
- /* And now we can form the input tuple. */
- tuple = heap_form_tuple(tupDesc, values, nulls);
-
- if (loaded_oid != InvalidOid)
- HeapTupleSetOid(tuple, loaded_oid);
-
- /*
- * Constraints might reference the tableoid column, so initialize
- * t_tableOid before evaluating them.
- */
- tuple->t_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
-
- /* Triggers and stuff need to be invoked in query context. */
- MemoryContextSwitchTo(oldcontext);
-
- /* Place tuple in tuple slot --- but slot shouldn't free it */
- slot = myslot;
- ExecStoreTuple(tuple, slot, InvalidBuffer, false);
-
- /* Determine the partition to heap_insert the tuple into */
- if (cstate->partition_dispatch_info)
- {
- int leaf_part_index;
- TupleConversionMap *map;
-
- /*
- * Away we go ... If we end up not finding a partition after all,
- * ExecFindPartition() does not return and errors out instead.
- * Otherwise, the returned value is to be used as an index into
- * arrays mt_partitions[] and mt_partition_tupconv_maps[] that
- * will get us the ResultRelInfo and TupleConversionMap for the
- * partition, respectively.
- */
- leaf_part_index = ExecFindPartition(resultRelInfo,
- cstate->partition_dispatch_info,
- slot,
- estate);
- Assert(leaf_part_index >= 0 &&
- leaf_part_index < cstate->num_partitions);
-
- /*
- * If this tuple is mapped to a partition that is not same as the
- * previous one, we'd better make the bulk insert mechanism gets a
- * new buffer.
- */
- if (prev_leaf_part_index != leaf_part_index)
- {
- ReleaseBulkInsertStatePin(bistate);
- prev_leaf_part_index = leaf_part_index;
- }
-
- /*
- * Save the old ResultRelInfo and switch to the one corresponding
- * to the selected partition.
- */
- saved_resultRelInfo = resultRelInfo;
- resultRelInfo = cstate->partitions + leaf_part_index;
-
- /* We do not yet have a way to insert into a foreign partition */
- if (resultRelInfo->ri_FdwRoutine)
- ereport(ERROR,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("cannot route inserted tuples to a foreign table")));
-
- /*
- * For ExecInsertIndexTuples() to work on the partition's indexes
- */
- estate->es_result_relation_info = resultRelInfo;
-
- /*
- * If we're capturing transition tuples, we might need to convert
- * from the partition rowtype to parent rowtype.
- */
- if (cstate->transition_capture != NULL)
- {
- if (resultRelInfo->ri_TrigDesc &&
- (resultRelInfo->ri_TrigDesc->trig_insert_before_row ||
- resultRelInfo->ri_TrigDesc->trig_insert_instead_row))
- {
- /*
- * If there are any BEFORE or INSTEAD triggers on the
- * partition, we'll have to be ready to convert their
- * result back to tuplestore format.
- */
- cstate->transition_capture->tcs_original_insert_tuple = NULL;
- cstate->transition_capture->tcs_map =
- cstate->transition_tupconv_maps[leaf_part_index];
- }
- else
- {
- /*
- * Otherwise, just remember the original unconverted
- * tuple, to avoid a needless round trip conversion.
- */
- cstate->transition_capture->tcs_original_insert_tuple = tuple;
- cstate->transition_capture->tcs_map = NULL;
- }
- }
- /*
- * We might need to convert from the parent rowtype to the
- * partition rowtype.
- */
- map = cstate->partition_tupconv_maps[leaf_part_index];
- if (map)
- {
- Relation partrel = resultRelInfo->ri_RelationDesc;
-
- tuple = do_convert_tuple(tuple, map);
-
- /*
- * We must use the partition's tuple descriptor from this
- * point on. Use a dedicated slot from this point on until
- * we're finished dealing with the partition.
- */
- slot = cstate->partition_tuple_slot;
- Assert(slot != NULL);
- ExecSetSlotDescriptor(slot, RelationGetDescr(partrel));
- ExecStoreTuple(tuple, slot, InvalidBuffer, true);
- }
-
- tuple->t_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
- }
-
- skip_tuple = false;
-
- /* BEFORE ROW INSERT Triggers */
- if (resultRelInfo->ri_TrigDesc &&
- resultRelInfo->ri_TrigDesc->trig_insert_before_row)
- {
- slot = ExecBRInsertTriggers(estate, resultRelInfo, slot);
-
- if (slot == NULL) /* "do nothing" */
- skip_tuple = true;
- else /* trigger might have changed tuple */
- tuple = ExecMaterializeSlot(slot);
- }
- }
- else
- {
- skip_tuple = true;
- }
-
- if (!skip_tuple)
- {
- if (resultRelInfo->ri_TrigDesc &&
- resultRelInfo->ri_TrigDesc->trig_insert_instead_row)
- {
- /* Pass the data to the INSTEAD ROW INSERT trigger */
- ExecIRInsertTriggers(estate, resultRelInfo, slot);
- }
- else
- {
- /*
- * We always check the partition constraint, including when
- * the tuple got here via tuple-routing. However we don't
- * need to in the latter case if no BR trigger is defined on
- * the partition. Note that a BR trigger might modify the
- * tuple such that the partition constraint is no longer
- * satisfied, so we need to check in that case.
- */
- bool check_partition_constr =
- (resultRelInfo->ri_PartitionCheck != NIL);
-
- if (saved_resultRelInfo != NULL &&
- !(resultRelInfo->ri_TrigDesc &&
- resultRelInfo->ri_TrigDesc->trig_insert_before_row))
- check_partition_constr = false;
-
- /* Check the constraints of the tuple */
- if (cstate->rel->rd_att->constr || check_partition_constr)
- ExecConstraints(resultRelInfo, slot, estate);
-
- if (useHeapMultiInsert)
- {
- /* Add this tuple to the tuple buffer */
- if (nBufferedTuples == 0)
- firstBufferedLineNo = cstate->cur_lineno;
- bufferedTuples[nBufferedTuples++] = tuple;
- bufferedTuplesSize += tuple->t_len;
-
- /*
- * If the buffer filled up, flush it. Also flush if the
- * total size of all the tuples in the buffer becomes
- * large, to avoid using large amounts of memory for the
- * buffer when the tuples are exceptionally wide.
- */
- if (nBufferedTuples == MAX_BUFFERED_TUPLES ||
- bufferedTuplesSize > 65535)
- {
- CopyFromInsertBatch(cstate, estate, mycid, hi_options,
- resultRelInfo, myslot, bistate,
- nBufferedTuples, bufferedTuples,
- firstBufferedLineNo);
- nBufferedTuples = 0;
- bufferedTuplesSize = 0;
- }
- }
- else
- {
- List *recheckIndexes = NIL;
-
- /* OK, store the tuple and create index entries for it */
- heap_insert(resultRelInfo->ri_RelationDesc, tuple, mycid,
- hi_options, bistate);
-
- if (resultRelInfo->ri_NumIndices > 0)
- recheckIndexes = ExecInsertIndexTuples(slot,
- &(tuple->t_self),
- estate,
- false,
- NULL,
- NIL);
-
- /* AFTER ROW INSERT Triggers */
- ExecARInsertTriggers(estate, resultRelInfo, tuple,
- recheckIndexes, cstate->transition_capture);
-
- list_free(recheckIndexes);
- }
- }
-
- /*
- * We count only tuples not suppressed by a BEFORE INSERT trigger;
- * this is the same definition used by execMain.c for counting
- * tuples inserted by an INSERT command.
- */
- processed++;
-
- if (saved_resultRelInfo)
- {
- resultRelInfo = saved_resultRelInfo;
- estate->es_result_relation_info = resultRelInfo;
- }
- }
- }
-
- /* Flush any remaining buffered tuples */
- if (nBufferedTuples > 0)
- CopyFromInsertBatch(cstate, estate, mycid, hi_options,
- resultRelInfo, myslot, bistate,
- nBufferedTuples, bufferedTuples,
- firstBufferedLineNo);
-
- /* Done, clean up */
- error_context_stack = errcallback.previous;
-
- FreeBulkInsertState(bistate);
+ // if (!next_cf_state) {
+ // break;
+ // }
+ // else if (next_cf_state == NCF_SUCCESS)
+ // {
+ // /* And now we can form the input tuple. */
+ // tuple = heap_form_tuple(tupDesc, values, nulls);
+ //
+ // if (loaded_oid != InvalidOid)
+ // HeapTupleSetOid(tuple, loaded_oid);
+ //
+ // /*
+ // * Constraints might reference the tableoid column, so initialize
+ // * t_tableOid before evaluating them.
+ // */
+ // tuple->t_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
+ //
+ // /* Triggers and stuff need to be invoked in query context. */
+ // MemoryContextSwitchTo(oldcontext);
+ //
+ // /* Place tuple in tuple slot --- but slot shouldn't free it */
+ // slot = myslot;
+ // ExecStoreTuple(tuple, slot, InvalidBuffer, false);
+ //
+ // /* Determine the partition to heap_insert the tuple into */
+ // if (cstate->partition_dispatch_info)
+ // {
+ // int leaf_part_index;
+ // TupleConversionMap *map;
+ //
+ // /*
+ // * Away we go ... If we end up not finding a partition after all,
+ // * ExecFindPartition() does not return and errors out instead.
+ // * Otherwise, the returned value is to be used as an index into
+ // * arrays mt_partitions[] and mt_partition_tupconv_maps[] that
+ // * will get us the ResultRelInfo and TupleConversionMap for the
+ // * partition, respectively.
+ // */
+ // leaf_part_index = ExecFindPartition(resultRelInfo,
+ // cstate->partition_dispatch_info,
+ // slot,
+ // estate);
+ // Assert(leaf_part_index >= 0 &&
+ // leaf_part_index < cstate->num_partitions);
+ //
+ // /*
+ // * If this tuple is mapped to a partition that is not same as the
+ // * previous one, we'd better make the bulk insert mechanism gets a
+ // * new buffer.
+ // */
+ // if (prev_leaf_part_index != leaf_part_index)
+ // {
+ // ReleaseBulkInsertStatePin(bistate);
+ // prev_leaf_part_index = leaf_part_index;
+ // }
+ //
+ // /*
+ // * Save the old ResultRelInfo and switch to the one corresponding
+ // * to the selected partition.
+ // */
+ // saved_resultRelInfo = resultRelInfo;
+ // resultRelInfo = cstate->partitions + leaf_part_index;
+ //
+ // /* We do not yet have a way to insert into a foreign partition */
+ // if (resultRelInfo->ri_FdwRoutine)
+ // ereport(ERROR,
+ // (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ // errmsg("cannot route inserted tuples to a foreign table")));
+ //
+ // /*
+ // * For ExecInsertIndexTuples() to work on the partition's indexes
+ // */
+ // estate->es_result_relation_info = resultRelInfo;
+ //
+ // /*
+ // * If we're capturing transition tuples, we might need to convert
+ // * from the partition rowtype to parent rowtype.
+ // */
+ // if (cstate->transition_capture != NULL)
+ // {
+ // if (resultRelInfo->ri_TrigDesc &&
+ // (resultRelInfo->ri_TrigDesc->trig_insert_before_row ||
+ // resultRelInfo->ri_TrigDesc->trig_insert_instead_row))
+ // {
+ // /*
+ // * If there are any BEFORE or INSTEAD triggers on the
+ // * partition, we'll have to be ready to convert their
+ // * result back to tuplestore format.
+ // */
+ // cstate->transition_capture->tcs_original_insert_tuple = NULL;
+ // cstate->transition_capture->tcs_map =
+ // cstate->transition_tupconv_maps[leaf_part_index];
+ // }
+ // else
+ // {
+ // /*
+ // * Otherwise, just remember the original unconverted
+ // * tuple, to avoid a needless round trip conversion.
+ // */
+ // cstate->transition_capture->tcs_original_insert_tuple = tuple;
+ // cstate->transition_capture->tcs_map = NULL;
+ // }
+ // }
+ // /*
+ // * We might need to convert from the parent rowtype to the
+ // * partition rowtype.
+ // */
+ // map = cstate->partition_tupconv_maps[leaf_part_index];
+ // if (map)
+ // {
+ // Relation partrel = resultRelInfo->ri_RelationDesc;
+ //
+ // tuple = do_convert_tuple(tuple, map);
+ //
+ // /*
+ // * We must use the partition's tuple descriptor from this
+ // * point on. Use a dedicated slot from this point on until
+ // * we're finished dealing with the partition.
+ // */
+ // slot = cstate->partition_tuple_slot;
+ // Assert(slot != NULL);
+ // ExecSetSlotDescriptor(slot, RelationGetDescr(partrel));
+ // ExecStoreTuple(tuple, slot, InvalidBuffer, true);
+ // }
+ //
+ // tuple->t_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
+ // }
+ //
+ // skip_tuple = false;
+ //
+ // /* BEFORE ROW INSERT Triggers */
+ // if (resultRelInfo->ri_TrigDesc &&
+ // resultRelInfo->ri_TrigDesc->trig_insert_before_row)
+ // {
+ // slot = ExecBRInsertTriggers(estate, resultRelInfo, slot);
+ //
+ // if (slot == NULL) /* "do nothing" */
+ // skip_tuple = true;
+ // else /* trigger might have changed tuple */
+ // tuple = ExecMaterializeSlot(slot);
+ // }
+ // }
+ // else
+ // {
+ // skip_tuple = true;
+ // }
+ //
+ // if (!skip_tuple)
+ // {
+ // if (resultRelInfo->ri_TrigDesc &&
+ // resultRelInfo->ri_TrigDesc->trig_insert_instead_row)
+ // {
+ // /* Pass the data to the INSTEAD ROW INSERT trigger */
+ // ExecIRInsertTriggers(estate, resultRelInfo, slot);
+ // }
+ // else
+ // {
+ // /*
+ // * We always check the partition constraint, including when
+ // * the tuple got here via tuple-routing. However we don't
+ // * need to in the latter case if no BR trigger is defined on
+ // * the partition. Note that a BR trigger might modify the
+ // * tuple such that the partition constraint is no longer
+ // * satisfied, so we need to check in that case.
+ // */
+ // bool check_partition_constr =
+ // (resultRelInfo->ri_PartitionCheck != NIL);
+ //
+ // if (saved_resultRelInfo != NULL &&
+ // !(resultRelInfo->ri_TrigDesc &&
+ // resultRelInfo->ri_TrigDesc->trig_insert_before_row))
+ // check_partition_constr = false;
+ //
+ // /* Check the constraints of the tuple */
+ // if (cstate->rel->rd_att->constr || check_partition_constr)
+ // ExecConstraints(resultRelInfo, slot, estate);
+ //
+ // if (useHeapMultiInsert)
+ // {
+ // /* Add this tuple to the tuple buffer */
+ // if (nBufferedTuples == 0)
+ // firstBufferedLineNo = cstate->cur_lineno;
+ // bufferedTuples[nBufferedTuples++] = tuple;
+ // bufferedTuplesSize += tuple->t_len;
+ //
+ // /*
+ // * If the buffer filled up, flush it. Also flush if the
+ // * total size of all the tuples in the buffer becomes
+ // * large, to avoid using large amounts of memory for the
+ // * buffer when the tuples are exceptionally wide.
+ // */
+ // if (nBufferedTuples == MAX_BUFFERED_TUPLES ||
+ // bufferedTuplesSize > 65535)
+ // {
+ // CopyFromInsertBatch(cstate, estate, mycid, hi_options,
+ // resultRelInfo, myslot, bistate,
+ // nBufferedTuples, bufferedTuples,
+ // firstBufferedLineNo);
+ // nBufferedTuples = 0;
+ // bufferedTuplesSize = 0;
+ // }
+ // }
+ // else
+ // {
+ // List *recheckIndexes = NIL;
+ //
+ // /* OK, store the tuple and create index entries for it */
+ // heap_insert(resultRelInfo->ri_RelationDesc, tuple, mycid,
+ // hi_options, bistate);
+ //
+ // if (resultRelInfo->ri_NumIndices > 0)
+ // recheckIndexes = ExecInsertIndexTuples(slot,
+ // &(tuple->t_self),
+ // estate,
+ // false,
+ // NULL,
+ // NIL);
+ //
+ // /* AFTER ROW INSERT Triggers */
+ // ExecARInsertTriggers(estate, resultRelInfo, tuple,
+ // recheckIndexes, cstate->transition_capture);
+ //
+ // list_free(recheckIndexes);
+ // }
+ // }
+ //
+ // /*
+ // * We count only tuples not suppressed by a BEFORE INSERT trigger;
+ // * this is the same definition used by execMain.c for counting
+ // * tuples inserted by an INSERT command.
+ // */
+ // processed++;
+ //
+ // if (saved_resultRelInfo)
+ // {
+ // resultRelInfo = saved_resultRelInfo;
+ // estate->es_result_relation_info = resultRelInfo;
+ // }
+ // }
+ }
+ //
+ // /* Flush any remaining buffered tuples */
+ // if (nBufferedTuples > 0)
+ // CopyFromInsertBatch(cstate, estate, mycid, hi_options,
+ // resultRelInfo, myslot, bistate,
+ // nBufferedTuples, bufferedTuples,
+ // firstBufferedLineNo);
+ //
+ // /* Done, clean up */
+ // error_context_stack = errcallback.previous;
+ //
+ // FreeBulkInsertState(bistate);
MemoryContextSwitchTo(oldcontext);
--
2.11.0
From 0ba0d2802955fc3bc25304682cdfb42a1ba365de Mon Sep 17 00:00:00 2001
From: Alex K <alex.lumir@gmail.com>
Date: Fri, 4 Aug 2017 20:13:47 +0300
Subject: [PATCH 08/13] Use ConditionVariable to wait until BGWorkers
complete their work
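The leader must not proceed to cleanup while workers are still draining
their queues, so each worker bumps a workers_finished counter under
CopyFromBgwLock and the leader sleeps on a ConditionVariable kept in the
shared ParallelState. The standard pattern, as a sketch (note the
ConditionVariable is used in place in shared memory, never copied by
value):

    /* Worker, after its receive loop ends: */
    LWLockAcquire(CopyFromBgwLock, LW_EXCLUSIVE);
    ++pst->workers_finished;
    LWLockRelease(CopyFromBgwLock);
    ConditionVariableBroadcast(&pst->cv);

    /* Leader, waiting for all workers: */
    for (;;)
    {
        LWLockAcquire(CopyFromBgwLock, LW_EXCLUSIVE);
        done = (pst->workers_finished == pst->workers_total);
        LWLockRelease(CopyFromBgwLock);
        if (done)
            break;
        ConditionVariableSleep(&pst->cv,
                               WAIT_EVENT_COPY_FROM_BGWORKERS_FINISHED);
        CHECK_FOR_INTERRUPTS();
    }
    ConditionVariableCancelSleep();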
---
src/backend/commands/copy.c | 50 ++++++++++++++++++++++++++++++++++++++++-
src/backend/postmaster/pgstat.c | 3 +++
src/include/pgstat.h | 3 ++-
3 files changed, 54 insertions(+), 2 deletions(-)
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 22446d75e8..122822e6bf 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -260,8 +260,10 @@ typedef struct
int workers_total;
int workers_attached;
int workers_ready;
+ int workers_finished;
Oid database_id;
Oid authenticated_user_id;
+ ConditionVariable cv;
} ParallelState;
// TODO Consider change
@@ -5008,6 +5010,7 @@ CopyFromBgwMainLoop(Datum main_arg)
shm_mq_result shmq_res;
Size len;
void *data;
/*
* Establish signal handlers.
@@ -5089,6 +5092,7 @@ CopyFromBgwMainLoop(Datum main_arg)
LWLockAcquire(CopyFromBgwLock, LW_EXCLUSIVE);
// SpinLockAcquire(&pst->mutex);
++pst->workers_ready;
elog(LOG, "BGWorker #%d started", myworkernumber);
// SpinLockRelease(&pst->mutex);
LWLockRelease(CopyFromBgwLock);
@@ -5122,6 +5126,13 @@ CopyFromBgwMainLoop(Datum main_arg)
elog(LOG, "BGWorker #%d dummy processing line: %s", myworkernumber, msg);
}
+ LWLockAcquire(CopyFromBgwLock, LW_EXCLUSIVE);
+ ++pst->workers_finished;
+ LWLockRelease(CopyFromBgwLock);
+
+ /* Signal main process that we are done. */
+ ConditionVariableBroadcast(&pst->cv);
+
/*
* We're done. Explicitly detach the shared memory segment so that we
* don't get a resource leak warning at commit time. This will fire any
@@ -5250,12 +5261,14 @@ setup_dynamic_shared_memory(int64 queue_size, int nworkers,
/* Set up the header region. */
pst = shm_toc_allocate(toc, sizeof(ParallelState));
- SpinLockInit(&pst->mutex);
+ // SpinLockInit(&pst->mutex);
pst->workers_total = nworkers;
pst->workers_attached = 0;
pst->workers_ready = 0;
+ pst->workers_finished = 0;
pst->database_id = MyDatabaseId;
pst->authenticated_user_id = GetAuthenticatedUserId();
+ ConditionVariableInit(&pst->cv);
shm_toc_insert(toc, 0, pst);
/* Set up one message queue per worker. */
@@ -5431,6 +5444,38 @@ check_worker_status(WorkerState *wstate)
return true;
}
+static void
+wait_for_workers_to_finish(volatile ParallelState *pst)
+{
+ elog(LOG, "Waiting for BGWorkers to finish");
+
+ for (;;)
+ {
+ int workers_finished;
+ int workers_total;
+ LWLockAcquire(CopyFromBgwLock, LW_EXCLUSIVE);
+ workers_finished = pst->workers_finished;
+ workers_total = pst->workers_total;
+ LWLockRelease(CopyFromBgwLock);
+
+ elog(LOG, "Checking BGWorkers workers_finished: %d, workers_total: %d", workers_finished, workers_total);
+ if (workers_finished == workers_total)
+ {
+ break;
+ }
+
+ elog(LOG, "Going to sleep again");
+ /* Wait for the workers to wake us up. */
+ ConditionVariableSleep((ConditionVariable *) &pst->cv, WAIT_EVENT_COPY_FROM_BGWORKERS_FINISHED);
+
+ /* An interrupt may have occurred while we were waiting. */
+ CHECK_FOR_INTERRUPTS();
+ }
+
+ ConditionVariableCancelSleep();
+}
/*
* Parallel Copy FROM file to relation.
@@ -6008,6 +6053,9 @@ ParallelCopyFrom(CopyState cstate)
//
// FreeBulkInsertState(bistate);
+ /* Wait for all workers to complete their work. */
+ wait_for_workers_to_finish(pst);
+
MemoryContextSwitchTo(oldcontext);
/*
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index a0b0eecbd5..ba1c8eeed1 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -3612,6 +3612,9 @@ pgstat_get_wait_ipc(WaitEventIPC w)
case WAIT_EVENT_LOGICAL_SYNC_STATE_CHANGE:
event_name = "LogicalSyncStateChange";
break;
+ case WAIT_EVENT_COPY_FROM_BGWORKERS_FINISHED:
+ event_name = "CopyFromBGWorkersFinished";
+ break;
/* no default case, so that compiler will warn */
}
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 6bffe63ad6..66f6b2ff2f 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -812,7 +812,8 @@ typedef enum
WAIT_EVENT_SAFE_SNAPSHOT,
WAIT_EVENT_SYNC_REP,
WAIT_EVENT_LOGICAL_SYNC_DATA,
- WAIT_EVENT_LOGICAL_SYNC_STATE_CHANGE
+ WAIT_EVENT_LOGICAL_SYNC_STATE_CHANGE,
+ WAIT_EVENT_COPY_FROM_BGWORKERS_FINISHED
} WaitEventIPC;
/* ----------
--
2.11.0
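The handshake the patch above adds is the textbook condition-variable pattern: each worker bumps a shared counter under a lock and broadcasts, while the leader re-checks the predicate in a loop before going back to sleep. The following standalone sketch restates that pattern with plain POSIX threads rather than server infrastructure; pthread_mutex_t/pthread_cond_t stand in for CopyFromBgwLock and the ConditionVariable kept in ParallelState, and N_WORKERS plus every other name below is illustrative, not taken from the patch.

/*
 * Standalone sketch -- NOT server code. Workers increment a shared
 * "finished" counter under a lock and broadcast; the leader sleeps on
 * the condition variable until the counter reaches the total, exactly
 * the exit condition of wait_for_workers_to_finish() above.
 */
#include <pthread.h>
#include <stdio.h>

#define N_WORKERS 4

typedef struct
{
	pthread_mutex_t lock;
	pthread_cond_t	cv;
	int				workers_finished;
	int				workers_total;
} ParallelStateSketch;

static ParallelStateSketch pst = {
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0, N_WORKERS
};

static void *
worker_main(void *arg)
{
	(void) arg;

	/* ... a real worker would drain its shm_mq here ... */

	/* Like: LWLockAcquire(); ++pst->workers_finished; LWLockRelease(); */
	pthread_mutex_lock(&pst.lock);
	++pst.workers_finished;
	pthread_mutex_unlock(&pst.lock);

	/* Like ConditionVariableBroadcast(&pst->cv): wake the waiting leader. */
	pthread_cond_broadcast(&pst.cv);
	return NULL;
}

int
main(void)
{
	pthread_t	tid[N_WORKERS];
	int			i;

	for (i = 0; i < N_WORKERS; ++i)
		pthread_create(&tid[i], NULL, worker_main, NULL);

	/*
	 * Like wait_for_workers_to_finish(): re-check the predicate after every
	 * wakeup. pthread_cond_wait atomically releases the mutex while asleep,
	 * roughly what ConditionVariableSleep does by parking on the latch.
	 */
	pthread_mutex_lock(&pst.lock);
	while (pst.workers_finished < pst.workers_total)
		pthread_cond_wait(&pst.cv, &pst.lock);
	pthread_mutex_unlock(&pst.lock);

	printf("all %d workers finished\n", pst.workers_total);

	for (i = 0; i < N_WORKERS; ++i)
		pthread_join(tid[i], NULL);
	return 0;
}

Built with cc -pthread, the sketch prints "all 4 workers finished" once the counter reaches workers_total, mirroring the loop-and-recheck structure of wait_for_workers_to_finish().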
From 353f08dfd225f75099c66dd48109ea31c55adec9 Mon Sep 17 00:00:00 2001
From: Alex K <alex.lumir@gmail.com>
Date: Tue, 15 Aug 2017 12:18:11 +0300
Subject: [PATCH 09/13] Very, very dirty, but working trick to obtain CopyState
in the parallel worker
---
src/backend/commands/copy.c | 1168 ++++++++++++++++++++++++++++++++-----------
src/include/commands/copy.h | 7 +-
2 files changed, 884 insertions(+), 291 deletions(-)
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 122822e6bf..7f38e70bf3 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -38,8 +38,10 @@
#include "optimizer/planner.h"
#include "optimizer/cost.h"
#include "nodes/makefuncs.h"
+#include "parser/parser.h"
#include "parser/parse_relation.h"
#include "pgstat.h"
+#include "tcop/pquery.h"
#include "postmaster/bgworker.h"
#include "rewrite/rewriteHandler.h"
#include "storage/dsm.h"
@@ -264,6 +266,21 @@ typedef struct
Oid database_id;
Oid authenticated_user_id;
ConditionVariable cv;
+
+ bool is_program;
+ Relation rel;
+
+ TupleDesc tupDesc;
+ ResultRelInfo *resultRelInfo;
+ EState *estate;
+ ExprContext *econtext;
+ TupleTableSlot *myslot;
+
+ ErrorContextCallback errcallback;
+ CommandId mycid;
+ int hi_options;
+ BulkInsertState bistate;
+ bool useHeapMultiInsert;
} ParallelState;
// TODO Consider change
@@ -389,12 +406,19 @@ static void CopySendInt16(CopyState cstate, int16 val);
static bool CopyGetInt16(CopyState cstate, int16 *val);
-static ParallelState* shm_mq_setup(int64 queue_size, int32 nworkers,
- dsm_segment **segp, shm_mq_handle **mq_handles[]);
-static void setup_dynamic_shared_memory(int64 queue_size, int nworkers,
+static ParallelState* setup_parallel_copy_from(int64 queue_size, int32 nworkers,
+ dsm_segment **segp, shm_mq_handle **mq_handles[], CopyState cstate,
+ ParseState *pstate,
+ List *attnamelist,
+ List *options);
+static void setup_dsm(int64 queue_size, int nworkers,
dsm_segment **segp,
ParallelState **pstp,
- shm_mq **mqs[]);
+ shm_mq **mqs[],
+ CopyState cstate,
+ ParseState *pstate,
+ List *attnamelist,
+ List *options);
static WorkerState *setup_background_workers(int nworkers,
dsm_segment *seg);
static void cleanup_background_workers(dsm_segment *seg, Datum arg);
@@ -1041,7 +1065,8 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt,
if (cstate->allow_parallel) /* copy from file to database */
{
- *processed = ParallelCopyFrom(cstate);
+ *processed = ParallelCopyFrom(cstate, pstate, rel, stmt->filename, stmt->is_program,
+ stmt->attlist, stmt->options);
}
else
{
@@ -3169,63 +3194,66 @@ BeginCopyFrom(ParseState *pstate,
cstate->num_defaults = num_defaults;
cstate->is_program = is_program;
- if (data_source_cb)
- {
- cstate->copy_dest = COPY_CALLBACK;
- cstate->data_source_cb = data_source_cb;
- }
- else if (pipe)
- {
- Assert(!is_program); /* the grammar does not allow this */
- if (whereToSendOutput == DestRemote)
- ReceiveCopyBegin(cstate);
- else
- cstate->copy_file = stdin;
- }
- else
- {
- cstate->filename = pstrdup(filename);
+ if (!IsBackgroundWorker)
+ {
+ if (data_source_cb)
+ {
+ cstate->copy_dest = COPY_CALLBACK;
+ cstate->data_source_cb = data_source_cb;
+ }
+ else if (pipe)
+ {
+ Assert(!is_program); /* the grammar does not allow this */
+ if (whereToSendOutput == DestRemote)
+ ReceiveCopyBegin(cstate);
+ else
+ cstate->copy_file = stdin;
+ }
+ else
+ {
+ cstate->filename = pstrdup(filename);
- if (cstate->is_program)
- {
- cstate->copy_file = OpenPipeStream(cstate->filename, PG_BINARY_R);
- if (cstate->copy_file == NULL)
- ereport(ERROR,
- (errcode_for_file_access(),
- errmsg("could not execute command \"%s\": %m",
- cstate->filename)));
- }
- else
- {
- struct stat st;
+ if (cstate->is_program)
+ {
+ cstate->copy_file = OpenPipeStream(cstate->filename, PG_BINARY_R);
+ if (cstate->copy_file == NULL)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not execute command \"%s\": %m",
+ cstate->filename)));
+ }
+ else
+ {
+ struct stat st;
- cstate->copy_file = AllocateFile(cstate->filename, PG_BINARY_R);
- if (cstate->copy_file == NULL)
- {
- /* copy errno because ereport subfunctions might change it */
- int save_errno = errno;
+ cstate->copy_file = AllocateFile(cstate->filename, PG_BINARY_R);
+ if (cstate->copy_file == NULL)
+ {
+ /* copy errno because ereport subfunctions might change it */
+ int save_errno = errno;
- ereport(ERROR,
- (errcode_for_file_access(),
- errmsg("could not open file \"%s\" for reading: %m",
- cstate->filename),
- (save_errno == ENOENT || save_errno == EACCES) ?
- errhint("COPY FROM instructs the PostgreSQL server process to read a file. "
- "You may want a client-side facility such as psql's \\copy.") : 0));
- }
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not open file \"%s\" for reading: %m",
+ cstate->filename),
+ (save_errno == ENOENT || save_errno == EACCES) ?
+ errhint("COPY FROM instructs the PostgreSQL server process to read a file. "
+ "You may want a client-side facility such as psql's \\copy.") : 0));
+ }
- if (fstat(fileno(cstate->copy_file), &st))
- ereport(ERROR,
- (errcode_for_file_access(),
- errmsg("could not stat file \"%s\": %m",
- cstate->filename)));
+ if (fstat(fileno(cstate->copy_file), &st))
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not stat file \"%s\": %m",
+ cstate->filename)));
- if (S_ISDIR(st.st_mode))
- ereport(ERROR,
- (errcode(ERRCODE_WRONG_OBJECT_TYPE),
- errmsg("\"%s\" is a directory", cstate->filename)));
- }
- }
+ if (S_ISDIR(st.st_mode))
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("\"%s\" is a directory", cstate->filename)));
+ }
+ }
+ }
if (!cstate->binary)
{
@@ -5001,6 +5029,8 @@ void
CopyFromBgwMainLoop(Datum main_arg)
{
volatile ParallelState *pst;
+ // volatile CopyState cst;
+
dsm_segment *seg;
shm_toc *toc;
int myworkernumber;
@@ -5012,6 +5042,23 @@ CopyFromBgwMainLoop(Datum main_arg)
void *data;
ConditionVariable cv;
+ HeapTuple tuple;
+ Datum *values;
+ bool *nulls;
+ MemoryContext oldcontext = CurrentMemoryContext;
+
+ int prev_leaf_part_index = -1;
+ ResultRelInfo *saved_resultRelInfo = NULL;
+
+ BulkInsertState bistate;
+ uint64 processed = 0;
+ int nBufferedTuples = 0;
+
+#define MAX_BUFFERED_TUPLES 1000
+ HeapTuple *bufferedTuples = NULL; /* initialize to silence warning */
+ Size bufferedTuplesSize = 0;
+ int firstBufferedLineNo = 0;
+
/*
* Establish signal handlers.
*
@@ -5066,10 +5113,25 @@ CopyFromBgwMainLoop(Datum main_arg)
/*
* Attach to the appropriate message queues.
*/
- mq = shm_toc_lookup(toc, myworkernumber, false);
+ mq = shm_toc_lookup(toc, 3 + myworkernumber, false);
shm_mq_set_receiver(mq, MyProc);
mqh = shm_mq_attach(mq, seg, NULL);
+ // void *pt;
+ // pt = shm_toc_lookup(toc, 100500, false);
+ // int i;
+ // // iiiissssssss
+ // i = *(int *) pt;
+ // char *s;
+ // s = (char *) pt + sizeof(int);
+ //
+ //
+ // void *ptp;
+ //
+ // ptp = palloc(sizeof(int) + strlen(s));
+ // *ptp = i;
+
+
/* Restore database connection. */
BackgroundWorkerInitializeConnectionByOid(pst->database_id,
pst->authenticated_user_id);
@@ -5105,7 +5167,44 @@ CopyFromBgwMainLoop(Datum main_arg)
SetLatch(&registrant->procLatch);
/* Do the work. */
- // copy_messages(inqh, outqh);
+ values = (Datum *) palloc(pst->tupDesc->natts * sizeof(Datum));
+ nulls = (bool *) palloc(pst->tupDesc->natts * sizeof(bool));
+
+ volatile char *filename = shm_toc_lookup(toc, 2, false);
+ char *query_string;
+ query_string = filename;
+
+ elog(LOG, "BGWorker copying from query:\n %s", query_string);
+ // raw_parser_result = raw_parser(filename);
+ List *parsetree_list = pg_parse_query(query_string);
+ RawStmt *parsetree = lfirst_node(RawStmt, list_head(parsetree_list));
+ List *querytree_list = pg_analyze_and_rewrite(parsetree, query_string,
+ NULL, 0, NULL);
+ List *plantree_list = pg_plan_queries(querytree_list,
+ CURSOR_OPT_PARALLEL_OK, NULL);
+ PlannedStmt *pstmt = lfirst_node(PlannedStmt, list_head(plantree_list));
+ // Node *parsetree = pstmt->utilityStmt;
+ // elog(LOG, "BGWorker parse tree length: %d", raw_parser_result->length);
+ // PlannedStmt *pstmt = linitial_node(PlannedStmt, raw_parser_result);
+ CopyStmt *cstmnt = (CopyStmt *) pstmt->utilityStmt;
+
+ elog(LOG, "BGWorker filename from CopyStmt: %s", cstmnt->filename);
+ // elog(LOG, "BGWorker copying: %d %d %d", cst->binary, cst->ignore_errors, cst->csv_mode);
+
+ ParseState *pstate;
+ pstate = make_parsestate(NULL);
+ pstate->p_sourcetext = query_string;
+
+ StartTransactionCommand();
+
+ Relation rel;
+ /* Open and lock the relation, using the appropriate lock type. */
+ rel = heap_openrv(cstmnt->relation, RowExclusiveLock);
+ // (is_from ? RowExclusiveLock : AccessShareLock));
+
+ CopyState cstate = BeginCopyFrom(pstate, rel, cstmnt->filename, cstmnt->is_program,
+ NULL, cstmnt->attlist, cstmnt->options);
+
for (;;)
{
char *msg;
@@ -5126,6 +5225,10 @@ CopyFromBgwMainLoop(Datum main_arg)
elog(LOG, "BGWorker #%d dummy processing line: %s", myworkernumber, msg);
}
+ heap_close(rel, NoLock);
+ // (is_from ? NoLock : AccessShareLock));
+ CommitTransactionCommand();
+
LWLockAcquire(CopyFromBgwLock, LW_EXCLUSIVE);
++pst->workers_finished;
LWLockRelease(CopyFromBgwLock);
@@ -5133,6 +5236,9 @@ CopyFromBgwMainLoop(Datum main_arg)
/* Signal main process that we are done. */
ConditionVariableBroadcast(&cv);
+ pfree(values);
+ pfree(nulls);
+
/*
* We're done. Explicitly detach the shared memory segment so that we
* don't get a resource leak warning at commit time. This will fire any
@@ -5169,8 +5275,11 @@ handle_sigterm(SIGNAL_ARGS)
* for a test run.
*/
static ParallelState*
-shm_mq_setup(int64 queue_size, int32 nworkers, dsm_segment **segp,
- shm_mq_handle ***mq_handles)
+setup_parallel_copy_from(int64 queue_size, int32 nworkers, dsm_segment **segp,
+ shm_mq_handle ***mq_handles, CopyState cstate,
+ ParseState *pstate,
+ List *attnamelist,
+ List *options)
{
int i;
dsm_segment *seg;
@@ -5181,7 +5290,10 @@ shm_mq_setup(int64 queue_size, int32 nworkers, dsm_segment **segp,
mqs = palloc0(sizeof(shm_mq *) * nworkers);
/* Set up a dynamic shared memory segment. */
- setup_dynamic_shared_memory(queue_size, nworkers, &seg, &pst, &mqs);
+ setup_dsm(queue_size, nworkers, &seg, &pst, &mqs, cstate,
+ pstate,
+ attnamelist,
+ options);
*segp = seg;
/* Register background workers. */
@@ -5211,21 +5323,31 @@ shm_mq_setup(int64 queue_size, int32 nworkers, dsm_segment **segp,
/*
* Set up a dynamic shared memory segment.
*
- * We set up a small control region that contains only a test_shm_mq_header,
- * plus one region per message queue. There are as many message queues as
- * the number of workers, plus one.
+ * We set up a control region that contains a ParallelState,
+ * plus one region per message queue. There are as many message queues as
+ * the number of workers.
*/
static void
-setup_dynamic_shared_memory(int64 queue_size, int nworkers,
+setup_dsm(int64 queue_size, int nworkers,
dsm_segment **segp, ParallelState **pstp,
- shm_mq ***mqs)
+ shm_mq ***mqs, CopyState cstate,
+ ParseState *pstate,
+ List *attnamelist,
+ List *options)
{
shm_toc_estimator e;
int i;
+ int toc_key = 0;
Size segsize;
dsm_segment *seg;
shm_toc *toc;
ParallelState *pst;
+ char *shm_filename;
+ List *shm_p_rtable;
+ List *shm_attnamelist;
+ List *shm_options;
+
+ Assert(cstate->rel);
/* Ensure a valid queue size. */
if (queue_size < 0 || ((uint64) queue_size) < shm_mq_minimum_size)
@@ -5245,13 +5367,20 @@ setup_dynamic_shared_memory(int64 queue_size, int nworkers,
* requests, we must estimate each chunk separately.
*
* We need one key to register the location of the header, and we need
- * nworkers + 1 keys to track the locations of the message queues.
+ * a few more keys for the range table, statement text, and options, plus
+ * nworkers keys to track the locations of the message queues.
*/
shm_toc_initialize_estimator(&e);
shm_toc_estimate_chunk(&e, sizeof(ParallelState));
- for (i = 0; i <= nworkers; ++i)
+
+ shm_toc_estimate_chunk(&e, sizeof(*pstate->p_rtable));
+ shm_toc_estimate_chunk(&e, strlen(ActivePortal->sourceText) * sizeof(char));
+ // shm_toc_estimate_chunk(&e, sizeof(*attnamelist));
+ shm_toc_estimate_chunk(&e, sizeof(*options));
+
+ for (i = 0; i < nworkers; ++i)
shm_toc_estimate_chunk(&e, (Size) queue_size);
- shm_toc_estimate_keys(&e, 1 + nworkers);
+
+ shm_toc_estimate_keys(&e, 1 + 4 + nworkers);
segsize = shm_toc_estimate(&e);
/* Create the shared memory segment and establish a table of contents. */
@@ -5269,7 +5398,208 @@ setup_dynamic_shared_memory(int64 queue_size, int nworkers,
pst->database_id = MyDatabaseId;
pst->authenticated_user_id = GetAuthenticatedUserId();
ConditionVariableInit(&pst->cv);
- shm_toc_insert(toc, 0, pst);
+
+ pst->is_program = cstate->is_program;
+ pst->rel = cstate->rel;
+
+ pst->estate = CreateExecutorState(); /* for ExecConstraints() */
+ pst->mycid = GetCurrentCommandId(true);
+ pst->hi_options = 0; /* start with default heap_insert options */
+
+ /*
+ * The target must be a plain relation or have an INSTEAD OF INSERT row
+ * trigger. (Currently, such triggers are only allowed on views, so we
+ * only hint about them in the view case.)
+ */
+ if (cstate->rel->rd_rel->relkind != RELKIND_RELATION &&
+ cstate->rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE &&
+ !(cstate->rel->trigdesc &&
+ cstate->rel->trigdesc->trig_insert_instead_row))
+ {
+ if (cstate->rel->rd_rel->relkind == RELKIND_VIEW)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot copy to view \"%s\"",
+ RelationGetRelationName(cstate->rel)),
+ errhint("To enable copying to a view, provide an INSTEAD OF INSERT trigger.")));
+ else if (cstate->rel->rd_rel->relkind == RELKIND_MATVIEW)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot copy to materialized view \"%s\"",
+ RelationGetRelationName(cstate->rel))));
+ else if (cstate->rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot copy to foreign table \"%s\"",
+ RelationGetRelationName(cstate->rel))));
+ else if (cstate->rel->rd_rel->relkind == RELKIND_SEQUENCE)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot copy to sequence \"%s\"",
+ RelationGetRelationName(cstate->rel))));
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot copy to non-table relation \"%s\"",
+ RelationGetRelationName(cstate->rel))));
+ }
+
+ pst->tupDesc = RelationGetDescr(cstate->rel);
+
+ /*----------
+ * Check to see if we can avoid writing WAL
+ *
+ * If archive logging/streaming is not enabled *and* either
+ * - table was created in same transaction as this COPY
+ * - data is being written to relfilenode created in this transaction
+ * then we can skip writing WAL. It's safe because if the transaction
+ * doesn't commit, we'll discard the table (or the new relfilenode file).
+ * If it does commit, we'll have done the heap_sync at the bottom of this
+ * routine first.
+ *
+ * As mentioned in comments in utils/rel.h, the in-same-transaction test
+ * is not always set correctly, since in rare cases rd_newRelfilenodeSubid
+ * can be cleared before the end of the transaction. The exact case is
+ * when a relation sets a new relfilenode twice in same transaction, yet
+ * the second one fails in an aborted subtransaction, e.g.
+ *
+ * BEGIN;
+ * TRUNCATE t;
+ * SAVEPOINT save;
+ * TRUNCATE t;
+ * ROLLBACK TO save;
+ * COPY ...
+ *
+ * Also, if the target file is new-in-transaction, we assume that checking
+ * FSM for free space is a waste of time, even if we must use WAL because
+ * of archiving. This could possibly be wrong, but it's unlikely.
+ *
+ * The comments for heap_insert and RelationGetBufferForTuple specify that
+ * skipping WAL logging is only safe if we ensure that our tuples do not
+ * go into pages containing tuples from any other transactions --- but this
+ * must be the case if we have a new table or new relfilenode, so we need
+ * no additional work to enforce that.
+ *----------
+ */
+ /* createSubid is creation check, newRelfilenodeSubid is truncation check */
+ if (cstate->rel->rd_createSubid != InvalidSubTransactionId ||
+ cstate->rel->rd_newRelfilenodeSubid != InvalidSubTransactionId)
+ {
+ pst->hi_options |= HEAP_INSERT_SKIP_FSM;
+ if (!XLogIsNeeded())
+ pst->hi_options |= HEAP_INSERT_SKIP_WAL;
+ }
+
+ /*
+ * Optimize if new relfilenode was created in this subxact or one of its
+ * committed children and we won't see those rows later as part of an
+ * earlier scan or command. This ensures that if this subtransaction
+ * aborts then the frozen rows won't be visible after xact cleanup. Note
+ * that the stronger test of exactly which subtransaction created it is
+ * crucial for correctness of this optimization.
+ */
+ if (cstate->freeze)
+ {
+ if (!ThereAreNoPriorRegisteredSnapshots() || !ThereAreNoReadyPortals())
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
+ errmsg("cannot perform FREEZE because of prior transaction activity")));
+
+ if (cstate->rel->rd_createSubid != GetCurrentSubTransactionId() &&
+ cstate->rel->rd_newRelfilenodeSubid != GetCurrentSubTransactionId())
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("cannot perform FREEZE because the table was not created or truncated in the current subtransaction")));
+
+ pst->hi_options |= HEAP_INSERT_FROZEN;
+ }
+
+ /*
+ * We need a ResultRelInfo so we can use the regular executor's
+ * index-entry-making machinery. (There used to be a huge amount of code
+ * here that basically duplicated execUtils.c ...)
+ */
+ pst->resultRelInfo = makeNode(ResultRelInfo);
+ InitResultRelInfo(pst->resultRelInfo,
+ cstate->rel,
+ 1, /* dummy rangetable index */
+ NULL,
+ 0);
+
+ ExecOpenIndices(pst->resultRelInfo, false);
+
+ pst->estate->es_result_relations = pst->resultRelInfo;
+ pst->estate->es_num_result_relations = 1;
+ pst->estate->es_result_relation_info = pst->resultRelInfo;
+ pst->estate->es_range_table = cstate->range_table;
+
+ /* Set up a tuple slot too */
+ pst->myslot = ExecInitExtraTupleSlot(pst->estate);
+ ExecSetSlotDescriptor(pst->myslot, pst->tupDesc);
+ /* Triggers might need a slot as well */
+ pst->estate->es_trig_tuple_slot = ExecInitExtraTupleSlot(pst->estate);
+
+ /*
+ * It's more efficient to prepare a bunch of tuples for insertion, and
+ * insert them in one heap_multi_insert() call, than call heap_insert()
+ * separately for every tuple. However, we can't do that if there are
+ * BEFORE/INSTEAD OF triggers, or we need to evaluate volatile default
+ * expressions. Such triggers or expressions might query the table we're
+ * inserting to, and act differently if the tuples that have already been
+ * processed and prepared for insertion are not there. We also can't do
+ * it if the table is partitioned.
+ */
+ pst->useHeapMultiInsert = !((pst->resultRelInfo->ri_TrigDesc != NULL &&
+ (pst->resultRelInfo->ri_TrigDesc->trig_insert_before_row ||
+ pst->resultRelInfo->ri_TrigDesc->trig_insert_instead_row)) ||
+ cstate->partition_dispatch_info != NULL ||
+ cstate->volatile_defexprs);
+
+ /* Prepare to catch AFTER triggers. */
+ AfterTriggerBeginQuery();
+
+ /*
+ * Check BEFORE STATEMENT insertion triggers. It's debatable whether we
+ * should do this for COPY, since it's not really an "INSERT" statement as
+ * such. However, executing these triggers maintains consistency with the
+ * EACH ROW triggers that we already fire on COPY.
+ */
+ ExecBSInsertTriggers(pst->estate, pst->resultRelInfo);
+
+ pst->bistate = GetBulkInsertState();
+ pst->econtext = GetPerTupleExprContext(pst->estate);
+
+ /* Set up callback to identify error line number */
+ pst->errcallback.callback = CopyFromErrorCallback;
+ pst->errcallback.arg = (void *) cstate;
+ pst->errcallback.previous = error_context_stack;
+ error_context_stack = &pst->errcallback;
+
+ shm_toc_insert(toc, toc_key++, pst);
+
+ shm_p_rtable = shm_toc_allocate(toc, sizeof(*pstate->p_rtable));
+ shm_filename = shm_toc_allocate(toc, strlen(ActivePortal->sourceText) * sizeof(char));
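+	/* XXX: strlen() alone leaves no room for the terminating NUL that the
+	 * strcpy() below writes; strlen(...) + 1 would be needed here and in
+	 * the matching shm_toc_estimate_chunk() call. */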
+ // shm_attnamelist = shm_toc_allocate(toc, sizeof(*attnamelist));
+ shm_options = shm_toc_allocate(toc, sizeof(*options));
+
+ *shm_p_rtable = *pstate->p_rtable;
+ // *shm_attnamelist = *attnamelist;
+ *shm_options = *options;
+ strcpy(shm_filename, ActivePortal->sourceText);
+
+ shm_toc_insert(toc, toc_key++, shm_p_rtable);
+ shm_toc_insert(toc, toc_key++, shm_filename);
+ // shm_toc_insert(toc, toc_key++, shm_attnamelist);
+ shm_toc_insert(toc, toc_key++, shm_options);
+
+ // cst = shm_toc_allocate(toc, sizeof(CopyStateData) + strlen(cstate->filename) * sizeof(char));
+ // *cst = *cstate;
+ // cst->filename = shm_toc_allocate(toc, strlen(cstate->filename) * sizeof(char));
+ // // *cst->filename = *cstate->filename;
+ // strcpy(cst->filename, cstate->filename);
+ // // memcpy(cst->filename, cstate->filename, sizeof(*cstate->filename));
+ // // memcpy(cstate, cst, sizeof(CopyStateData));
+ // shm_toc_insert(toc, 1, cst);
/* Set up one message queue per worker, plus one. */
for (i = 0; i < nworkers; ++i)
@@ -5278,7 +5608,7 @@ setup_dynamic_shared_memory(int64 queue_size, int nworkers,
mq = shm_mq_create(shm_toc_allocate(toc, (Size) queue_size),
(Size) queue_size);
- shm_toc_insert(toc, i + 1, mq);
+ shm_toc_insert(toc, toc_key++, mq);
shm_mq_set_sender(mq, MyProc);
(*mqs)[i] = mq;
}
@@ -5477,11 +5807,447 @@ wait_for_workers_to_finish(volatile ParallelState *pst)
ConditionVariableCancelSleep();
}
+// /*
+// * Parse the current line into separate attributes (fields),
+// * performing de-escaping as needed.
+// *
+// * The input is in line_buf. We use attribute_buf to hold the result
+// * strings. cstate->raw_fields[k] is set to point to the k'th attribute
+// * string, or NULL when the input matches the null marker string.
+// * This array is expanded as necessary.
+// *
+// * (Note that the caller cannot check for nulls since the returned
+// * string would be the post-de-escaping equivalent, which may look
+// * the same as some valid data string.)
+// *
+// * delim is the column delimiter string (must be just one byte for now).
+// * null_print is the null marker string. Note that this is compared to
+// * the pre-de-escaped input string.
+// *
+// * The return value is the number of fields actually read.
+// */
+// static int
+// CopyReadTextAttrs(CopyState cstate)
+// {
+// char delimc = cstate->delim[0];
+// int fieldno;
+// char *output_ptr;
+// char *cur_ptr;
+// char *line_end_ptr;
+//
+// /*
+// * We need a special case for zero-column tables: check that the input
+// * line is empty, and return.
+// */
+// if (cstate->max_fields <= 0)
+// {
+// if (cstate->line_buf.len != 0)
+// ereport(ERROR,
+// (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+// errmsg("extra data after last expected column")));
+// return 0;
+// }
+//
+// resetStringInfo(&cstate->attribute_buf);
+//
+// /*
+// * The de-escaped attributes will certainly not be longer than the input
+// * data line, so we can just force attribute_buf to be large enough and
+// * then transfer data without any checks for enough space. We need to do
+// * it this way because enlarging attribute_buf mid-stream would invalidate
+// * pointers already stored into cstate->raw_fields[].
+// */
+// if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
+// enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
+// output_ptr = cstate->attribute_buf.data;
+//
+// /* set pointer variables for loop */
+// cur_ptr = cstate->line_buf.data;
+// line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
+//
+// /* Outer loop iterates over fields */
+// fieldno = 0;
+// for (;;)
+// {
+// bool found_delim = false;
+// char *start_ptr;
+// char *end_ptr;
+// int input_len;
+// bool saw_non_ascii = false;
+//
+// /* Make sure there is enough space for the next value */
+// if (fieldno >= cstate->max_fields)
+// {
+// cstate->max_fields *= 2;
+// cstate->raw_fields =
+// repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));
+// }
+//
+// /* Remember start of field on both input and output sides */
+// start_ptr = cur_ptr;
+// cstate->raw_fields[fieldno] = output_ptr;
+//
+// /*
+// * Scan data for field.
+// *
+// * Note that in this loop, we are scanning to locate the end of field
+// * and also speculatively performing de-escaping. Once we find the
+// * end-of-field, we can match the raw field contents against the null
+// * marker string. Only after that comparison fails do we know that
+// * de-escaping is actually the right thing to do; therefore we *must
+// * not* throw any syntax errors before we've done the null-marker
+// * check.
+// */
+// for (;;)
+// {
+// char c;
+//
+// end_ptr = cur_ptr;
+// if (cur_ptr >= line_end_ptr)
+// break;
+// c = *cur_ptr++;
+// if (c == delimc)
+// {
+// found_delim = true;
+// break;
+// }
+// if (c == '\\')
+// {
+// if (cur_ptr >= line_end_ptr)
+// break;
+// c = *cur_ptr++;
+// switch (c)
+// {
+// case '0':
+// case '1':
+// case '2':
+// case '3':
+// case '4':
+// case '5':
+// case '6':
+// case '7':
+// {
+// /* handle \013 */
+// int val;
+//
+// val = OCTVALUE(c);
+// if (cur_ptr < line_end_ptr)
+// {
+// c = *cur_ptr;
+// if (ISOCTAL(c))
+// {
+// cur_ptr++;
+// val = (val << 3) + OCTVALUE(c);
+// if (cur_ptr < line_end_ptr)
+// {
+// c = *cur_ptr;
+// if (ISOCTAL(c))
+// {
+// cur_ptr++;
+// val = (val << 3) + OCTVALUE(c);
+// }
+// }
+// }
+// }
+// c = val & 0377;
+// if (c == '\0' || IS_HIGHBIT_SET(c))
+// saw_non_ascii = true;
+// }
+// break;
+// case 'x':
+// /* Handle \x3F */
+// if (cur_ptr < line_end_ptr)
+// {
+// char hexchar = *cur_ptr;
+//
+// if (isxdigit((unsigned char) hexchar))
+// {
+// int val = GetDecimalFromHex(hexchar);
+//
+// cur_ptr++;
+// if (cur_ptr < line_end_ptr)
+// {
+// hexchar = *cur_ptr;
+// if (isxdigit((unsigned char) hexchar))
+// {
+// cur_ptr++;
+// val = (val << 4) + GetDecimalFromHex(hexchar);
+// }
+// }
+// c = val & 0xff;
+// if (c == '\0' || IS_HIGHBIT_SET(c))
+// saw_non_ascii = true;
+// }
+// }
+// break;
+// case 'b':
+// c = '\b';
+// break;
+// case 'f':
+// c = '\f';
+// break;
+// case 'n':
+// c = '\n';
+// break;
+// case 'r':
+// c = '\r';
+// break;
+// case 't':
+// c = '\t';
+// break;
+// case 'v':
+// c = '\v';
+// break;
+//
+// /*
+// * in all other cases, take the char after '\'
+// * literally
+// */
+// }
+// }
+//
+// /* Add c to output string */
+// *output_ptr++ = c;
+// }
+//
+// /* Check whether raw input matched null marker */
+// input_len = end_ptr - start_ptr;
+// if (input_len == cstate->null_print_len &&
+// strncmp(start_ptr, cstate->null_print, input_len) == 0)
+// cstate->raw_fields[fieldno] = NULL;
+// else
+// {
+// /*
+// * At this point we know the field is supposed to contain data.
+// *
+// * If we de-escaped any non-7-bit-ASCII chars, make sure the
+// * resulting string is valid data for the db encoding.
+// */
+// if (saw_non_ascii)
+// {
+// char *fld = cstate->raw_fields[fieldno];
+//
+// pg_verifymbstr(fld, output_ptr - fld, false);
+// }
+// }
+//
+// /* Terminate attribute value in output area */
+// *output_ptr++ = '\0';
+//
+// fieldno++;
+// /* Done if we hit EOL instead of a delim */
+// if (!found_delim)
+// break;
+// }
+//
+// /* Clean up state of attribute_buf */
+// output_ptr--;
+// Assert(*output_ptr == '\0');
+// cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
+//
+// return fieldno;
+// }
+//
+// /*
+// * Parse the current line into separate attributes (fields),
+// * performing de-escaping as needed. This has exactly the same API as
+// * CopyReadAttributesText, except we parse the fields according to
+// * "standard" (i.e. common) CSV usage.
+// */
+// static int
+// CopyReadCSVAttrs(CopyState cstate)
+// {
+// char delimc = cstate->delim[0];
+// char quotec = cstate->quote[0];
+// char escapec = cstate->escape[0];
+// int fieldno;
+// char *output_ptr;
+// char *cur_ptr;
+// char *line_end_ptr;
+//
+// /*
+// * We need a special case for zero-column tables: check that the input
+// * line is empty, and return.
+// */
+// if (cstate->max_fields <= 0)
+// {
+// if (cstate->line_buf.len != 0)
+// ereport(ERROR,
+// (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+// errmsg("extra data after last expected column")));
+// return 0;
+// }
+//
+// resetStringInfo(&cstate->attribute_buf);
+//
+// /*
+// * The de-escaped attributes will certainly not be longer than the input
+// * data line, so we can just force attribute_buf to be large enough and
+// * then transfer data without any checks for enough space. We need to do
+// * it this way because enlarging attribute_buf mid-stream would invalidate
+// * pointers already stored into cstate->raw_fields[].
+// */
+// if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
+// enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
+// output_ptr = cstate->attribute_buf.data;
+//
+// /* set pointer variables for loop */
+// cur_ptr = cstate->line_buf.data;
+// line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
+//
+// /* Outer loop iterates over fields */
+// fieldno = 0;
+// for (;;)
+// {
+// bool found_delim = false;
+// bool saw_quote = false;
+// char *start_ptr;
+// char *end_ptr;
+// int input_len;
+//
+// /* Make sure there is enough space for the next value */
+// if (fieldno >= cstate->max_fields)
+// {
+// cstate->max_fields *= 2;
+// cstate->raw_fields =
+// repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));
+// }
+//
+// /* Remember start of field on both input and output sides */
+// start_ptr = cur_ptr;
+// cstate->raw_fields[fieldno] = output_ptr;
+//
+// /*
+// * Scan data for field,
+// *
+// * The loop starts in "not quote" mode and then toggles between that
+// * and "in quote" mode. The loop exits normally if it is in "not
+// * quote" mode and a delimiter or line end is seen.
+// */
+// for (;;)
+// {
+// char c;
+//
+// /* Not in quote */
+// for (;;)
+// {
+// end_ptr = cur_ptr;
+// if (cur_ptr >= line_end_ptr)
+// goto endfield;
+// c = *cur_ptr++;
+// /* unquoted field delimiter */
+// if (c == delimc)
+// {
+// found_delim = true;
+// goto endfield;
+// }
+// /* start of quoted field (or part of field) */
+// if (c == quotec)
+// {
+// saw_quote = true;
+// break;
+// }
+// /* Add c to output string */
+// *output_ptr++ = c;
+// }
+//
+// /* In quote */
+// for (;;)
+// {
+// end_ptr = cur_ptr;
+// if (cur_ptr >= line_end_ptr)
+// ereport(ERROR,
+// (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+// errmsg("unterminated CSV quoted field")));
+//
+// c = *cur_ptr++;
+//
+// /* escape within a quoted field */
+// if (c == escapec)
+// {
+// /*
+// * peek at the next char if available, and escape it if it
+// * is an escape char or a quote char
+// */
+// if (cur_ptr < line_end_ptr)
+// {
+// char nextc = *cur_ptr;
+//
+// if (nextc == escapec || nextc == quotec)
+// {
+// *output_ptr++ = nextc;
+// cur_ptr++;
+// continue;
+// }
+// }
+// }
+//
+// /*
+// * end of quoted field. Must do this test after testing for
+// * escape in case quote char and escape char are the same
+// * (which is the common case).
+// */
+// if (c == quotec)
+// break;
+//
+// /* Add c to output string */
+// *output_ptr++ = c;
+// }
+// }
+// endfield:
+//
+// /* Terminate attribute value in output area */
+// *output_ptr++ = '\0';
+//
+// /* Check whether raw input matched null marker */
+// input_len = end_ptr - start_ptr;
+// if (!saw_quote && input_len == cstate->null_print_len &&
+// strncmp(start_ptr, cstate->null_print, input_len) == 0)
+// cstate->raw_fields[fieldno] = NULL;
+//
+// fieldno++;
+// /* Done if we hit EOL instead of a delim */
+// if (!found_delim)
+// break;
+// }
+//
+// /* Clean up state of attribute_buf */
+// output_ptr--;
+// Assert(*output_ptr == '\0');
+// cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
+//
+// return fieldno;
+// }
+//
+// /*
+// *
+// */
+// bool
+// CopyFromReadAttributes(char *line, bool csv_mode, char ***fields, int *nfields)
+// {
+// int fldct;
+//
+// /* Parse the line into de-escaped field values */
+// if (csv_mode)
+// fldct = CopyReadCSVAttrs(cstate);
+// else
+// fldct = CopyReadTextAttrs(cstate);
+//
+// *fields = cstate->raw_fields;
+// *nfields = fldct;
+// return true;
+// }
+
/*
* Parallel Copy FROM file to relation.
*/
extern uint64
-ParallelCopyFrom(CopyState cstate)
+ParallelCopyFrom(CopyState cstate, ParseState *pstate,
+ Relation rel,
+ const char *filename,
+ bool is_program,
+ List *attnamelist,
+ List *options)
{
ParallelState *pst;
dsm_segment *seg;
@@ -5491,222 +6257,45 @@ ParallelCopyFrom(CopyState cstate)
shm_mq_result shmq_res;
int last_worker_used = 0;
// int64 message = 0;
- char *message;
+ // char *message;
// char *message_contents = VARDATA_ANY(message);
// int message_size = VARSIZE_ANY_EXHDR(message);
int message_size = sizeof(char);
mq_handles = palloc0(sizeof(shm_mq_handle *) * nworkers);
- HeapTuple tuple;
- TupleDesc tupDesc;
- Datum *values;
- bool *nulls;
- ResultRelInfo *resultRelInfo;
- ResultRelInfo *saved_resultRelInfo = NULL;
- EState *estate = CreateExecutorState(); /* for ExecConstraints() */
- ExprContext *econtext;
- TupleTableSlot *myslot;
- MemoryContext oldcontext = CurrentMemoryContext;
-
- ErrorContextCallback errcallback;
- CommandId mycid = GetCurrentCommandId(true);
- int hi_options = 0; /* start with default heap_insert options */
- BulkInsertState bistate;
- uint64 processed = 0;
- bool useHeapMultiInsert;
- int nBufferedTuples = 0;
- int prev_leaf_part_index = -1;
-
-#define MAX_BUFFERED_TUPLES 1000
- HeapTuple *bufferedTuples = NULL; /* initialize to silence warning */
- Size bufferedTuplesSize = 0;
- int firstBufferedLineNo = 0;
+ MemoryContext oldcontext = CurrentMemoryContext;
- Assert(cstate->rel);
+ // TupleDesc tupDesc;
+ // ResultRelInfo *resultRelInfo;
+ // EState *estate = CreateExecutorState(); /* for ExecConstraints() */
+ // ExprContext *econtext;
+ // TupleTableSlot *myslot;
+ //
+ // ErrorContextCallback errcallback;
+ // CommandId mycid = GetCurrentCommandId(true);
+ // int hi_options = 0; /* start with default heap_insert options */
+ // BulkInsertState bistate;
+ // bool useHeapMultiInsert;
+
+ // ResultRelInfo *saved_resultRelInfo = NULL;
+ // int prev_leaf_part_index = -1;
+ uint64 processed = 0;
+ // int nBufferedTuples = 0;
+// #define MAX_BUFFERED_TUPLES 1000
+// HeapTuple *bufferedTuples = NULL; /* initialize to silence warning */
+// Size bufferedTuplesSize = 0;
+// int firstBufferedLineNo = 0;
// MQ size = 100 messages x 80 chars each
- pst = shm_mq_setup(message_size * 80 * queue_size, nworkers, &seg, &mq_handles);
-
- /*
- * The target must be a plain relation or have an INSTEAD OF INSERT row
- * trigger. (Currently, such triggers are only allowed on views, so we
- * only hint about them in the view case.)
- */
- if (cstate->rel->rd_rel->relkind != RELKIND_RELATION &&
- cstate->rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE &&
- !(cstate->rel->trigdesc &&
- cstate->rel->trigdesc->trig_insert_instead_row))
- {
- if (cstate->rel->rd_rel->relkind == RELKIND_VIEW)
- ereport(ERROR,
- (errcode(ERRCODE_WRONG_OBJECT_TYPE),
- errmsg("cannot copy to view \"%s\"",
- RelationGetRelationName(cstate->rel)),
- errhint("To enable copying to a view, provide an INSTEAD OF INSERT trigger.")));
- else if (cstate->rel->rd_rel->relkind == RELKIND_MATVIEW)
- ereport(ERROR,
- (errcode(ERRCODE_WRONG_OBJECT_TYPE),
- errmsg("cannot copy to materialized view \"%s\"",
- RelationGetRelationName(cstate->rel))));
- else if (cstate->rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
- ereport(ERROR,
- (errcode(ERRCODE_WRONG_OBJECT_TYPE),
- errmsg("cannot copy to foreign table \"%s\"",
- RelationGetRelationName(cstate->rel))));
- else if (cstate->rel->rd_rel->relkind == RELKIND_SEQUENCE)
- ereport(ERROR,
- (errcode(ERRCODE_WRONG_OBJECT_TYPE),
- errmsg("cannot copy to sequence \"%s\"",
- RelationGetRelationName(cstate->rel))));
- else
- ereport(ERROR,
- (errcode(ERRCODE_WRONG_OBJECT_TYPE),
- errmsg("cannot copy to non-table relation \"%s\"",
- RelationGetRelationName(cstate->rel))));
- }
-
- tupDesc = RelationGetDescr(cstate->rel);
-
- /*----------
- * Check to see if we can avoid writing WAL
- *
- * If archive logging/streaming is not enabled *and* either
- * - table was created in same transaction as this COPY
- * - data is being written to relfilenode created in this transaction
- * then we can skip writing WAL. It's safe because if the transaction
- * doesn't commit, we'll discard the table (or the new relfilenode file).
- * If it does commit, we'll have done the heap_sync at the bottom of this
- * routine first.
- *
- * As mentioned in comments in utils/rel.h, the in-same-transaction test
- * is not always set correctly, since in rare cases rd_newRelfilenodeSubid
- * can be cleared before the end of the transaction. The exact case is
- * when a relation sets a new relfilenode twice in same transaction, yet
- * the second one fails in an aborted subtransaction, e.g.
- *
- * BEGIN;
- * TRUNCATE t;
- * SAVEPOINT save;
- * TRUNCATE t;
- * ROLLBACK TO save;
- * COPY ...
- *
- * Also, if the target file is new-in-transaction, we assume that checking
- * FSM for free space is a waste of time, even if we must use WAL because
- * of archiving. This could possibly be wrong, but it's unlikely.
- *
- * The comments for heap_insert and RelationGetBufferForTuple specify that
- * skipping WAL logging is only safe if we ensure that our tuples do not
- * go into pages containing tuples from any other transactions --- but this
- * must be the case if we have a new table or new relfilenode, so we need
- * no additional work to enforce that.
- *----------
- */
- /* createSubid is creation check, newRelfilenodeSubid is truncation check */
- if (cstate->rel->rd_createSubid != InvalidSubTransactionId ||
- cstate->rel->rd_newRelfilenodeSubid != InvalidSubTransactionId)
- {
- hi_options |= HEAP_INSERT_SKIP_FSM;
- if (!XLogIsNeeded())
- hi_options |= HEAP_INSERT_SKIP_WAL;
- }
-
- /*
- * Optimize if new relfilenode was created in this subxact or one of its
- * committed children and we won't see those rows later as part of an
- * earlier scan or command. This ensures that if this subtransaction
- * aborts then the frozen rows won't be visible after xact cleanup. Note
- * that the stronger test of exactly which subtransaction created it is
- * crucial for correctness of this optimization.
- */
- if (cstate->freeze)
- {
- if (!ThereAreNoPriorRegisteredSnapshots() || !ThereAreNoReadyPortals())
- ereport(ERROR,
- (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
- errmsg("cannot perform FREEZE because of prior transaction activity")));
-
- if (cstate->rel->rd_createSubid != GetCurrentSubTransactionId() &&
- cstate->rel->rd_newRelfilenodeSubid != GetCurrentSubTransactionId())
- ereport(ERROR,
- (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
- errmsg("cannot perform FREEZE because the table was not created or truncated in the current subtransaction")));
-
- hi_options |= HEAP_INSERT_FROZEN;
- }
-
- /*
- * We need a ResultRelInfo so we can use the regular executor's
- * index-entry-making machinery. (There used to be a huge amount of code
- * here that basically duplicated execUtils.c ...)
- */
- resultRelInfo = makeNode(ResultRelInfo);
- InitResultRelInfo(resultRelInfo,
- cstate->rel,
- 1, /* dummy rangetable index */
- NULL,
- 0);
-
- ExecOpenIndices(resultRelInfo, false);
-
- estate->es_result_relations = resultRelInfo;
- estate->es_num_result_relations = 1;
- estate->es_result_relation_info = resultRelInfo;
- estate->es_range_table = cstate->range_table;
-
- /* Set up a tuple slot too */
- myslot = ExecInitExtraTupleSlot(estate);
- ExecSetSlotDescriptor(myslot, tupDesc);
- /* Triggers might need a slot as well */
- estate->es_trig_tuple_slot = ExecInitExtraTupleSlot(estate);
-
- /*
- * It's more efficient to prepare a bunch of tuples for insertion, and
- * insert them in one heap_multi_insert() call, than call heap_insert()
- * separately for every tuple. However, we can't do that if there are
- * BEFORE/INSTEAD OF triggers, or we need to evaluate volatile default
- * expressions. Such triggers or expressions might query the table we're
- * inserting to, and act differently if the tuples that have already been
- * processed and prepared for insertion are not there. We also can't do
- * it if the table is partitioned.
- */
- if ((resultRelInfo->ri_TrigDesc != NULL &&
- (resultRelInfo->ri_TrigDesc->trig_insert_before_row ||
- resultRelInfo->ri_TrigDesc->trig_insert_instead_row)) ||
- cstate->partition_dispatch_info != NULL ||
- cstate->volatile_defexprs)
- {
- useHeapMultiInsert = false;
- }
- else
- {
- useHeapMultiInsert = true;
- bufferedTuples = palloc(MAX_BUFFERED_TUPLES * sizeof(HeapTuple));
- }
-
- /* Prepare to catch AFTER triggers. */
- AfterTriggerBeginQuery();
-
- /*
- * Check BEFORE STATEMENT insertion triggers. It's debatable whether we
- * should do this for COPY, since it's not really an "INSERT" statement as
- * such. However, executing these triggers maintains consistency with the
- * EACH ROW triggers that we already fire on COPY.
- */
- ExecBSInsertTriggers(estate, resultRelInfo);
-
- values = (Datum *) palloc(tupDesc->natts * sizeof(Datum));
- nulls = (bool *) palloc(tupDesc->natts * sizeof(bool));
-
- bistate = GetBulkInsertState();
- econtext = GetPerTupleExprContext(estate);
-
- /* Set up callback to identify error line number */
- errcallback.callback = CopyFromErrorCallback;
- errcallback.arg = (void *) cstate;
- errcallback.previous = error_context_stack;
- error_context_stack = &errcallback;
+ pst = setup_parallel_copy_from(message_size * 80 * queue_size,
+ nworkers,
+ &seg,
+ &mq_handles,
+ cstate,
+ pstate,
+ attnamelist,
+ options);
// // BG Worker startup
// RegisterDynamicBackgroundWorker(&worker, &bgwhandle);
@@ -5714,6 +6303,8 @@ ParallelCopyFrom(CopyState cstate)
// elog(LOG, "Main COPY process (pid %d): BGWorker started (pid %d)", MyProcPid, bgwpid);
// // BG Worker startup
+ elog(LOG, "Copying from file %s", cstate->filename);
+
for (;;)
{
// TupleTableSlot *slot;
@@ -6066,17 +6657,14 @@ ParallelCopyFrom(CopyState cstate)
pq_endmsgread();
/* Execute AFTER STATEMENT insertion triggers */
- ExecASInsertTriggers(estate, resultRelInfo, cstate->transition_capture);
+ ExecASInsertTriggers(pst->estate, pst->resultRelInfo, cstate->transition_capture);
/* Handle queued AFTER triggers */
- AfterTriggerEndQuery(estate);
-
- pfree(values);
- pfree(nulls);
+ AfterTriggerEndQuery(pst->estate);
- ExecResetTupleTable(estate->es_tupleTable, false);
+ ExecResetTupleTable(pst->estate->es_tupleTable, false);
- ExecCloseIndices(resultRelInfo);
+ ExecCloseIndices(pst->resultRelInfo);
/* Close all the partitioned tables, leaf partitions, and their indices */
if (cstate->partition_dispatch_info)
@@ -6109,15 +6697,15 @@ ParallelCopyFrom(CopyState cstate)
}
/* Close any trigger target relations */
- ExecCleanUpTriggerState(estate);
+ ExecCleanUpTriggerState(pst->estate);
- FreeExecutorState(estate);
+ FreeExecutorState(pst->estate);
/*
* If we skipped writing WAL, then we need to sync the heap (but not
* indexes since those use WAL anyway)
*/
- if (hi_options & HEAP_INSERT_SKIP_WAL)
+ if (pst->hi_options & HEAP_INSERT_SKIP_WAL)
heap_sync(cstate->rel);
/* Clean up. */
diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h
index 2935e567ae..dbc769828f 100644
--- a/src/include/commands/copy.h
+++ b/src/include/commands/copy.h
@@ -40,7 +40,12 @@ extern void CopyFromErrorCallback(void *arg);
extern void CopyFromBgwMainLoop(Datum main_arg);
extern uint64 CopyFrom(CopyState cstate);
-extern uint64 ParallelCopyFrom(CopyState cstate);
+extern uint64 ParallelCopyFrom(CopyState cstate, ParseState *pstate,
+ Relation rel,
+ const char *filename,
+ bool is_program,
+ List *attnamelist,
+ List *options);
extern DestReceiver *CreateCopyDestReceiver(void);
--
2.11.0
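The "dirty trick" of the patch above, then, is to serialize nothing but the statement's source text: CopyState is full of pointers into backend-local memory (a FILE handle, StringInfo buffers, a relcache entry), so the leader drops the query string into the shm_toc and each worker re-derives its own CopyState by re-running pg_parse_query(), pg_analyze_and_rewrite(), pg_plan_queries() and BeginCopyFrom(). Below is a standalone sketch, not server code, of why only the flat text survives the process boundary; the anonymous shared mapping stands in for the dsm_segment/shm_toc entry, and the COPY statement shown is invented.

/*
 * Standalone sketch -- NOT server code. A struct full of pointers is
 * meaningless in another process's address space, but a NUL-terminated
 * string stored in shared memory survives the boundary, so the worker
 * can rebuild its state from the text alone.
 */
#include <stdio.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

#define SEG_SIZE 4096

int
main(void)
{
	const char *query =
		"COPY measurements FROM '/tmp/data.csv' WITH (FORMAT csv)";
	char	   *seg;
	pid_t		pid;

	/* The moral equivalent of the DSM segment shared with the workers. */
	seg = mmap(NULL, SEG_SIZE, PROT_READ | PROT_WRITE,
			   MAP_SHARED | MAP_ANONYMOUS, -1, 0);
	if (seg == MAP_FAILED)
	{
		perror("mmap");
		return 1;
	}

	/* Leader side: store only the flat source text, not parsed structs. */
	snprintf(seg, SEG_SIZE, "%s", query);

	pid = fork();
	if (pid == 0)
	{
		/*
		 * Worker side: read the text back and rebuild everything from it.
		 * In the patch this is where pg_parse_query(),
		 * pg_analyze_and_rewrite() and BeginCopyFrom() run again inside
		 * the background worker.
		 */
		printf("worker %d re-parsing: %s\n", (int) getpid(), seg);
		_exit(0);
	}

	waitpid(pid, NULL, 0);
	munmap(seg, SEG_SIZE);
	return 0;
}

Copying the struct byte-for-byte (the commented-out shm_toc experiments visible in the patch) would hand the worker pointers into the leader's address space; re-parsing the text avoids that entirely, at the cost of planning the statement once per worker.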
From 749717cbf9049ed419f99177bb5bcd1aad68b3eb Mon Sep 17 00:00:00 2001
From: Alex K <alex.lumir@gmail.com>
Date: Fri, 18 Aug 2017 18:34:39 +0300
Subject: [PATCH 10/13] Working parallel COPY FROM
---
src/backend/commands/copy.c | 1747 ++++++++++++++++---------------------------
1 file changed, 638 insertions(+), 1109 deletions(-)
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 7f38e70bf3..083291d1c7 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -257,8 +257,8 @@ typedef struct
typedef struct
{
- slock_t mutex;
- int curr_line;
+ // slock_t mutex;
+ uint64 processed;
int workers_total;
int workers_attached;
int workers_ready;
@@ -266,21 +266,6 @@ typedef struct
Oid database_id;
Oid authenticated_user_id;
ConditionVariable cv;
-
- bool is_program;
- Relation rel;
-
- TupleDesc tupDesc;
- ResultRelInfo *resultRelInfo;
- EState *estate;
- ExprContext *econtext;
- TupleTableSlot *myslot;
-
- ErrorContextCallback errcallback;
- CommandId mycid;
- int hi_options;
- BulkInsertState bistate;
- bool useHeapMultiInsert;
} ParallelState;
// TODO Consider change
@@ -3341,26 +3326,29 @@ NextCopyFromRawFields(CopyState cstate, char ***fields, int *nfields)
/* only available for text or csv input */
Assert(!cstate->binary);
- /* on input just throw the header line away */
- if (cstate->cur_lineno == 0 && cstate->header_line)
- {
- cstate->cur_lineno++;
- if (CopyReadLine(cstate))
- return false; /* done */
- }
+ if (!IsBackgroundWorker)
+ {
+ /* on input just throw the header line away */
+ if (cstate->cur_lineno == 0 && cstate->header_line)
+ {
+ cstate->cur_lineno++;
+ if (CopyReadLine(cstate))
+ return false; /* done */
+ }
- cstate->cur_lineno++;
+ cstate->cur_lineno++;
- /* Actually read the line into memory here */
- done = CopyReadLine(cstate);
+ /* Actually read the line into memory here */
+ done = CopyReadLine(cstate);
- /*
- * EOF at start of line means we're done. If we see EOF after some
- * characters, we act as though it was newline followed by EOF, ie,
- * process the line and then exit loop on next iteration.
- */
- if (done && cstate->line_buf.len == 0)
- return false;
+ /*
+ * EOF at start of line means we're done. If we see EOF after some
+ * characters, we act as though it was newline followed by EOF, ie,
+ * process the line and then exit loop on next iteration.
+ */
+ if (done && cstate->line_buf.len == 0)
+ return false;
+ }
/* Parse the line into de-escaped field values */
if (cstate->csv_mode)
@@ -5029,7 +5017,6 @@ void
CopyFromBgwMainLoop(Datum main_arg)
{
volatile ParallelState *pst;
- // volatile CopyState cst;
dsm_segment *seg;
shm_toc *toc;
@@ -5044,21 +5031,43 @@ CopyFromBgwMainLoop(Datum main_arg)
HeapTuple tuple;
Datum *values;
- bool *nulls;
+ bool *nulls;
MemoryContext oldcontext = CurrentMemoryContext;
- int prev_leaf_part_index = -1;
- ResultRelInfo *saved_resultRelInfo = NULL;
+ int prev_leaf_part_index = -1;
+ ResultRelInfo *saved_resultRelInfo = NULL;
- BulkInsertState bistate;
- uint64 processed = 0;
- int nBufferedTuples = 0;
+ uint64 processed = 0;
+ int nBufferedTuples = 0;
#define MAX_BUFFERED_TUPLES 1000
HeapTuple *bufferedTuples = NULL; /* initialize to silence warning */
Size bufferedTuplesSize = 0;
int firstBufferedLineNo = 0;
+ TupleDesc tupDesc;
+ ResultRelInfo *resultRelInfo;
+ EState *estate;
+ ExprContext *econtext;
+ TupleTableSlot *myslot;
+
+ ErrorContextCallback errcallback;
+ CommandId mycid;
+ int hi_options;
+ BulkInsertState bistate;
+ bool useHeapMultiInsert;
+
+ char *query_string;
+ List *parsetree_list;
+ RawStmt *parsetree;
+ List *querytree_list;
+ List *plantree_list;
+ PlannedStmt *pstmt;
+ CopyStmt *cstmnt;
+ ParseState *pstate;
+ CopyState cstate;
+ Relation rel;
+
/*
* Establish signal handlers.
*
@@ -5079,7 +5088,7 @@ CopyFromBgwMainLoop(Datum main_arg)
* of contents so we can locate the various data structures we'll need to
* find within the segment.
*/
- CurrentResourceOwner = ResourceOwnerCreate(NULL, "test_shm_mq worker");
+ CurrentResourceOwner = ResourceOwnerCreate(NULL, "COPY FROM worker");
seg = dsm_attach(DatumGetInt32(main_arg));
if (seg == NULL)
ereport(ERROR,
@@ -5113,25 +5122,10 @@ CopyFromBgwMainLoop(Datum main_arg)
/*
* Attach to the appropriate message queues.
*/
- mq = shm_toc_lookup(toc, 3 + myworkernumber, false);
+ mq = shm_toc_lookup(toc, 1 + myworkernumber, false);
shm_mq_set_receiver(mq, MyProc);
mqh = shm_mq_attach(mq, seg, NULL);
- // void *pt;
- // pt = shm_toc_lookup(toc, 100500, false);
- // int i;
- // // iiiissssssss
- // i = *(int *) pt;
- // char *s;
- // s = (char *) pt + sizeof(int);
- //
- //
- // void *ptp;
- //
- // ptp = palloc(sizeof(int) + strlen(s));
- // *ptp = i;
-
-
/* Restore database connection. */
BackgroundWorkerInitializeConnectionByOid(pst->database_id,
pst->authenticated_user_id);
@@ -5142,6 +5136,23 @@ CopyFromBgwMainLoop(Datum main_arg)
*/
SetClientEncoding(GetDatabaseEncoding());
+ query_string = shm_toc_lookup(toc, 1, false);
+
+ elog(LOG, "BGWorker copying from query:\n %s", query_string);
+ parsetree_list = pg_parse_query(query_string);
+ parsetree = lfirst_node(RawStmt, list_head(parsetree_list));
+ querytree_list = pg_analyze_and_rewrite(parsetree, query_string,
+ NULL, 0, NULL);
+ plantree_list = pg_plan_queries(querytree_list,
+ CURSOR_OPT_PARALLEL_OK, NULL);
+ pstmt = lfirst_node(PlannedStmt, list_head(plantree_list));
+ cstmnt = (CopyStmt *) pstmt->utilityStmt;
+
+ elog(LOG, "BGWorker #%d filename from CopyStmt: %s", myworkernumber, cstmnt->filename);
+
+ pstate = make_parsestate(NULL);
+ pstate->p_sourcetext = query_string;
+
/*
* Indicate that we're fully initialized and ready to begin the main part
* of the parallel operation.
@@ -5155,59 +5166,233 @@ CopyFromBgwMainLoop(Datum main_arg)
// SpinLockAcquire(&pst->mutex);
++pst->workers_ready;
cv = pst->cv;
- elog(LOG, "BGWorker #%d started", myworkernumber);
- // SpinLockRelease(&pst->mutex);
+ // if (pst->workers_ready == pst->workers_total)
+ // {
+ // registrant = BackendPidGetProc(MyBgworkerEntry->bgw_notify_pid);
+ // if (registrant == NULL)
+ // {
+ // elog(DEBUG1, "registrant backend has exited prematurely");
+ // proc_exit(1);
+ // }
+ // SetLatch(&registrant->procLatch);
+ // }
+ // elog(LOG, "BGWorker #%d started", myworkernumber);
+ // // SpinLockRelease(&pst->mutex);
LWLockRelease(CopyFromBgwLock);
+
registrant = BackendPidGetProc(MyBgworkerEntry->bgw_notify_pid);
- if (registrant == NULL)
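+	/* XXX: BackendPidGetProc() can return NULL if the registrant backend
+	 * has already exited; the unconditional SetLatch() below would then
+	 * dereference a NULL pointer. */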
+ SetLatch(&registrant->procLatch);
+
+ StartTransactionCommand();
+
+ /* Open and lock the relation, using the appropriate lock type. */
+ rel = heap_openrv(cstmnt->relation, RowExclusiveLock);
+ // (is_from ? RowExclusiveLock : AccessShareLock));
+
+ cstate = BeginCopyFrom(pstate, rel, cstmnt->filename, cstmnt->is_program,
+ NULL, cstmnt->attlist, cstmnt->options);
+
+ estate = CreateExecutorState(); /* for ExecConstraints() */
+ mycid = GetCurrentCommandId(true);
+ hi_options = 0; /* start with default heap_insert options */
+
+ /*
+ * The target must be a plain relation or have an INSTEAD OF INSERT row
+ * trigger. (Currently, such triggers are only allowed on views, so we
+ * only hint about them in the view case.)
+ */
+ if (cstate->rel->rd_rel->relkind != RELKIND_RELATION &&
+ cstate->rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE &&
+ !(cstate->rel->trigdesc &&
+ cstate->rel->trigdesc->trig_insert_instead_row))
{
- elog(DEBUG1, "registrant backend has exited prematurely");
- proc_exit(1);
+ if (cstate->rel->rd_rel->relkind == RELKIND_VIEW)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot copy to view \"%s\"",
+ RelationGetRelationName(cstate->rel)),
+ errhint("To enable copying to a view, provide an INSTEAD OF INSERT trigger.")));
+ else if (cstate->rel->rd_rel->relkind == RELKIND_MATVIEW)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot copy to materialized view \"%s\"",
+ RelationGetRelationName(cstate->rel))));
+ else if (cstate->rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot copy to foreign table \"%s\"",
+ RelationGetRelationName(cstate->rel))));
+ else if (cstate->rel->rd_rel->relkind == RELKIND_SEQUENCE)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot copy to sequence \"%s\"",
+ RelationGetRelationName(cstate->rel))));
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot copy to non-table relation \"%s\"",
+ RelationGetRelationName(cstate->rel))));
}
- SetLatch(&registrant->procLatch);
- /* Do the work. */
- values = (Datum *) palloc(pst->tupDesc->natts * sizeof(Datum));
- nulls = (bool *) palloc(pst->tupDesc->natts * sizeof(bool));
+ tupDesc = RelationGetDescr(cstate->rel);
- volatile char *filename = shm_toc_lookup(toc, 2, false);
- char *query_string;
- query_string = filename;
+ /*----------
+ * Check to see if we can avoid writing WAL
+ *
+ * If archive logging/streaming is not enabled *and* either
+ * - table was created in same transaction as this COPY
+ * - data is being written to relfilenode created in this transaction
+ * then we can skip writing WAL. It's safe because if the transaction
+ * doesn't commit, we'll discard the table (or the new relfilenode file).
+ * If it does commit, we'll have done the heap_sync at the bottom of this
+ * routine first.
+ *
+ * As mentioned in comments in utils/rel.h, the in-same-transaction test
+ * is not always set correctly, since in rare cases rd_newRelfilenodeSubid
+ * can be cleared before the end of the transaction. The exact case is
+ * when a relation sets a new relfilenode twice in same transaction, yet
+ * the second one fails in an aborted subtransaction, e.g.
+ *
+ * BEGIN;
+ * TRUNCATE t;
+ * SAVEPOINT save;
+ * TRUNCATE t;
+ * ROLLBACK TO save;
+ * COPY ...
+ *
+ * Also, if the target file is new-in-transaction, we assume that checking
+ * FSM for free space is a waste of time, even if we must use WAL because
+ * of archiving. This could possibly be wrong, but it's unlikely.
+ *
+ * The comments for heap_insert and RelationGetBufferForTuple specify that
+ * skipping WAL logging is only safe if we ensure that our tuples do not
+ * go into pages containing tuples from any other transactions --- but this
+ * must be the case if we have a new table or new relfilenode, so we need
+ * no additional work to enforce that.
+ *----------
+ */
+ /* createSubid is creation check, newRelfilenodeSubid is truncation check */
+ if (cstate->rel->rd_createSubid != InvalidSubTransactionId ||
+ cstate->rel->rd_newRelfilenodeSubid != InvalidSubTransactionId)
+ {
+ hi_options |= HEAP_INSERT_SKIP_FSM;
+ if (!XLogIsNeeded())
+ hi_options |= HEAP_INSERT_SKIP_WAL;
+ }
- elog(LOG, "BGWorker copying from query:\n %s", query_string);
- // raw_parser_result = raw_parser(filename);
- List *parsetree_list = pg_parse_query(query_string);
- RawStmt *parsetree = lfirst_node(RawStmt, list_head(parsetree_list));
- List *querytree_list = pg_analyze_and_rewrite(parsetree, query_string,
- NULL, 0, NULL);
- List *plantree_list = pg_plan_queries(querytree_list,
- CURSOR_OPT_PARALLEL_OK, NULL);
- PlannedStmt *pstmt = lfirst_node(PlannedStmt, list_head(plantree_list));
- // Node *parsetree = pstmt->utilityStmt;
- // elog(LOG, "BGWorker parse tree length: %d", raw_parser_result->length);
- // PlannedStmt *pstmt = linitial_node(PlannedStmt, raw_parser_result);
- CopyStmt *cstmnt = (CopyStmt *) pstmt->utilityStmt;
+ /*
+ * Optimize if new relfilenode was created in this subxact or one of its
+ * committed children and we won't see those rows later as part of an
+ * earlier scan or command. This ensures that if this subtransaction
+ * aborts then the frozen rows won't be visible after xact cleanup. Note
+ * that the stronger test of exactly which subtransaction created it is
+ * crucial for correctness of this optimization.
+ */
+ if (cstate->freeze)
+ {
+ if (!ThereAreNoPriorRegisteredSnapshots() || !ThereAreNoReadyPortals())
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
+ errmsg("cannot perform FREEZE because of prior transaction activity")));
- elog(LOG, "BGWorker filename from CopyStmt: %s", cstmnt->filename);
- // elog(LOG, "BGWorker copying: %d %d %d", cst->binary, cst->ignore_errors, cst->csv_mode);
+ if (cstate->rel->rd_createSubid != GetCurrentSubTransactionId() &&
+ cstate->rel->rd_newRelfilenodeSubid != GetCurrentSubTransactionId())
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("cannot perform FREEZE because the table was not created or truncated in the current subtransaction")));
- ParseState *pstate;
- pstate = make_parsestate(NULL);
- pstate->p_sourcetext = query_string;
+ hi_options |= HEAP_INSERT_FROZEN;
+ }
- StartTransactionCommand();
+ /*
+ * We need a ResultRelInfo so we can use the regular executor's
+ * index-entry-making machinery. (There used to be a huge amount of code
+ * here that basically duplicated execUtils.c ...)
+ */
+ resultRelInfo = makeNode(ResultRelInfo);
+ InitResultRelInfo(resultRelInfo,
+ cstate->rel,
+ 1, /* dummy rangetable index */
+ NULL,
+ 0);
- Relation rel;
- /* Open and lock the relation, using the appropriate lock type. */
- rel = heap_openrv(cstmnt->relation, RowExclusiveLock);
- // (is_from ? RowExclusiveLock : AccessShareLock));
+ ExecOpenIndices(resultRelInfo, false);
- CopyState cstate = BeginCopyFrom(pstate, rel, cstmnt->filename, cstmnt->is_program,
- NULL, cstmnt->attlist, cstmnt->options);
+ estate->es_result_relations = resultRelInfo;
+ estate->es_num_result_relations = 1;
+ estate->es_result_relation_info = resultRelInfo;
+ estate->es_range_table = cstate->range_table;
+
+ /* Set up a tuple slot too */
+ myslot = ExecInitExtraTupleSlot(estate);
+ ExecSetSlotDescriptor(myslot, tupDesc);
+ /* Triggers might need a slot as well */
+ estate->es_trig_tuple_slot = ExecInitExtraTupleSlot(estate);
+
+ /*
+ * It's more efficient to prepare a bunch of tuples for insertion, and
+ * insert them in one heap_multi_insert() call, than call heap_insert()
+ * separately for every tuple. However, we can't do that if there are
+ * BEFORE/INSTEAD OF triggers, or we need to evaluate volatile default
+ * expressions. Such triggers or expressions might query the table we're
+ * inserting to, and act differently if the tuples that have already been
+ * processed and prepared for insertion are not there. We also can't do
+ * it if the table is partitioned.
+ */
+ useHeapMultiInsert = !((resultRelInfo->ri_TrigDesc != NULL &&
+ (resultRelInfo->ri_TrigDesc->trig_insert_before_row ||
+ resultRelInfo->ri_TrigDesc->trig_insert_instead_row)) ||
+ cstate->partition_dispatch_info != NULL ||
+ cstate->volatile_defexprs);
+
+ if (useHeapMultiInsert)
+ bufferedTuples = palloc(MAX_BUFFERED_TUPLES * sizeof(HeapTuple));
+
+ /* Prepare to catch AFTER triggers. */
+ AfterTriggerBeginQuery();
+
+ /*
+ * Check BEFORE STATEMENT insertion triggers. It's debatable whether we
+ * should do this for COPY, since it's not really an "INSERT" statement as
+ * such. However, executing these triggers maintains consistency with the
+ * EACH ROW triggers that we already fire on COPY.
+ */
+ ExecBSInsertTriggers(estate, resultRelInfo);
+
+ bistate = GetBulkInsertState();
+ econtext = GetPerTupleExprContext(estate);
+
+ /* Set up callback to identify error line number */
+ errcallback.callback = CopyFromErrorCallback;
+ errcallback.arg = (void *) cstate;
+ errcallback.previous = error_context_stack;
+ error_context_stack = &errcallback;
+
+ /* Do the work. */
+ values = (Datum *) palloc(tupDesc->natts * sizeof(Datum));
+ nulls = (bool *) palloc(tupDesc->natts * sizeof(bool));
for (;;)
{
char *msg;
+ bool skip_tuple;
+ Oid loaded_oid = InvalidOid;
+ int next_cf_state; /* NextCopyFrom return state */
+ TupleTableSlot *slot;
+
+
+ if (nBufferedTuples == 0)
+ {
+ /*
+ * Reset the per-tuple exprcontext. We can only do this if the
+ * tuple buffer is empty. (Calling the context the per-tuple
+ * memory context is a bit of a misnomer now.)
+ */
+ ResetPerTupleExprContext(estate);
+ }
+
+ /* Switch into its memory context */
+ MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
CHECK_FOR_INTERRUPTS();
@@ -5221,23 +5406,355 @@ CopyFromBgwMainLoop(Datum main_arg)
elog(LOG, "BGWorker #%d: got zero-length message, stopping", myworkernumber);
break;
}
- // elog(LOG, "BGWorker #%d dummy processing line #%ld", myworkernumber, *(int64 *) data);
- elog(LOG, "BGWorker #%d dummy processing line: %s", myworkernumber, msg);
+ elog(LOG, "BGWorker #%d processing line: %s", myworkernumber, msg);
+
+ cstate->line_buf.data = msg;
+ cstate->line_buf.len = len;
+
+ next_cf_state = NextCopyFrom(cstate, econtext, values, nulls, &loaded_oid);
+
+	if (!next_cf_state)
+		break;
+ else if (next_cf_state == NCF_SUCCESS)
+ {
+ /* And now we can form the input tuple. */
+ tuple = heap_form_tuple(tupDesc, values, nulls);
+
+ if (loaded_oid != InvalidOid)
+ HeapTupleSetOid(tuple, loaded_oid);
+
+ /*
+ * Constraints might reference the tableoid column, so initialize
+ * t_tableOid before evaluating them.
+ */
+ tuple->t_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
+
+ /* Triggers and stuff need to be invoked in query context. */
+ MemoryContextSwitchTo(oldcontext);
+
+ /* Place tuple in tuple slot --- but slot shouldn't free it */
+ slot = myslot;
+ ExecStoreTuple(tuple, slot, InvalidBuffer, false);
+
+ /* Determine the partition to heap_insert the tuple into */
+ if (cstate->partition_dispatch_info)
+ {
+ int leaf_part_index;
+ TupleConversionMap *map;
+
+ /*
+ * Away we go ... If we end up not finding a partition after all,
+ * ExecFindPartition() does not return and errors out instead.
+ * Otherwise, the returned value is to be used as an index into
+ * arrays mt_partitions[] and mt_partition_tupconv_maps[] that
+ * will get us the ResultRelInfo and TupleConversionMap for the
+ * partition, respectively.
+ */
+ leaf_part_index = ExecFindPartition(resultRelInfo,
+ cstate->partition_dispatch_info,
+ slot,
+ estate);
+ Assert(leaf_part_index >= 0 &&
+ leaf_part_index < cstate->num_partitions);
+
+ /*
+			 * If this tuple is mapped to a partition that is not the same as
+			 * the previous one, we'd better make sure the bulk insert
+			 * mechanism gets a new buffer.
+ */
+ if (prev_leaf_part_index != leaf_part_index)
+ {
+ ReleaseBulkInsertStatePin(bistate);
+ prev_leaf_part_index = leaf_part_index;
+ }
+
+ /*
+ * Save the old ResultRelInfo and switch to the one corresponding
+ * to the selected partition.
+ */
+ saved_resultRelInfo = resultRelInfo;
+ resultRelInfo = cstate->partitions + leaf_part_index;
+
+ /* We do not yet have a way to insert into a foreign partition */
+ if (resultRelInfo->ri_FdwRoutine)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot route inserted tuples to a foreign table")));
+
+ /*
+ * For ExecInsertIndexTuples() to work on the partition's indexes
+ */
+ estate->es_result_relation_info = resultRelInfo;
+
+ /*
+ * If we're capturing transition tuples, we might need to convert
+ * from the partition rowtype to parent rowtype.
+ */
+ if (cstate->transition_capture != NULL)
+ {
+ if (resultRelInfo->ri_TrigDesc &&
+ (resultRelInfo->ri_TrigDesc->trig_insert_before_row ||
+ resultRelInfo->ri_TrigDesc->trig_insert_instead_row))
+ {
+ /*
+ * If there are any BEFORE or INSTEAD triggers on the
+ * partition, we'll have to be ready to convert their
+ * result back to tuplestore format.
+ */
+ cstate->transition_capture->tcs_original_insert_tuple = NULL;
+ cstate->transition_capture->tcs_map =
+ cstate->transition_tupconv_maps[leaf_part_index];
+ }
+ else
+ {
+ /*
+ * Otherwise, just remember the original unconverted
+ * tuple, to avoid a needless round trip conversion.
+ */
+ cstate->transition_capture->tcs_original_insert_tuple = tuple;
+ cstate->transition_capture->tcs_map = NULL;
+ }
+ }
+ /*
+ * We might need to convert from the parent rowtype to the
+ * partition rowtype.
+ */
+ map = cstate->partition_tupconv_maps[leaf_part_index];
+ if (map)
+ {
+ Relation partrel = resultRelInfo->ri_RelationDesc;
+
+ tuple = do_convert_tuple(tuple, map);
+
+ /*
+ * We must use the partition's tuple descriptor from this
+ * point on. Use a dedicated slot from this point on until
+ * we're finished dealing with the partition.
+ */
+ slot = cstate->partition_tuple_slot;
+ Assert(slot != NULL);
+ ExecSetSlotDescriptor(slot, RelationGetDescr(partrel));
+ ExecStoreTuple(tuple, slot, InvalidBuffer, true);
+ }
+
+ tuple->t_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
+ }
+
+ skip_tuple = false;
+
+ /* BEFORE ROW INSERT Triggers */
+ if (resultRelInfo->ri_TrigDesc &&
+ resultRelInfo->ri_TrigDesc->trig_insert_before_row)
+ {
+ slot = ExecBRInsertTriggers(estate, resultRelInfo, slot);
+
+ if (slot == NULL) /* "do nothing" */
+ skip_tuple = true;
+ else /* trigger might have changed tuple */
+ tuple = ExecMaterializeSlot(slot);
+ }
+ }
+ else
+ {
+ skip_tuple = true;
+ }
+
+ if (!skip_tuple)
+ {
+ if (resultRelInfo->ri_TrigDesc &&
+ resultRelInfo->ri_TrigDesc->trig_insert_instead_row)
+ {
+ /* Pass the data to the INSTEAD ROW INSERT trigger */
+ ExecIRInsertTriggers(estate, resultRelInfo, slot);
+ }
+ else
+ {
+ /*
+ * We always check the partition constraint, including when
+ * the tuple got here via tuple-routing. However we don't
+ * need to in the latter case if no BR trigger is defined on
+ * the partition. Note that a BR trigger might modify the
+ * tuple such that the partition constraint is no longer
+ * satisfied, so we need to check in that case.
+ */
+ bool check_partition_constr =
+ (resultRelInfo->ri_PartitionCheck != NIL);
+
+ if (saved_resultRelInfo != NULL &&
+ !(resultRelInfo->ri_TrigDesc &&
+ resultRelInfo->ri_TrigDesc->trig_insert_before_row))
+ check_partition_constr = false;
+
+ /* Check the constraints of the tuple */
+ if (cstate->rel->rd_att->constr || check_partition_constr)
+ ExecConstraints(resultRelInfo, slot, estate);
+
+ if (useHeapMultiInsert)
+ {
+ /* Add this tuple to the tuple buffer */
+ if (nBufferedTuples == 0)
+ firstBufferedLineNo = cstate->cur_lineno;
+ bufferedTuples[nBufferedTuples++] = tuple;
+ bufferedTuplesSize += tuple->t_len;
+
+ /*
+ * If the buffer filled up, flush it. Also flush if the
+ * total size of all the tuples in the buffer becomes
+ * large, to avoid using large amounts of memory for the
+ * buffer when the tuples are exceptionally wide.
+ */
+ if (nBufferedTuples == MAX_BUFFERED_TUPLES ||
+ bufferedTuplesSize > 65535)
+ {
+ CopyFromInsertBatch(cstate, estate, mycid, hi_options,
+ resultRelInfo, myslot, bistate,
+ nBufferedTuples, bufferedTuples,
+ firstBufferedLineNo);
+ nBufferedTuples = 0;
+ bufferedTuplesSize = 0;
+ }
+ }
+ else
+ {
+ List *recheckIndexes = NIL;
+
+ /* OK, store the tuple and create index entries for it */
+ heap_insert(resultRelInfo->ri_RelationDesc, tuple, mycid,
+ hi_options, bistate);
+
+ if (resultRelInfo->ri_NumIndices > 0)
+ recheckIndexes = ExecInsertIndexTuples(slot,
+ &(tuple->t_self),
+ estate,
+ false,
+ NULL,
+ NIL);
+
+ /* AFTER ROW INSERT Triggers */
+ ExecARInsertTriggers(estate, resultRelInfo, tuple,
+ recheckIndexes, cstate->transition_capture);
+
+ list_free(recheckIndexes);
+ }
+ }
+
+ /*
+ * We count only tuples not suppressed by a BEFORE INSERT trigger;
+ * this is the same definition used by execMain.c for counting
+ * tuples inserted by an INSERT command.
+ */
+ processed++;
+
+ if (saved_resultRelInfo)
+ {
+ resultRelInfo = saved_resultRelInfo;
+ estate->es_result_relation_info = resultRelInfo;
+ }
+ }
}
+ /* Flush any remaining buffered tuples */
+ if (nBufferedTuples > 0)
+ CopyFromInsertBatch(cstate, estate, mycid, hi_options,
+ resultRelInfo, myslot, bistate,
+ nBufferedTuples, bufferedTuples,
+ firstBufferedLineNo);
+
+ /* Done, clean up */
+ error_context_stack = errcallback.previous;
+
+ FreeBulkInsertState(bistate);
+
+ MemoryContextSwitchTo(oldcontext);
+
+ /*
+ * In the old protocol, tell pqcomm that we can process normal protocol
+ * messages again.
+ */
+ if (cstate->copy_dest == COPY_OLD_FE)
+ pq_endmsgread();
+
+ /* Execute AFTER STATEMENT insertion triggers */
+ ExecASInsertTriggers(estate, resultRelInfo, cstate->transition_capture);
+
+ /* Handle queued AFTER triggers */
+ AfterTriggerEndQuery(estate);
+
+ ExecResetTupleTable(estate->es_tupleTable, false);
+
+ ExecCloseIndices(resultRelInfo);
+
+ /* Close all the partitioned tables, leaf partitions, and their indices */
+ if (cstate->partition_dispatch_info)
+ {
+ int i;
+
+ /*
+ * Remember cstate->partition_dispatch_info[0] corresponds to the root
+ * partitioned table, which we must not try to close, because it is
+ * the main target table of COPY that will be closed eventually by
+ * DoCopy(). Also, tupslot is NULL for the root partitioned table.
+ */
+ for (i = 1; i < cstate->num_dispatch; i++)
+ {
+ PartitionDispatch pd = cstate->partition_dispatch_info[i];
+
+ heap_close(pd->reldesc, NoLock);
+ ExecDropSingleTupleTableSlot(pd->tupslot);
+ }
+ for (i = 0; i < cstate->num_partitions; i++)
+ {
+ ResultRelInfo *resultRelInfo = cstate->partitions + i;
+
+ ExecCloseIndices(resultRelInfo);
+ heap_close(resultRelInfo->ri_RelationDesc, NoLock);
+ }
+
+ /* Release the standalone partition tuple descriptor */
+ ExecDropSingleTupleTableSlot(cstate->partition_tuple_slot);
+ }
+
+ /* Close any trigger target relations */
+ ExecCleanUpTriggerState(estate);
+
+ FreeExecutorState(estate);
+
+ /*
+ * If we skipped writing WAL, then we need to sync the heap (but not
+ * indexes since those use WAL anyway)
+ */
+ if (hi_options & HEAP_INSERT_SKIP_WAL)
+ heap_sync(cstate->rel);
+
heap_close(rel, NoLock);
// (is_from ? NoLock : AccessShareLock));
CommitTransactionCommand();
LWLockAcquire(CopyFromBgwLock, LW_EXCLUSIVE);
++pst->workers_finished;
+ pst->processed += processed;
+
+ // if (pst->workers_finished == pst->workers_total)
+ // {
+ // registrant = BackendPidGetProc(MyBgworkerEntry->bgw_notify_pid);
+ // if (registrant == NULL)
+ // {
+ // elog(DEBUG1, "registrant backend has exited prematurely");
+ // proc_exit(1);
+ // }
+ // SetLatch(&registrant->procLatch);
+ // }
LWLockRelease(CopyFromBgwLock);
/* Signal main process that we are done. */
ConditionVariableBroadcast(&cv);
- pfree(values);
- pfree(nulls);
+	// TODO: fix segmentation fault when freeing these buffers (backtrace below):
+ // * frame #0: 0x0000000107519678 postgres`GetMemoryChunkContext(pointer=0x00007fdf268bf858) at memutils.h:124
+ // frame #1: 0x00000001075194e5 postgres`pfree(pointer=0x00007fdf268bf858) at mcxt.c:952
+ // pfree(values);
+ // pfree(nulls);
/*
* We're done. Explicitly detach the shared memory segment so that we
@@ -5307,6 +5824,10 @@ setup_parallel_copy_from(int64 queue_size, int32 nworkers, dsm_segment **segp,
/* Wait for workers to become ready. */
wait_for_workers_to_become_ready(wstate, pst);
+ /* Wait to be signalled. */
+ // WaitLatch(MyLatch, WL_LATCH_SET, 0, PG_WAIT_EXTENSION);
+ /* Reset the latch so we don't spin. */
+ // ResetLatch(MyLatch);
/*
* Once we reach this point, all workers are ready. We no longer need to
@@ -5342,10 +5863,7 @@ setup_dsm(int64 queue_size, int nworkers,
dsm_segment *seg;
shm_toc *toc;
ParallelState *pst;
- char *shm_filename;
- List *shm_p_rtable;
- List *shm_attnamelist;
- List *shm_options;
+ char *shm_query;
Assert(cstate->rel);
@@ -5372,15 +5890,12 @@ setup_dsm(int64 queue_size, int nworkers,
shm_toc_initialize_estimator(&e);
shm_toc_estimate_chunk(&e, sizeof(ParallelState));
- shm_toc_estimate_chunk(&e, sizeof(*pstate->p_rtable));
	shm_toc_estimate_chunk(&e, strlen(ActivePortal->sourceText) + 1);
- // shm_toc_estimate_chunk(&e, sizeof(*attnamelist));
- shm_toc_estimate_chunk(&e, sizeof(*options));
for (i = 0; i < nworkers; ++i)
shm_toc_estimate_chunk(&e, (Size) queue_size);
- shm_toc_estimate_keys(&e, 1 + 4 + nworkers);
+ shm_toc_estimate_keys(&e, 1 + 1 + nworkers);
segsize = shm_toc_estimate(&e);
/* Create the shared memory segment and establish a table of contents. */
@@ -5395,211 +5910,18 @@ setup_dsm(int64 queue_size, int nworkers,
pst->workers_attached = 0;
pst->workers_ready = 0;
pst->workers_finished = 0;
+ pst->processed = 0;
pst->database_id = MyDatabaseId;
pst->authenticated_user_id = GetAuthenticatedUserId();
ConditionVariableInit(&pst->cv);
- pst->is_program = cstate->is_program;
- pst->rel = cstate->rel;
-
- pst->estate = CreateExecutorState(); /* for ExecConstraints() */
- pst->mycid = GetCurrentCommandId(true);
- pst->hi_options = 0; /* start with default heap_insert options */
-
- /*
- * The target must be a plain relation or have an INSTEAD OF INSERT row
- * trigger. (Currently, such triggers are only allowed on views, so we
- * only hint about them in the view case.)
- */
- if (cstate->rel->rd_rel->relkind != RELKIND_RELATION &&
- cstate->rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE &&
- !(cstate->rel->trigdesc &&
- cstate->rel->trigdesc->trig_insert_instead_row))
- {
- if (cstate->rel->rd_rel->relkind == RELKIND_VIEW)
- ereport(ERROR,
- (errcode(ERRCODE_WRONG_OBJECT_TYPE),
- errmsg("cannot copy to view \"%s\"",
- RelationGetRelationName(cstate->rel)),
- errhint("To enable copying to a view, provide an INSTEAD OF INSERT trigger.")));
- else if (cstate->rel->rd_rel->relkind == RELKIND_MATVIEW)
- ereport(ERROR,
- (errcode(ERRCODE_WRONG_OBJECT_TYPE),
- errmsg("cannot copy to materialized view \"%s\"",
- RelationGetRelationName(cstate->rel))));
- else if (cstate->rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
- ereport(ERROR,
- (errcode(ERRCODE_WRONG_OBJECT_TYPE),
- errmsg("cannot copy to foreign table \"%s\"",
- RelationGetRelationName(cstate->rel))));
- else if (cstate->rel->rd_rel->relkind == RELKIND_SEQUENCE)
- ereport(ERROR,
- (errcode(ERRCODE_WRONG_OBJECT_TYPE),
- errmsg("cannot copy to sequence \"%s\"",
- RelationGetRelationName(cstate->rel))));
- else
- ereport(ERROR,
- (errcode(ERRCODE_WRONG_OBJECT_TYPE),
- errmsg("cannot copy to non-table relation \"%s\"",
- RelationGetRelationName(cstate->rel))));
- }
-
- pst->tupDesc = RelationGetDescr(cstate->rel);
-
- /*----------
- * Check to see if we can avoid writing WAL
- *
- * If archive logging/streaming is not enabled *and* either
- * - table was created in same transaction as this COPY
- * - data is being written to relfilenode created in this transaction
- * then we can skip writing WAL. It's safe because if the transaction
- * doesn't commit, we'll discard the table (or the new relfilenode file).
- * If it does commit, we'll have done the heap_sync at the bottom of this
- * routine first.
- *
- * As mentioned in comments in utils/rel.h, the in-same-transaction test
- * is not always set correctly, since in rare cases rd_newRelfilenodeSubid
- * can be cleared before the end of the transaction. The exact case is
- * when a relation sets a new relfilenode twice in same transaction, yet
- * the second one fails in an aborted subtransaction, e.g.
- *
- * BEGIN;
- * TRUNCATE t;
- * SAVEPOINT save;
- * TRUNCATE t;
- * ROLLBACK TO save;
- * COPY ...
- *
- * Also, if the target file is new-in-transaction, we assume that checking
- * FSM for free space is a waste of time, even if we must use WAL because
- * of archiving. This could possibly be wrong, but it's unlikely.
- *
- * The comments for heap_insert and RelationGetBufferForTuple specify that
- * skipping WAL logging is only safe if we ensure that our tuples do not
- * go into pages containing tuples from any other transactions --- but this
- * must be the case if we have a new table or new relfilenode, so we need
- * no additional work to enforce that.
- *----------
- */
- /* createSubid is creation check, newRelfilenodeSubid is truncation check */
- if (cstate->rel->rd_createSubid != InvalidSubTransactionId ||
- cstate->rel->rd_newRelfilenodeSubid != InvalidSubTransactionId)
- {
- pst->hi_options |= HEAP_INSERT_SKIP_FSM;
- if (!XLogIsNeeded())
- pst->hi_options |= HEAP_INSERT_SKIP_WAL;
- }
-
- /*
- * Optimize if new relfilenode was created in this subxact or one of its
- * committed children and we won't see those rows later as part of an
- * earlier scan or command. This ensures that if this subtransaction
- * aborts then the frozen rows won't be visible after xact cleanup. Note
- * that the stronger test of exactly which subtransaction created it is
- * crucial for correctness of this optimization.
- */
- if (cstate->freeze)
- {
- if (!ThereAreNoPriorRegisteredSnapshots() || !ThereAreNoReadyPortals())
- ereport(ERROR,
- (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
- errmsg("cannot perform FREEZE because of prior transaction activity")));
-
- if (cstate->rel->rd_createSubid != GetCurrentSubTransactionId() &&
- cstate->rel->rd_newRelfilenodeSubid != GetCurrentSubTransactionId())
- ereport(ERROR,
- (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
- errmsg("cannot perform FREEZE because the table was not created or truncated in the current subtransaction")));
-
- pst->hi_options |= HEAP_INSERT_FROZEN;
- }
-
- /*
- * We need a ResultRelInfo so we can use the regular executor's
- * index-entry-making machinery. (There used to be a huge amount of code
- * here that basically duplicated execUtils.c ...)
- */
- pst->resultRelInfo = makeNode(ResultRelInfo);
- InitResultRelInfo(pst->resultRelInfo,
- cstate->rel,
- 1, /* dummy rangetable index */
- NULL,
- 0);
-
- ExecOpenIndices(pst->resultRelInfo, false);
-
- pst->estate->es_result_relations = pst->resultRelInfo;
- pst->estate->es_num_result_relations = 1;
- pst->estate->es_result_relation_info = pst->resultRelInfo;
- pst->estate->es_range_table = cstate->range_table;
-
- /* Set up a tuple slot too */
- pst->myslot = ExecInitExtraTupleSlot(pst->estate);
- ExecSetSlotDescriptor(pst->myslot, pst->tupDesc);
- /* Triggers might need a slot as well */
- pst->estate->es_trig_tuple_slot = ExecInitExtraTupleSlot(pst->estate);
-
- /*
- * It's more efficient to prepare a bunch of tuples for insertion, and
- * insert them in one heap_multi_insert() call, than call heap_insert()
- * separately for every tuple. However, we can't do that if there are
- * BEFORE/INSTEAD OF triggers, or we need to evaluate volatile default
- * expressions. Such triggers or expressions might query the table we're
- * inserting to, and act differently if the tuples that have already been
- * processed and prepared for insertion are not there. We also can't do
- * it if the table is partitioned.
- */
- pst->useHeapMultiInsert = !((pst->resultRelInfo->ri_TrigDesc != NULL &&
- (pst->resultRelInfo->ri_TrigDesc->trig_insert_before_row ||
- pst->resultRelInfo->ri_TrigDesc->trig_insert_instead_row)) ||
- cstate->partition_dispatch_info != NULL ||
- cstate->volatile_defexprs);
-
- /* Prepare to catch AFTER triggers. */
- AfterTriggerBeginQuery();
-
- /*
- * Check BEFORE STATEMENT insertion triggers. It's debatable whether we
- * should do this for COPY, since it's not really an "INSERT" statement as
- * such. However, executing these triggers maintains consistency with the
- * EACH ROW triggers that we already fire on COPY.
- */
- ExecBSInsertTriggers(pst->estate, pst->resultRelInfo);
-
- pst->bistate = GetBulkInsertState();
- pst->econtext = GetPerTupleExprContext(pst->estate);
+ shm_toc_insert(toc, toc_key++, pst);
- /* Set up callback to identify error line number */
- pst->errcallback.callback = CopyFromErrorCallback;
- pst->errcallback.arg = (void *) cstate;
- pst->errcallback.previous = error_context_stack;
- error_context_stack = &pst->errcallback;
+	shm_query = shm_toc_allocate(toc, strlen(ActivePortal->sourceText) + 1);
- shm_toc_insert(toc, toc_key++, pst);
+ strcpy(shm_query, ActivePortal->sourceText);
- shm_p_rtable = shm_toc_allocate(toc, sizeof(*pstate->p_rtable));
- shm_filename = shm_toc_allocate(toc, strlen(ActivePortal->sourceText) * sizeof(char));
- // shm_attnamelist = shm_toc_allocate(toc, sizeof(*attnamelist));
- shm_options = shm_toc_allocate(toc, sizeof(*options));
-
- *shm_p_rtable = *pstate->p_rtable;
- // *shm_attnamelist = *attnamelist;
- *shm_options = *options;
- strcpy(shm_filename, ActivePortal->sourceText);
-
- shm_toc_insert(toc, toc_key++, shm_p_rtable);
- shm_toc_insert(toc, toc_key++, shm_filename);
- // shm_toc_insert(toc, toc_key++, shm_attnamelist);
- shm_toc_insert(toc, toc_key++, shm_options);
-
- // cst = shm_toc_allocate(toc, sizeof(CopyStateData) + strlen(cstate->filename) * sizeof(char));
- // *cst = *cstate;
- // cst->filename = shm_toc_allocate(toc, strlen(cstate->filename) * sizeof(char));
- // // *cst->filename = *cstate->filename;
- // strcpy(cst->filename, cstate->filename);
- // // memcpy(cst->filename, cstate->filename, sizeof(*cstate->filename));
- // // memcpy(cstate, cst, sizeof(CopyStateData));
- // shm_toc_insert(toc, 1, cst);
+ shm_toc_insert(toc, toc_key++, shm_query);
/* Set up one message queue per worker, plus one. */
for (i = 0; i < nworkers; ++i)
@@ -5807,436 +6129,6 @@ wait_for_workers_to_finish(volatile ParallelState *pst)
ConditionVariableCancelSleep();
}
-// /*
-// * Parse the current line into separate attributes (fields),
-// * performing de-escaping as needed.
-// *
-// * The input is in line_buf. We use attribute_buf to hold the result
-// * strings. cstate->raw_fields[k] is set to point to the k'th attribute
-// * string, or NULL when the input matches the null marker string.
-// * This array is expanded as necessary.
-// *
-// * (Note that the caller cannot check for nulls since the returned
-// * string would be the post-de-escaping equivalent, which may look
-// * the same as some valid data string.)
-// *
-// * delim is the column delimiter string (must be just one byte for now).
-// * null_print is the null marker string. Note that this is compared to
-// * the pre-de-escaped input string.
-// *
-// * The return value is the number of fields actually read.
-// */
-// static int
-// CopyReadTextAttrs(CopyState cstate)
-// {
-// char delimc = cstate->delim[0];
-// int fieldno;
-// char *output_ptr;
-// char *cur_ptr;
-// char *line_end_ptr;
-//
-// /*
-// * We need a special case for zero-column tables: check that the input
-// * line is empty, and return.
-// */
-// if (cstate->max_fields <= 0)
-// {
-// if (cstate->line_buf.len != 0)
-// ereport(ERROR,
-// (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
-// errmsg("extra data after last expected column")));
-// return 0;
-// }
-//
-// resetStringInfo(&cstate->attribute_buf);
-//
-// /*
-// * The de-escaped attributes will certainly not be longer than the input
-// * data line, so we can just force attribute_buf to be large enough and
-// * then transfer data without any checks for enough space. We need to do
-// * it this way because enlarging attribute_buf mid-stream would invalidate
-// * pointers already stored into cstate->raw_fields[].
-// */
-// if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
-// enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
-// output_ptr = cstate->attribute_buf.data;
-//
-// /* set pointer variables for loop */
-// cur_ptr = cstate->line_buf.data;
-// line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
-//
-// /* Outer loop iterates over fields */
-// fieldno = 0;
-// for (;;)
-// {
-// bool found_delim = false;
-// char *start_ptr;
-// char *end_ptr;
-// int input_len;
-// bool saw_non_ascii = false;
-//
-// /* Make sure there is enough space for the next value */
-// if (fieldno >= cstate->max_fields)
-// {
-// cstate->max_fields *= 2;
-// cstate->raw_fields =
-// repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));
-// }
-//
-// /* Remember start of field on both input and output sides */
-// start_ptr = cur_ptr;
-// cstate->raw_fields[fieldno] = output_ptr;
-//
-// /*
-// * Scan data for field.
-// *
-// * Note that in this loop, we are scanning to locate the end of field
-// * and also speculatively performing de-escaping. Once we find the
-// * end-of-field, we can match the raw field contents against the null
-// * marker string. Only after that comparison fails do we know that
-// * de-escaping is actually the right thing to do; therefore we *must
-// * not* throw any syntax errors before we've done the null-marker
-// * check.
-// */
-// for (;;)
-// {
-// char c;
-//
-// end_ptr = cur_ptr;
-// if (cur_ptr >= line_end_ptr)
-// break;
-// c = *cur_ptr++;
-// if (c == delimc)
-// {
-// found_delim = true;
-// break;
-// }
-// if (c == '\\')
-// {
-// if (cur_ptr >= line_end_ptr)
-// break;
-// c = *cur_ptr++;
-// switch (c)
-// {
-// case '0':
-// case '1':
-// case '2':
-// case '3':
-// case '4':
-// case '5':
-// case '6':
-// case '7':
-// {
-// /* handle \013 */
-// int val;
-//
-// val = OCTVALUE(c);
-// if (cur_ptr < line_end_ptr)
-// {
-// c = *cur_ptr;
-// if (ISOCTAL(c))
-// {
-// cur_ptr++;
-// val = (val << 3) + OCTVALUE(c);
-// if (cur_ptr < line_end_ptr)
-// {
-// c = *cur_ptr;
-// if (ISOCTAL(c))
-// {
-// cur_ptr++;
-// val = (val << 3) + OCTVALUE(c);
-// }
-// }
-// }
-// }
-// c = val & 0377;
-// if (c == '\0' || IS_HIGHBIT_SET(c))
-// saw_non_ascii = true;
-// }
-// break;
-// case 'x':
-// /* Handle \x3F */
-// if (cur_ptr < line_end_ptr)
-// {
-// char hexchar = *cur_ptr;
-//
-// if (isxdigit((unsigned char) hexchar))
-// {
-// int val = GetDecimalFromHex(hexchar);
-//
-// cur_ptr++;
-// if (cur_ptr < line_end_ptr)
-// {
-// hexchar = *cur_ptr;
-// if (isxdigit((unsigned char) hexchar))
-// {
-// cur_ptr++;
-// val = (val << 4) + GetDecimalFromHex(hexchar);
-// }
-// }
-// c = val & 0xff;
-// if (c == '\0' || IS_HIGHBIT_SET(c))
-// saw_non_ascii = true;
-// }
-// }
-// break;
-// case 'b':
-// c = '\b';
-// break;
-// case 'f':
-// c = '\f';
-// break;
-// case 'n':
-// c = '\n';
-// break;
-// case 'r':
-// c = '\r';
-// break;
-// case 't':
-// c = '\t';
-// break;
-// case 'v':
-// c = '\v';
-// break;
-//
-// /*
-// * in all other cases, take the char after '\'
-// * literally
-// */
-// }
-// }
-//
-// /* Add c to output string */
-// *output_ptr++ = c;
-// }
-//
-// /* Check whether raw input matched null marker */
-// input_len = end_ptr - start_ptr;
-// if (input_len == cstate->null_print_len &&
-// strncmp(start_ptr, cstate->null_print, input_len) == 0)
-// cstate->raw_fields[fieldno] = NULL;
-// else
-// {
-// /*
-// * At this point we know the field is supposed to contain data.
-// *
-// * If we de-escaped any non-7-bit-ASCII chars, make sure the
-// * resulting string is valid data for the db encoding.
-// */
-// if (saw_non_ascii)
-// {
-// char *fld = cstate->raw_fields[fieldno];
-//
-// pg_verifymbstr(fld, output_ptr - fld, false);
-// }
-// }
-//
-// /* Terminate attribute value in output area */
-// *output_ptr++ = '\0';
-//
-// fieldno++;
-// /* Done if we hit EOL instead of a delim */
-// if (!found_delim)
-// break;
-// }
-//
-// /* Clean up state of attribute_buf */
-// output_ptr--;
-// Assert(*output_ptr == '\0');
-// cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
-//
-// return fieldno;
-// }
-//
-// /*
-// * Parse the current line into separate attributes (fields),
-// * performing de-escaping as needed. This has exactly the same API as
-// * CopyReadAttributesText, except we parse the fields according to
-// * "standard" (i.e. common) CSV usage.
-// */
-// static int
-// CopyReadCSVAttrs(CopyState cstate)
-// {
-// char delimc = cstate->delim[0];
-// char quotec = cstate->quote[0];
-// char escapec = cstate->escape[0];
-// int fieldno;
-// char *output_ptr;
-// char *cur_ptr;
-// char *line_end_ptr;
-//
-// /*
-// * We need a special case for zero-column tables: check that the input
-// * line is empty, and return.
-// */
-// if (cstate->max_fields <= 0)
-// {
-// if (cstate->line_buf.len != 0)
-// ereport(ERROR,
-// (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
-// errmsg("extra data after last expected column")));
-// return 0;
-// }
-//
-// resetStringInfo(&cstate->attribute_buf);
-//
-// /*
-// * The de-escaped attributes will certainly not be longer than the input
-// * data line, so we can just force attribute_buf to be large enough and
-// * then transfer data without any checks for enough space. We need to do
-// * it this way because enlarging attribute_buf mid-stream would invalidate
-// * pointers already stored into cstate->raw_fields[].
-// */
-// if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
-// enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
-// output_ptr = cstate->attribute_buf.data;
-//
-// /* set pointer variables for loop */
-// cur_ptr = cstate->line_buf.data;
-// line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
-//
-// /* Outer loop iterates over fields */
-// fieldno = 0;
-// for (;;)
-// {
-// bool found_delim = false;
-// bool saw_quote = false;
-// char *start_ptr;
-// char *end_ptr;
-// int input_len;
-//
-// /* Make sure there is enough space for the next value */
-// if (fieldno >= cstate->max_fields)
-// {
-// cstate->max_fields *= 2;
-// cstate->raw_fields =
-// repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));
-// }
-//
-// /* Remember start of field on both input and output sides */
-// start_ptr = cur_ptr;
-// cstate->raw_fields[fieldno] = output_ptr;
-//
-// /*
-// * Scan data for field,
-// *
-// * The loop starts in "not quote" mode and then toggles between that
-// * and "in quote" mode. The loop exits normally if it is in "not
-// * quote" mode and a delimiter or line end is seen.
-// */
-// for (;;)
-// {
-// char c;
-//
-// /* Not in quote */
-// for (;;)
-// {
-// end_ptr = cur_ptr;
-// if (cur_ptr >= line_end_ptr)
-// goto endfield;
-// c = *cur_ptr++;
-// /* unquoted field delimiter */
-// if (c == delimc)
-// {
-// found_delim = true;
-// goto endfield;
-// }
-// /* start of quoted field (or part of field) */
-// if (c == quotec)
-// {
-// saw_quote = true;
-// break;
-// }
-// /* Add c to output string */
-// *output_ptr++ = c;
-// }
-//
-// /* In quote */
-// for (;;)
-// {
-// end_ptr = cur_ptr;
-// if (cur_ptr >= line_end_ptr)
-// ereport(ERROR,
-// (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
-// errmsg("unterminated CSV quoted field")));
-//
-// c = *cur_ptr++;
-//
-// /* escape within a quoted field */
-// if (c == escapec)
-// {
-// /*
-// * peek at the next char if available, and escape it if it
-// * is an escape char or a quote char
-// */
-// if (cur_ptr < line_end_ptr)
-// {
-// char nextc = *cur_ptr;
-//
-// if (nextc == escapec || nextc == quotec)
-// {
-// *output_ptr++ = nextc;
-// cur_ptr++;
-// continue;
-// }
-// }
-// }
-//
-// /*
-// * end of quoted field. Must do this test after testing for
-// * escape in case quote char and escape char are the same
-// * (which is the common case).
-// */
-// if (c == quotec)
-// break;
-//
-// /* Add c to output string */
-// *output_ptr++ = c;
-// }
-// }
-// endfield:
-//
-// /* Terminate attribute value in output area */
-// *output_ptr++ = '\0';
-//
-// /* Check whether raw input matched null marker */
-// input_len = end_ptr - start_ptr;
-// if (!saw_quote && input_len == cstate->null_print_len &&
-// strncmp(start_ptr, cstate->null_print, input_len) == 0)
-// cstate->raw_fields[fieldno] = NULL;
-//
-// fieldno++;
-// /* Done if we hit EOL instead of a delim */
-// if (!found_delim)
-// break;
-// }
-//
-// /* Clean up state of attribute_buf */
-// output_ptr--;
-// Assert(*output_ptr == '\0');
-// cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
-//
-// return fieldno;
-// }
-//
-// /*
-// *
-// */
-// bool
-// CopyFromReadAttributes(char *line, bool csv_mode, char ***fields, int *nfields)
-// {
-// int fldct;
-//
-// /* Parse the line into de-escaped field values */
-// if (csv_mode)
-// fldct = CopyReadCSVAttrs(cstate);
-// else
-// fldct = CopyReadTextAttrs(cstate);
-//
-// *fields = cstate->raw_fields;
-// *nfields = fldct;
-// return true;
-// }
/*
* Parallel Copy FROM file to relation.
@@ -6256,36 +6148,13 @@ ParallelCopyFrom(CopyState cstate, ParseState *pstate,
shm_mq_handle **mq_handles;
shm_mq_result shmq_res;
int last_worker_used = 0;
- // int64 message = 0;
- // char *message;
- // char *message_contents = VARDATA_ANY(message);
- // int message_size = VARSIZE_ANY_EXHDR(message);
int message_size = sizeof(char);
mq_handles = palloc0(sizeof(shm_mq_handle *) * nworkers);
MemoryContext oldcontext = CurrentMemoryContext;
- // TupleDesc tupDesc;
- // ResultRelInfo *resultRelInfo;
- // EState *estate = CreateExecutorState(); /* for ExecConstraints() */
- // ExprContext *econtext;
- // TupleTableSlot *myslot;
- //
- // ErrorContextCallback errcallback;
- // CommandId mycid = GetCurrentCommandId(true);
- // int hi_options = 0; /* start with default heap_insert options */
- // BulkInsertState bistate;
- // bool useHeapMultiInsert;
-
- // ResultRelInfo *saved_resultRelInfo = NULL;
- // int prev_leaf_part_index = -1;
uint64 processed = 0;
- // int nBufferedTuples = 0;
-// #define MAX_BUFFERED_TUPLES 1000
-// HeapTuple *bufferedTuples = NULL; /* initialize to silence warning */
-// Size bufferedTuplesSize = 0;
-// int firstBufferedLineNo = 0;
// MQ size = 100 messages x 80 chars each
pst = setup_parallel_copy_from(message_size * 80 * queue_size,
@@ -6297,46 +6166,14 @@ ParallelCopyFrom(CopyState cstate, ParseState *pstate,
attnamelist,
options);
- // // BG Worker startup
- // RegisterDynamicBackgroundWorker(&worker, &bgwhandle);
- // bgwstatus = WaitForBackgroundWorkerStartup(bgwhandle, &bgwpid);
- // elog(LOG, "Main COPY process (pid %d): BGWorker started (pid %d)", MyProcPid, bgwpid);
- // // BG Worker startup
-
elog(LOG, "Copying from file %s", cstate->filename);
for (;;)
{
- // TupleTableSlot *slot;
- // bool skip_tuple;
- // Oid loaded_oid = InvalidOid;
- // int next_cf_state; /* NextCopyFrom return state */
bool done;
CHECK_FOR_INTERRUPTS();
- // if (nBufferedTuples == 0)
- // {
- // /*
- // * Reset the per-tuple exprcontext. We can only do this if the
- // * tuple buffer is empty. (Calling the context the per-tuple
- // * memory context is a bit of a misnomer now.)
- // */
- // ResetPerTupleExprContext(estate);
- // }
-
- /* Switch into its memory context */
- // MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
-
- // next_cf_state = NextCopyFrom(cstate, econtext, values, nulls, &loaded_oid);
-
-
- // LWLockAcquire(CopyFromBgwLock, LW_EXCLUSIVE);
- // // SpinLockAcquire(&pst->mutex);
- // pst->curr_line++;
- // // SpinLockRelease(&pst->mutex);
- // LWLockRelease(CopyFromBgwLock);
-
/* on input just throw the header line away */
if (cstate->cur_lineno == 0 && cstate->header_line)
{
@@ -6349,7 +6186,8 @@ ParallelCopyFrom(CopyState cstate, ParseState *pstate,
/* Actually read the line into memory here */
done = CopyReadLine(cstate);
- // elog(LOG, "Reading line #%d with status %d", cstate->cur_lineno, done);
+	// Sleep 1 second to check that the BGWorkers consume lines while the master is still working.
+ // sleep(1);
/*
* EOF at start of line means we're done. If we see EOF after some
@@ -6373,15 +6211,8 @@ ParallelCopyFrom(CopyState cstate, ParseState *pstate,
}
else
{
- // message = pst->curr_line;
- // cur_ptr = cstate->line_buf.data;
- // line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
- // message = (char *) palloc(cstate->line_buf.len);
- // memcpy(message, cstate->line_buf.data, cstate->line_buf.len);
- // shmq_res = shm_mq_send(mq_handles[++last_worker_used - 1], message_size, &message, false);
- // elog(LOG, "Sending line #%d '%s' to BGWorker #%d", cstate->cur_lineno, message, last_worker_used + 1);
elog(LOG, "Sending line #%d to BGWorker #%d", cstate->cur_lineno, last_worker_used + 1);
- // shmq_res = shm_mq_send(mq_handles[++last_worker_used - 1], cstate->line_buf.len, message, false);
+
shmq_res = shm_mq_send(mq_handles[++last_worker_used - 1], cstate->line_buf.len, cstate->line_buf.data, false);
if (shmq_res != SHM_MQ_SUCCESS)
ereport(ERROR,
@@ -6390,323 +6221,21 @@ ParallelCopyFrom(CopyState cstate, ParseState *pstate,
if (last_worker_used == nworkers)
last_worker_used = 0;
}
-
- // if (!next_cf_state) {
- // break;
- // }
- // else if (next_cf_state == NCF_SUCCESS)
- // {
- // /* And now we can form the input tuple. */
- // tuple = heap_form_tuple(tupDesc, values, nulls);
- //
- // if (loaded_oid != InvalidOid)
- // HeapTupleSetOid(tuple, loaded_oid);
- //
- // /*
- // * Constraints might reference the tableoid column, so initialize
- // * t_tableOid before evaluating them.
- // */
- // tuple->t_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
- //
- // /* Triggers and stuff need to be invoked in query context. */
- // MemoryContextSwitchTo(oldcontext);
- //
- // /* Place tuple in tuple slot --- but slot shouldn't free it */
- // slot = myslot;
- // ExecStoreTuple(tuple, slot, InvalidBuffer, false);
- //
- // /* Determine the partition to heap_insert the tuple into */
- // if (cstate->partition_dispatch_info)
- // {
- // int leaf_part_index;
- // TupleConversionMap *map;
- //
- // /*
- // * Away we go ... If we end up not finding a partition after all,
- // * ExecFindPartition() does not return and errors out instead.
- // * Otherwise, the returned value is to be used as an index into
- // * arrays mt_partitions[] and mt_partition_tupconv_maps[] that
- // * will get us the ResultRelInfo and TupleConversionMap for the
- // * partition, respectively.
- // */
- // leaf_part_index = ExecFindPartition(resultRelInfo,
- // cstate->partition_dispatch_info,
- // slot,
- // estate);
- // Assert(leaf_part_index >= 0 &&
- // leaf_part_index < cstate->num_partitions);
- //
- // /*
- // * If this tuple is mapped to a partition that is not same as the
- // * previous one, we'd better make the bulk insert mechanism gets a
- // * new buffer.
- // */
- // if (prev_leaf_part_index != leaf_part_index)
- // {
- // ReleaseBulkInsertStatePin(bistate);
- // prev_leaf_part_index = leaf_part_index;
- // }
- //
- // /*
- // * Save the old ResultRelInfo and switch to the one corresponding
- // * to the selected partition.
- // */
- // saved_resultRelInfo = resultRelInfo;
- // resultRelInfo = cstate->partitions + leaf_part_index;
- //
- // /* We do not yet have a way to insert into a foreign partition */
- // if (resultRelInfo->ri_FdwRoutine)
- // ereport(ERROR,
- // (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- // errmsg("cannot route inserted tuples to a foreign table")));
- //
- // /*
- // * For ExecInsertIndexTuples() to work on the partition's indexes
- // */
- // estate->es_result_relation_info = resultRelInfo;
- //
- // /*
- // * If we're capturing transition tuples, we might need to convert
- // * from the partition rowtype to parent rowtype.
- // */
- // if (cstate->transition_capture != NULL)
- // {
- // if (resultRelInfo->ri_TrigDesc &&
- // (resultRelInfo->ri_TrigDesc->trig_insert_before_row ||
- // resultRelInfo->ri_TrigDesc->trig_insert_instead_row))
- // {
- // /*
- // * If there are any BEFORE or INSTEAD triggers on the
- // * partition, we'll have to be ready to convert their
- // * result back to tuplestore format.
- // */
- // cstate->transition_capture->tcs_original_insert_tuple = NULL;
- // cstate->transition_capture->tcs_map =
- // cstate->transition_tupconv_maps[leaf_part_index];
- // }
- // else
- // {
- // /*
- // * Otherwise, just remember the original unconverted
- // * tuple, to avoid a needless round trip conversion.
- // */
- // cstate->transition_capture->tcs_original_insert_tuple = tuple;
- // cstate->transition_capture->tcs_map = NULL;
- // }
- // }
- // /*
- // * We might need to convert from the parent rowtype to the
- // * partition rowtype.
- // */
- // map = cstate->partition_tupconv_maps[leaf_part_index];
- // if (map)
- // {
- // Relation partrel = resultRelInfo->ri_RelationDesc;
- //
- // tuple = do_convert_tuple(tuple, map);
- //
- // /*
- // * We must use the partition's tuple descriptor from this
- // * point on. Use a dedicated slot from this point on until
- // * we're finished dealing with the partition.
- // */
- // slot = cstate->partition_tuple_slot;
- // Assert(slot != NULL);
- // ExecSetSlotDescriptor(slot, RelationGetDescr(partrel));
- // ExecStoreTuple(tuple, slot, InvalidBuffer, true);
- // }
- //
- // tuple->t_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
- // }
- //
- // skip_tuple = false;
- //
- // /* BEFORE ROW INSERT Triggers */
- // if (resultRelInfo->ri_TrigDesc &&
- // resultRelInfo->ri_TrigDesc->trig_insert_before_row)
- // {
- // slot = ExecBRInsertTriggers(estate, resultRelInfo, slot);
- //
- // if (slot == NULL) /* "do nothing" */
- // skip_tuple = true;
- // else /* trigger might have changed tuple */
- // tuple = ExecMaterializeSlot(slot);
- // }
- // }
- // else
- // {
- // skip_tuple = true;
- // }
- //
- // if (!skip_tuple)
- // {
- // if (resultRelInfo->ri_TrigDesc &&
- // resultRelInfo->ri_TrigDesc->trig_insert_instead_row)
- // {
- // /* Pass the data to the INSTEAD ROW INSERT trigger */
- // ExecIRInsertTriggers(estate, resultRelInfo, slot);
- // }
- // else
- // {
- // /*
- // * We always check the partition constraint, including when
- // * the tuple got here via tuple-routing. However we don't
- // * need to in the latter case if no BR trigger is defined on
- // * the partition. Note that a BR trigger might modify the
- // * tuple such that the partition constraint is no longer
- // * satisfied, so we need to check in that case.
- // */
- // bool check_partition_constr =
- // (resultRelInfo->ri_PartitionCheck != NIL);
- //
- // if (saved_resultRelInfo != NULL &&
- // !(resultRelInfo->ri_TrigDesc &&
- // resultRelInfo->ri_TrigDesc->trig_insert_before_row))
- // check_partition_constr = false;
- //
- // /* Check the constraints of the tuple */
- // if (cstate->rel->rd_att->constr || check_partition_constr)
- // ExecConstraints(resultRelInfo, slot, estate);
- //
- // if (useHeapMultiInsert)
- // {
- // /* Add this tuple to the tuple buffer */
- // if (nBufferedTuples == 0)
- // firstBufferedLineNo = cstate->cur_lineno;
- // bufferedTuples[nBufferedTuples++] = tuple;
- // bufferedTuplesSize += tuple->t_len;
- //
- // /*
- // * If the buffer filled up, flush it. Also flush if the
- // * total size of all the tuples in the buffer becomes
- // * large, to avoid using large amounts of memory for the
- // * buffer when the tuples are exceptionally wide.
- // */
- // if (nBufferedTuples == MAX_BUFFERED_TUPLES ||
- // bufferedTuplesSize > 65535)
- // {
- // CopyFromInsertBatch(cstate, estate, mycid, hi_options,
- // resultRelInfo, myslot, bistate,
- // nBufferedTuples, bufferedTuples,
- // firstBufferedLineNo);
- // nBufferedTuples = 0;
- // bufferedTuplesSize = 0;
- // }
- // }
- // else
- // {
- // List *recheckIndexes = NIL;
- //
- // /* OK, store the tuple and create index entries for it */
- // heap_insert(resultRelInfo->ri_RelationDesc, tuple, mycid,
- // hi_options, bistate);
- //
- // if (resultRelInfo->ri_NumIndices > 0)
- // recheckIndexes = ExecInsertIndexTuples(slot,
- // &(tuple->t_self),
- // estate,
- // false,
- // NULL,
- // NIL);
- //
- // /* AFTER ROW INSERT Triggers */
- // ExecARInsertTriggers(estate, resultRelInfo, tuple,
- // recheckIndexes, cstate->transition_capture);
- //
- // list_free(recheckIndexes);
- // }
- // }
- //
- // /*
- // * We count only tuples not suppressed by a BEFORE INSERT trigger;
- // * this is the same definition used by execMain.c for counting
- // * tuples inserted by an INSERT command.
- // */
- // processed++;
- //
- // if (saved_resultRelInfo)
- // {
- // resultRelInfo = saved_resultRelInfo;
- // estate->es_result_relation_info = resultRelInfo;
- // }
- // }
}
- //
- // /* Flush any remaining buffered tuples */
- // if (nBufferedTuples > 0)
- // CopyFromInsertBatch(cstate, estate, mycid, hi_options,
- // resultRelInfo, myslot, bistate,
- // nBufferedTuples, bufferedTuples,
- // firstBufferedLineNo);
- //
- // /* Done, clean up */
- // error_context_stack = errcallback.previous;
- //
- // FreeBulkInsertState(bistate);
/* Wait for all workers to complete their work. */
wait_for_workers_to_finish(pst);
- MemoryContextSwitchTo(oldcontext);
-
- /*
- * In the old protocol, tell pqcomm that we can process normal protocol
- * messages again.
- */
- if (cstate->copy_dest == COPY_OLD_FE)
- pq_endmsgread();
-
- /* Execute AFTER STATEMENT insertion triggers */
- ExecASInsertTriggers(pst->estate, pst->resultRelInfo, cstate->transition_capture);
-
- /* Handle queued AFTER triggers */
- AfterTriggerEndQuery(pst->estate);
+ // /* Wait to be signalled. */
+ // WaitLatch(MyLatch, WL_LATCH_SET, 0, PG_WAIT_EXTENSION);
+ // /* Reset the latch so we don't spin. */
+ // ResetLatch(MyLatch);
- ExecResetTupleTable(pst->estate->es_tupleTable, false);
-
- ExecCloseIndices(pst->resultRelInfo);
-
- /* Close all the partitioned tables, leaf partitions, and their indices */
- if (cstate->partition_dispatch_info)
- {
- int i;
-
- /*
- * Remember cstate->partition_dispatch_info[0] corresponds to the root
- * partitioned table, which we must not try to close, because it is
- * the main target table of COPY that will be closed eventually by
- * DoCopy(). Also, tupslot is NULL for the root partitioned table.
- */
- for (i = 1; i < cstate->num_dispatch; i++)
- {
- PartitionDispatch pd = cstate->partition_dispatch_info[i];
-
- heap_close(pd->reldesc, NoLock);
- ExecDropSingleTupleTableSlot(pd->tupslot);
- }
- for (i = 0; i < cstate->num_partitions; i++)
- {
- ResultRelInfo *resultRelInfo = cstate->partitions + i;
-
- ExecCloseIndices(resultRelInfo);
- heap_close(resultRelInfo->ri_RelationDesc, NoLock);
- }
-
- /* Release the standalone partition tuple descriptor */
- ExecDropSingleTupleTableSlot(cstate->partition_tuple_slot);
- }
-
- /* Close any trigger target relations */
- ExecCleanUpTriggerState(pst->estate);
-
- FreeExecutorState(pst->estate);
+ LWLockAcquire(CopyFromBgwLock, LW_EXCLUSIVE);
+ processed = pst->processed;
+ LWLockRelease(CopyFromBgwLock);
- /*
- * If we skipped writing WAL, then we need to sync the heap (but not
- * indexes since those use WAL anyway)
- */
- if (pst->hi_options & HEAP_INSERT_SKIP_WAL)
- heap_sync(cstate->rel);
+ MemoryContextSwitchTo(oldcontext);
/* Clean up. */
dsm_detach(seg);
--
2.11.0
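
Note: the patch above is easier to follow with the leader/worker message flow
collected in one place. The following is a minimal illustrative sketch, not
part of the patch series: the leader reads lines with CopyReadLine() and
round-robins them across the per-worker shm_mq queues, and each worker blocks
in shm_mq_receive(), treating a zero-length message as end of input. The names
mq_handles and nworkers and the zero-length shutdown convention are taken from
the patch; distribute_lines() and consume_lines() are hypothetical helpers,
assumed to live in src/backend/commands/copy.c.

    /* Sketch only; assumed to live in src/backend/commands/copy.c. */

    /* Leader: send each input line to the next worker's queue, round-robin. */
    static void
    distribute_lines(CopyState cstate, shm_mq_handle **mq_handles, int nworkers)
    {
        int     next_worker = 0;
        int     i;

        /*
         * CopyReadLine() returns true at EOF; this simplified loop ignores
         * a trailing unterminated line.
         */
        while (!CopyReadLine(cstate))
        {
            /* Blocking send: a full queue applies back-pressure to the leader. */
            if (shm_mq_send(mq_handles[next_worker],
                            cstate->line_buf.len,
                            cstate->line_buf.data,
                            false) != SHM_MQ_SUCCESS)
                elog(ERROR, "could not send line to background worker");

            next_worker = (next_worker + 1) % nworkers;
        }

        /* A zero-length message tells each worker to stop. */
        for (i = 0; i < nworkers; i++)
            shm_mq_send(mq_handles[i], 0, "", false);
    }

    /* Worker: consume lines until the zero-length shutdown message arrives. */
    static void
    consume_lines(shm_mq_handle *mqh)
    {
        for (;;)
        {
            Size    len;
            void   *data;

            if (shm_mq_receive(mqh, &len, &data, false) != SHM_MQ_SUCCESS ||
                len == 0)
                break;
            /* NextCopyFrom()/heap_insert() machinery runs here in the patch. */
        }
    }

Round-robin keeps the send side trivial at the cost of ignoring per-worker
load; a full queue simply blocks the leader until that worker catches up.
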
From db5271de4d82b0faaeb40abc2f3ee52fd7b7cef3 Mon Sep 17 00:00:00 2001
From: Alex K <alex.lumir@gmail.com>
Date: Sat, 19 Aug 2017 02:18:36 +0300
Subject: [PATCH 11/13] Comment out per-row log output
---
src/backend/commands/copy.c | 26 ++++++++++++++++++--------
1 file changed, 18 insertions(+), 8 deletions(-)
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 083291d1c7..2dec839563 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -1048,7 +1048,8 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt,
cstate = BeginCopyFrom(pstate, rel, stmt->filename, stmt->is_program,
NULL, stmt->attlist, stmt->options);
- if (cstate->allow_parallel) /* copy from file to database */
+ if (cstate->allow_parallel) /* copy from file to database */
+ // if (false)
{
*processed = ParallelCopyFrom(cstate, pstate, rel, stmt->filename, stmt->is_program,
stmt->attlist, stmt->options);
@@ -5406,7 +5407,7 @@ CopyFromBgwMainLoop(Datum main_arg)
elog(LOG, "BGWorker #%d: got zero-length message, stopping", myworkernumber);
break;
}
- elog(LOG, "BGWorker #%d processing line: %s", myworkernumber, msg);
+ // elog(LOG, "BGWorker #%d processing line: %s", myworkernumber, msg);
cstate->line_buf.data = msg;
cstate->line_buf.len = len;
@@ -5747,8 +5748,11 @@ CopyFromBgwMainLoop(Datum main_arg)
// }
LWLockRelease(CopyFromBgwLock);
+	elog(LOG, "BGWorker #%d processed: " UINT64_FORMAT, myworkernumber, processed);
+
/* Signal main process that we are done. */
- ConditionVariableBroadcast(&cv);
+ // ConditionVariableBroadcast(&cv);
+ SetLatch(&registrant->procLatch);
// TODO Segmentation fault:
// * frame #0: 0x0000000107519678 postgres`GetMemoryChunkContext(pointer=0x00007fdf268bf858) at memutils.h:124
@@ -6120,7 +6124,13 @@ wait_for_workers_to_finish(volatile ParallelState *pst)
elog(LOG, "Going to sleep again");
/* Wait for the workers to wake us up. */
- ConditionVariableSleep(&cv, WAIT_EVENT_COPY_FROM_BGWORKERS_FINISHED);
+ // ConditionVariableSleep(&cv, WAIT_EVENT_COPY_FROM_BGWORKERS_FINISHED);
+
+ /* Wait to be signalled. */
+ WaitLatch(MyLatch, WL_LATCH_SET, 0, PG_WAIT_EXTENSION);
+
+ /* Reset the latch so we don't spin. */
+ ResetLatch(MyLatch);
/* An interrupt may have occurred while we were waiting. */
CHECK_FOR_INTERRUPTS();
@@ -6144,11 +6154,11 @@ ParallelCopyFrom(CopyState cstate, ParseState *pstate,
ParallelState *pst;
dsm_segment *seg;
int32 nworkers = max_parallel_workers_per_gather;
- int64 queue_size = 100;
+ int64 queue_size = 100000;
shm_mq_handle **mq_handles;
shm_mq_result shmq_res;
int last_worker_used = 0;
- int message_size = sizeof(char);
+ int message_size = sizeof(char) * 80;
mq_handles = palloc0(sizeof(shm_mq_handle *) * nworkers);
@@ -6157,7 +6167,7 @@ ParallelCopyFrom(CopyState cstate, ParseState *pstate,
uint64 processed = 0;
// MQ size = 100 messages x 80 chars each
- pst = setup_parallel_copy_from(message_size * 80 * queue_size,
+ pst = setup_parallel_copy_from(message_size * queue_size,
nworkers,
&seg,
&mq_handles,
@@ -6211,7 +6221,7 @@ ParallelCopyFrom(CopyState cstate, ParseState *pstate,
}
else
{
- elog(LOG, "Sending line #%d to BGWorker #%d", cstate->cur_lineno, last_worker_used + 1);
+ // elog(LOG, "Sending line #%d to BGWorker #%d", cstate->cur_lineno, last_worker_used + 1);
shmq_res = shm_mq_send(mq_handles[++last_worker_used - 1], cstate->line_buf.len, cstate->line_buf.data, false);
if (shmq_res != SHM_MQ_SUCCESS)
--
2.11.0
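
Note: patch 11 swaps the condition-variable handshake for latches. Below is a
minimal sketch of that rendezvous using the patch's own names (ParallelState,
workers_finished, workers_total, CopyFromBgwLock); the two function names are
hypothetical. The pattern is race-free because a SetLatch() issued before the
leader reaches WaitLatch() is not lost: the latch stays set until ResetLatch()
clears it.

    /* Sketch only; ParallelState and CopyFromBgwLock come from the patch. */

    /* Leader: sleep until every worker has bumped workers_finished. */
    static void
    wait_until_workers_finish(volatile ParallelState *pst)
    {
        for (;;)
        {
            bool    done;

            LWLockAcquire(CopyFromBgwLock, LW_SHARED);
            done = (pst->workers_finished == pst->workers_total);
            LWLockRelease(CopyFromBgwLock);
            if (done)
                break;

            /* Sleep until a worker sets our latch, then clear it. */
            WaitLatch(MyLatch, WL_LATCH_SET, 0, PG_WAIT_EXTENSION);
            ResetLatch(MyLatch);
            CHECK_FOR_INTERRUPTS();
        }
    }

    /* Worker: after updating the shared counters, wake the leader. */
    static void
    notify_leader(void)
    {
        PGPROC *registrant = BackendPidGetProc(MyBgworkerEntry->bgw_notify_pid);

        if (registrant != NULL)
            SetLatch(&registrant->procLatch);
    }
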
From 4aca6121dff2e9629c586d7e6df4c8c10aabdc3d Mon Sep 17 00:00:00 2001
From: Alex K <alex.lumir@gmail.com>
Date: Sat, 19 Aug 2017 14:54:53 +0300
Subject: [PATCH 12/13] Do not allow parallel COPY FROM for temp tables, since
 they are not available inside the BGWorker process. TODO: check for a better
 solution.
---
src/backend/commands/copy.c | 19 ++++++++++---------
1 file changed, 10 insertions(+), 9 deletions(-)
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 2dec839563..de63a2bb4a 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -407,8 +407,8 @@ static void setup_dsm(int64 queue_size, int nworkers,
static WorkerState *setup_background_workers(int nworkers,
dsm_segment *seg);
static void cleanup_background_workers(dsm_segment *seg, Datum arg);
-static void wait_for_workers_to_become_ready(WorkerState *wstate,
- volatile ParallelState *pst);
+static void wait_for_workers(WorkerState *wstate,
+ volatile ParallelState *pst);
static bool check_worker_status(WorkerState *wstate);
static void handle_sigterm(SIGNAL_ARGS);
@@ -1480,8 +1480,6 @@ BeginCopy(ParseState *pstate,
/* Allocate workspace and zero all fields */
cstate = (CopyStateData *) palloc0(sizeof(CopyStateData));
- cstate->allow_parallel = IsNormalProcessingMode() && IsUnderPostmaster;
-
/*
* We allocate everything used by a cstate in a new memory context. This
* avoids memory leaks during repeated use of COPY in a query.
@@ -3091,6 +3089,10 @@ BeginCopyFrom(ParseState *pstate,
cstate->cur_attname = NULL;
cstate->cur_attval = NULL;
+ cstate->allow_parallel = IsNormalProcessingMode()
+ && IsUnderPostmaster
+ && !rel->rd_islocaltemp;
+
/* Set up variables to avoid per-attribute overhead. */
initStringInfo(&cstate->attribute_buf);
initStringInfo(&cstate->line_buf);
@@ -5827,7 +5829,7 @@ setup_parallel_copy_from(int64 queue_size, int32 nworkers, dsm_segment **segp,
}
/* Wait for workers to become ready. */
- wait_for_workers_to_become_ready(wstate, pst);
+ wait_for_workers(wstate, pst);
/* Wait to be signalled. */
// WaitLatch(MyLatch, WL_LATCH_SET, 0, PG_WAIT_EXTENSION);
/* Reset the latch so we don't spin. */
@@ -6033,7 +6035,7 @@ cleanup_background_workers(dsm_segment *seg, Datum arg)
}
static void
-wait_for_workers_to_become_ready(WorkerState *wstate,
+wait_for_workers(WorkerState *wstate,
volatile ParallelState *pst)
{
bool result = false;
@@ -6154,11 +6156,11 @@ ParallelCopyFrom(CopyState cstate, ParseState *pstate,
ParallelState *pst;
dsm_segment *seg;
int32 nworkers = max_parallel_workers_per_gather;
- int64 queue_size = 100000;
shm_mq_handle **mq_handles;
shm_mq_result shmq_res;
int last_worker_used = 0;
- int message_size = sizeof(char) * 80;
+ int message_size = sizeof(char) * 80; // 80 chars each
+ int64 queue_size = 100000; // Number of messages in MQ
mq_handles = palloc0(sizeof(shm_mq_handle *) * nworkers);
@@ -6166,7 +6168,6 @@ ParallelCopyFrom(CopyState cstate, ParseState *pstate,
uint64 processed = 0;
- // MQ size = 100 messages x 80 chars each
pst = setup_parallel_copy_from(message_size * queue_size,
nworkers,
&seg,
--
2.11.0
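
Note: patches 12 and 13 together gate the parallel path. Below is a compact
restatement of the resulting predicate; copy_from_may_parallelize() is a
hypothetical name, while the individual checks are the patch's own. Temporary
tables are excluded because their pages live in backend-local buffers that the
background workers cannot read, and (per the next patch) only top-level COPY
statements qualify.

    /* Sketch only; the helper name is hypothetical. */
    static bool
    copy_from_may_parallelize(Relation rel, bool is_top_level)
    {
        return IsNormalProcessingMode() &&   /* not bootstrap or standalone */
               IsUnderPostmaster &&          /* dynamic bgworkers need a postmaster */
               !rel->rd_islocaltemp &&       /* temp rels are backend-local */
               is_top_level;                 /* no nested or SPI invocations */
    }
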
From c56d117049c597646751d5919c6a9f7d49c5afb0 Mon Sep 17 00:00:00 2001
From: Alex K <alex.lumir@gmail.com>
Date: Thu, 24 Aug 2017 15:50:38 +0300
Subject: [PATCH 13/13] Do not allow parallel COPY FROM when not at top level
---
src/backend/commands/copy.c | 74 ++++++++++++++++-----------------------------
src/backend/tcop/utility.c | 2 +-
src/include/commands/copy.h | 10 ++----
3 files changed, 30 insertions(+), 56 deletions(-)
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index de63a2bb4a..d4b506257d 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -237,8 +237,6 @@ typedef struct CopyStateData
char *raw_buf;
int raw_buf_index; /* next byte to process */
int raw_buf_len; /* total # of bytes stored */
-
- bool allow_parallel;
} CopyStateData;
/* DestReceiver for COPY (query) TO */
@@ -392,18 +390,13 @@ static bool CopyGetInt16(CopyState cstate, int16 *val);
static ParallelState* setup_parallel_copy_from(int64 queue_size, int32 nworkers,
- dsm_segment **segp, shm_mq_handle **mq_handles[], CopyState cstate,
- ParseState *pstate,
- List *attnamelist,
- List *options);
+ dsm_segment **segp, shm_mq_handle **mq_handles[],
+ const char *query_string);
static void setup_dsm(int64 queue_size, int nworkers,
dsm_segment **segp,
ParallelState **pstp,
shm_mq **mqs[],
- CopyState cstate,
- ParseState *pstate,
- List *attnamelist,
- List *options);
+ const char *query_string);
static WorkerState *setup_background_workers(int nworkers,
dsm_segment *seg);
static void cleanup_background_workers(dsm_segment *seg, Datum arg);
@@ -855,7 +848,8 @@ CopyLoadRawBuf(CopyState cstate)
void
DoCopy(ParseState *pstate, const CopyStmt *stmt,
int stmt_location, int stmt_len,
- uint64 *processed)
+ uint64 *processed, const char *query_string,
+ bool is_top_level)
{
CopyState cstate;
bool is_from = stmt->is_from;
@@ -1038,6 +1032,11 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt,
if (is_from)
{
+ bool allow_parallel = IsNormalProcessingMode()
+ && IsUnderPostmaster
+ && !rel->rd_islocaltemp
+ && is_top_level;
+
Assert(rel);
/* check read-only transaction and parallel mode */
@@ -1048,11 +1047,9 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt,
cstate = BeginCopyFrom(pstate, rel, stmt->filename, stmt->is_program,
NULL, stmt->attlist, stmt->options);
- if (cstate->allow_parallel) /* copy from file to database */
- // if (false)
+ if (allow_parallel) /* copy from file to database */
{
- *processed = ParallelCopyFrom(cstate, pstate, rel, stmt->filename, stmt->is_program,
- stmt->attlist, stmt->options);
+ *processed = ParallelCopyFrom(cstate, query_string);
}
else
{
@@ -3089,10 +3086,6 @@ BeginCopyFrom(ParseState *pstate,
cstate->cur_attname = NULL;
cstate->cur_attval = NULL;
- cstate->allow_parallel = IsNormalProcessingMode()
- && IsUnderPostmaster
- && !rel->rd_islocaltemp;
-
/* Set up variables to avoid per-attribute overhead. */
initStringInfo(&cstate->attribute_buf);
initStringInfo(&cstate->line_buf);
@@ -5133,6 +5126,10 @@ CopyFromBgwMainLoop(Datum main_arg)
BackgroundWorkerInitializeConnectionByOid(pst->database_id,
pst->authenticated_user_id);
+ StartTransactionCommand();
+
+ PushActiveSnapshot(GetTransactionSnapshot());
+
/*
* Set the client encoding to the database encoding, since that is what
* the leader will expect.
@@ -5151,7 +5148,7 @@ CopyFromBgwMainLoop(Datum main_arg)
pstmt = lfirst_node(PlannedStmt, list_head(plantree_list));
cstmnt = (CopyStmt *) pstmt->utilityStmt;
- elog(LOG, "BGWorker #%d filename from CopyStmt: %s", myworkernumber, cstmnt->filename);
+ // elog(LOG, "BGWorker #%d filename from CopyStmt: %s", myworkernumber, cstmnt->filename);
pstate = make_parsestate(NULL);
pstate->p_sourcetext = query_string;
@@ -5186,8 +5183,6 @@ CopyFromBgwMainLoop(Datum main_arg)
registrant = BackendPidGetProc(MyBgworkerEntry->bgw_notify_pid);
SetLatch(&registrant->procLatch);
- StartTransactionCommand();
-
/* Open and lock the relation, using the appropriate lock type. */
rel = heap_openrv(cstmnt->relation, RowExclusiveLock);
// (is_from ? RowExclusiveLock : AccessShareLock));
@@ -5195,6 +5190,8 @@ CopyFromBgwMainLoop(Datum main_arg)
cstate = BeginCopyFrom(pstate, rel, cstmnt->filename, cstmnt->is_program,
NULL, cstmnt->attlist, cstmnt->options);
+ Assert(cstate->rel);
+
estate = CreateExecutorState(); /* for ExecConstraints() */
mycid = GetCurrentCommandId(true);
hi_options = 0; /* start with default heap_insert options */
@@ -5799,10 +5796,7 @@ handle_sigterm(SIGNAL_ARGS)
*/
static ParallelState*
setup_parallel_copy_from(int64 queue_size, int32 nworkers, dsm_segment **segp,
- shm_mq_handle ***mq_handles, CopyState cstate,
- ParseState *pstate,
- List *attnamelist,
- List *options)
+ shm_mq_handle ***mq_handles, const char *query_string)
{
int i;
dsm_segment *seg;
@@ -5813,10 +5807,7 @@ setup_parallel_copy_from(int64 queue_size, int32 nworkers, dsm_segment **segp,
mqs = palloc0(sizeof(shm_mq *) * nworkers);
/* Set up a dynamic shared memory segment. */
- setup_dsm(queue_size, nworkers, &seg, &pst, &mqs, cstate,
- pstate,
- attnamelist,
- options);
+ setup_dsm(queue_size, nworkers, &seg, &pst, &mqs, query_string);
*segp = seg;
/* Register background workers. */
@@ -5857,10 +5848,7 @@ setup_parallel_copy_from(int64 queue_size, int32 nworkers, dsm_segment **segp,
static void
setup_dsm(int64 queue_size, int nworkers,
dsm_segment **segp, ParallelState **pstp,
- shm_mq ***mqs, CopyState cstate,
- ParseState *pstate,
- List *attnamelist,
- List *options)
+ shm_mq ***mqs, const char *query_string)
{
shm_toc_estimator e;
int i;
@@ -5871,8 +5859,6 @@ setup_dsm(int64 queue_size, int nworkers,
ParallelState *pst;
char *shm_query;
- Assert(cstate->rel);
-
/* Ensure a valid queue size. */
if (queue_size < 0 || ((uint64) queue_size) < shm_mq_minimum_size)
ereport(ERROR,
@@ -5896,7 +5882,7 @@ setup_dsm(int64 queue_size, int nworkers,
shm_toc_initialize_estimator(&e);
shm_toc_estimate_chunk(&e, sizeof(ParallelState));
- shm_toc_estimate_chunk(&e, strlen(ActivePortal->sourceText) * sizeof(char));
+ shm_toc_estimate_chunk(&e, strlen(query_string) + 1);
for (i = 0; i < nworkers; ++i)
shm_toc_estimate_chunk(&e, (Size) queue_size);
@@ -5925,7 +5911,7 @@ setup_dsm(int64 queue_size, int nworkers,
- shm_query = shm_toc_allocate(toc, strlen(ActivePortal->sourceText) * sizeof(char));
- strcpy(shm_query, ActivePortal->sourceText);
+ shm_query = shm_toc_allocate(toc, strlen(query_string) + 1);
+ strcpy(shm_query, query_string);
shm_toc_insert(toc, toc_key++, shm_query);
@@ -6146,12 +6132,7 @@ wait_for_workers_to_finish(volatile ParallelState *pst)
* Parallel Copy FROM file to relation.
*/
extern uint64
-ParallelCopyFrom(CopyState cstate, ParseState *pstate,
- Relation rel,
- const char *filename,
- bool is_program,
- List *attnamelist,
- List *options)
+ParallelCopyFrom(CopyState cstate, const char *query_string)
{
ParallelState *pst;
dsm_segment *seg;
@@ -6172,10 +6153,7 @@ ParallelCopyFrom(CopyState cstate, ParseState *pstate,
nworkers,
&seg,
&mq_handles,
- cstate,
- pstate,
- attnamelist,
- options);
+ query_string);
elog(LOG, "Copying from file %s", cstate->filename);
diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c
index ddacac8774..11fa7af728 100644
--- a/src/backend/tcop/utility.c
+++ b/src/backend/tcop/utility.c
@@ -559,7 +559,7 @@ standard_ProcessUtility(PlannedStmt *pstmt,
DoCopy(pstate, (CopyStmt *) parsetree,
pstmt->stmt_location, pstmt->stmt_len,
- &processed);
+ &processed, queryString, isTopLevel);
if (completionTag)
snprintf(completionTag, COMPLETION_TAG_BUFSIZE,
"COPY " UINT64_FORMAT, processed);
diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h
index dbc769828f..5d5982f4c9 100644
--- a/src/include/commands/copy.h
+++ b/src/include/commands/copy.h
@@ -26,7 +26,8 @@ typedef int (*copy_data_source_cb) (void *outbuf, int minread, int maxread);
extern void DoCopy(ParseState *state, const CopyStmt *stmt,
int stmt_location, int stmt_len,
- uint64 *processed);
+ uint64 *processed, const char *query_string,
+ bool is_top_level);
extern void ProcessCopyOptions(ParseState *pstate, CopyState cstate, bool is_from, List *options);
extern CopyState BeginCopyFrom(ParseState *pstate, Relation rel, const char *filename,
@@ -40,12 +41,7 @@ extern void CopyFromErrorCallback(void *arg);
extern void CopyFromBgwMainLoop(Datum main_arg);
extern uint64 CopyFrom(CopyState cstate);
-extern uint64 ParallelCopyFrom(CopyState cstate, ParseState *pstate,
- Relation rel,
- const char *filename,
- bool is_program,
- List *attnamelist,
- List *options);
+extern uint64 ParallelCopyFrom(CopyState cstate, const char *query_string);
extern DestReceiver *CreateCopyDestReceiver(void);
--
2.11.0
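Patch 13 starts the worker's transaction before BeginCopyFrom and has each worker re-parse the original statement text out of shared memory. Below is a minimal sketch of the matching worker-side receive loop, again patterned on contrib/test_shm_mq rather than taken from the patch; copy_bgw_main, COPY_BGW_MAGIC, and the fixed TOC keys are assumptions.

/*
 * Hedged sketch of a worker-side receive loop. The real entry point in
 * this series is CopyFromBgwMainLoop; the names and key layout here are
 * illustrative only.
 */
#include "postgres.h"

#include "storage/dsm.h"
#include "storage/proc.h"
#include "storage/shm_mq.h"
#include "storage/shm_toc.h"

#define COPY_BGW_MAGIC 0x79fb2449	/* made-up magic value */

void
copy_bgw_main(Datum main_arg)
{
	dsm_segment *seg;
	shm_toc    *toc;
	char	   *query_string;
	shm_mq	   *mq;
	shm_mq_handle *mqh;
	shm_mq_result res;
	Size		len;
	void	   *line;

	/* Assume the leader passed the DSM handle through bgw_main_arg. */
	seg = dsm_attach(DatumGetUInt32(main_arg));
	if (seg == NULL)
		ereport(ERROR,
				(errmsg("could not attach to dynamic shared memory segment")));

	toc = shm_toc_attach(COPY_BGW_MAGIC, dsm_segment_address(seg));

	/* Key 1: the original COPY statement text, to be re-parsed here. */
	query_string = shm_toc_lookup(toc, 1, false);
	elog(LOG, "worker re-parsing: %s", query_string);

	/* Key 2: this worker's line queue; attach as the receiver. */
	mq = shm_toc_lookup(toc, 2, false);
	shm_mq_set_receiver(mq, MyProc);
	mqh = shm_mq_attach(mq, seg, NULL);

	/* Pull lines until the leader detaches, signalling end of input. */
	for (;;)
	{
		res = shm_mq_receive(mqh, &len, &line, false);
		if (res == SHM_MQ_DETACHED)
			break;				/* leader is done sending */
		/* ... parse the line and insert the tuple here ... */
	}

	dsm_detach(seg);
}

A real worker would push each received line through COPY's per-line parsing and heap insertion inside the transaction it opened at startup, which is the work CopyFromBgwMainLoop does in the hunks above.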