Alexey Kondratov (ololobus)
From 22045780b95db74a99dc6e13e57413401da92c81 Mon Sep 17 00:00:00 2001
From: Alex K <alex.lumir@gmail.com>
Date: Mon, 10 Jul 2017 17:48:26 +0300
Subject: [PATCH 01/13] Dummy COPY FROM BGWorker v0.1
---
src/backend/commands/copy.c | 42 +++++++++++++++++++++++++++++++++++++++
src/backend/postmaster/bgworker.c | 4 ++++
src/include/commands/copy.h | 1 +
3 files changed, 47 insertions(+)
From 18f328c8a20dfdbd805921c6dcb6d665067507a5 Mon Sep 17 00:00:00 2001
From: Alex K <alex.lumir@gmail.com>
Date: Fri, 9 Jun 2017 23:41:51 +0300
Subject: [PATCH 1/8] Allow ignoring some errors during COPY FROM
---
contrib/file_fdw/file_fdw.c | 4 +-
src/backend/commands/copy.c | 352 +++++++++++++++++++++++++-------------------
src/include/commands/copy.h | 2 +-
3 files changed, 207 insertions(+), 151 deletions(-)
ololobus / 1-gsoc-final-submission.md
Last active September 6, 2017 12:32
Final submission for a GSoC project 'Add errors handling and parallel execution to COPY'

Final submission for the PostgreSQL GSoC'17 project 'Add errors handling and parallel execution to COPY'

Alexey Kondratov (kondratov.aleksey@gmail.com).

Error handling in COPY FROM

Details

In my initial proposal I planned to use subtransactions for error handling, since that is the only completely safe way to catch all possible errors during COPY FROM execution. However, it would cause a serious problem: severe transaction ID (XID) consumption hidden from the end user. This may lead to a huge performance drop when errors are frequent in the input data.
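To see why, consider a minimal PL/pgSQL sketch of the subtransaction-per-row approach (the staging and target tables here are hypothetical, not part of the patch). Each BEGIN ... EXCEPTION block opens a subtransaction, and every subtransaction that actually writes a row is assigned its own XID, so loading N rows this way can advance the XID counter by roughly N:

DO $$
DECLARE
    r record;
BEGIN
    -- Hypothetical tables: staging holds raw input, target has constraints
    FOR r IN SELECT a, b FROM staging LOOP
        BEGIN
            -- The EXCEPTION clause makes this block a subtransaction,
            -- which consumes one XID for every row it inserts
            INSERT INTO target (a, b) VALUES (r.a, r.b);
        EXCEPTION WHEN others THEN
            RAISE NOTICE 'skipping bad row: %', SQLERRM;
        END;
    END LOOP;
END
$$;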

ololobus / create-and-fill-up-table.sql
Last active March 25, 2024 14:59
Create large ~1 GB random dataset in PostgreSQL
-- ~1 GB of random data: 20 million rows, one bigint and two double precision columns
CREATE TABLE large_test (num1 bigint, num2 double precision, num3 double precision);

INSERT INTO large_test (num1, num2, num3)
SELECT round(random()*10), random(), random()*142
FROM generate_series(1, 20000000) s(i);

-- Sample aggregation query to exercise the dataset
EXPLAIN (analyse, buffers)
SELECT num1, avg(num3) AS num3_avg, sum(num2) AS num2_sum
FROM large_test
GROUP BY num1;
ololobus / nls.R
Last active March 17, 2017 09:58
Nonlinear (weighted) least-squares example in R
df <- read.csv("data.csv")
# Extract columns as numeric vectors
x <- df$col1
y <- df$col2
# Optionally fix the random seed for reproducibility
# set.seed(20170227)
# Starting value for the nls() fit
a_start <- 5000

Problems from Postgres Professional

[1]

select * from post 
inner join friend on friend.friend_usr_id = post.usr_id
where friend.usr_id = $1
order by post.added desc
limit 10;
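A plausible pair of supporting indexes for this query, as a sketch only (my assumption, not part of the original gist): one to find the user's friends, and one to read each friend's posts in recency order.

-- Hypothetical supporting indexes (an assumption, not from the gist)
CREATE INDEX friend_usr_id_idx ON friend (usr_id, friend_usr_id);
CREATE INDEX post_usr_id_added_idx ON post (usr_id, added DESC);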
import re

# Count how many numbers from 1 to 9999 contain a pair of identical adjacent digits
pattern = re.compile(r'(\d*)(00|11|22|33|44|55|66|77|88|99)(\d*)')
n = 0
for i in range(1, 10000):
    s = str(i)
    if pattern.match(s):
        n += 1
ololobus / create_index.sql
Created December 4, 2015 22:58
PostgreSQL create index and full-text search query
-- Populate the tsvector columns, falling back to the 'simple' configuration
-- when no language-specific one is set
UPDATE tus SET text1_tsvector = to_tsvector(COALESCE(lang1_psql, 'simple')::regconfig, COALESCE(text1, ''));
UPDATE tus SET text2_tsvector = to_tsvector(COALESCE(lang2_psql, 'simple')::regconfig, COALESCE(text2, ''));

/* Two single-column indexes:
CREATE INDEX text1_tsvector_idx ON tus USING gin(text1_tsvector);
CREATE INDEX text2_tsvector_idx ON tus USING gin(text2_tsvector); */

-- ...replaced by one multicolumn GIN index
CREATE INDEX text_tsvector_idx ON tus USING gin(text1_tsvector, text2_tsvector);

-- Trigger function to keep the tsvector columns in sync with the text columns
CREATE FUNCTION text_tsvector_update() RETURNS TRIGGER AS $$
BEGIN
    IF TG_OP = 'INSERT' THEN
ololobus / README.md
Last active July 12, 2018 09:44
PostgreSQL benchmark: evaluate plainto_tsquery six times vs. evaluate it once and cache the result with a WITH statement

### Results for a real full-text search query

See tsquery_staight_eval.sql and tsquery_with_cache.sql. Caching the tsquery with a WITH statement is 1.2–1.6 times faster than straight plainto_tsquery evaluation for a simple text query.
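As a minimal sketch of the two variants being benchmarked, assuming a hypothetical docs table with a body_tsv tsvector column (the real queries live in the two .sql files above):

-- Straight evaluation: plainto_tsquery() is spelled out at every use site
SELECT id, ts_rank(body_tsv, plainto_tsquery('simple', 'some text query')) AS score
FROM docs
WHERE body_tsv @@ plainto_tsquery('simple', 'some text query')
ORDER BY score DESC;

-- Cached evaluation: compute the tsquery once in a WITH clause and reuse it
WITH q AS (
    SELECT plainto_tsquery('simple', 'some text query') AS tsq
)
SELECT d.id, ts_rank(d.body_tsv, q.tsq) AS score
FROM docs d, q
WHERE d.body_tsv @@ q.tsq
ORDER BY score DESC;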