Alexey Kondratov ololobus

## recommendations.py
#!/usr/bin/env spark-submit

import operator
import random
import math
import os

from pymongo import MongoClient


## badass-checker.py
#!/usr/bin/env python

import sys
import json

# Path of the recommendations file for the team whose identifier is given as
# the first command-line argument; indexing sys.argv[1] raises IndexError when
# the argument is missing.
team_path = '/users/team%s/ozon_recoms.txt' % sys.argv[1]

# Flag that starts out False.
# NOTE(review): presumably flipped to True by the checks later in the script
# (not visible in this excerpt) — confirm.
badass_detected = False


## Rails_ActiveRecord_SQL_caching_bench.md

      
              2 files
            
          
              0 forks
            
          
              1 comment
            
          
              0 stars
            
          
                ololobus
                / Rails_ActiveRecord_SQL_caching_bench.md
            
            
              Last active
              March 11, 2023 00:25
            
              
                Rails ActiveRecord SQL Caching vs Cache to Hash catalog benchmark
              
          
    Results

Caching with a Ruby hash is about 1000 times faster than issuing a DB request on every iteration without Rails ActiveRecord SQL caching, and about 200 times faster than issuing a request on every iteration with SQL caching enabled.

  
## README.md

      
              4 files
            
          
              0 forks
            
          
              0 comments
            
          
              1 star
            
          
                ololobus
                / README.md
            
            
              Last active
              July 12, 2018 09:44
            
              
                PostgreSQL benchmark: eval plainto_tsquery 6 times or eval it once and cache by WITH statement
              
          
    ### Results for real full-text search query
See tsquery_staight_eval.sql and tsquery_with_cache.sql.
Caching the tsquery with a WITH statement is 1.2-1.6 times faster than evaluating plainto_tsquery directly each time, for a simple text query.

  
## create_index.sql
-- Populate both tsvector columns from the raw text columns. The text search
-- configuration is taken from the row's lang*_psql column, falling back to
-- 'simple' when it is NULL; a NULL text is treated as an empty string.
UPDATE tus SET text1_tsvector = to_tsvector(COALESCE(lang1_psql, 'simple')::regconfig, COALESCE(text1, ''));
UPDATE tus SET text2_tsvector = to_tsvector(COALESCE(lang2_psql, 'simple')::regconfig, COALESCE(text2, ''));

/* CREATE INDEX text1_tsvector_idx ON tus USING gin(text1_tsvector);
CREATE INDEX text2_tsvector_idx ON tus USING gin(text2_tsvector); */
-- One multicolumn GIN index covering both tsvector columns; the commented-out
-- statements above are the one-index-per-column alternative.
CREATE INDEX text_tsvector_idx ON tus USING gin(text1_tsvector, text2_tsvector);

CREATE FUNCTION text_tsvector_update() RETURNS TRIGGER AS $$
BEGIN
  IF TG_OP = 'INSERT' THEN

## repeating-numbers.py
import re

# Matches a digit string containing at least one pair of identical adjacent
# digits ("00", "11", ..., "99"). Written as a raw string: "\d" inside a plain
# literal is an invalid escape sequence (DeprecationWarning since Python 3.6,
# SyntaxWarning since 3.12).
pattern = re.compile(r'(\d*)(00|11|22|33|44|55|66|77|88|99)(\d*)')

# Number of integers in [1, 9999] whose decimal form has a repeated adjacent
# digit.
n = 0

for i in range(1, 10000):
    # match() is anchored at the start of the string, but the leading (\d*)
    # can absorb any prefix, so a pair anywhere in the number is detected.
    if pattern.match(str(i)):
        n += 1

## postgres-pro.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                ololobus
                / postgres-pro.md
            
            
              Last active
              October 20, 2016 16:18
            
              
                Answers to https://xakep.ru/2016/08/11/coding-challenges-211/
              
          
    Задачи от Postgres Professional

[1]

-- The 10 most recently added posts whose usr_id appears as a friend
-- (friend.friend_usr_id) of the user passed in as $1.
select * from post 
inner join friend on friend.friend_usr_id = post.usr_id
where friend.usr_id = $1
order by post.added desc
limit 10;

  
## nls.R
# Load the input data set from data.csv (path is relative to the working
# directory).
df <- read.csv("data.csv")

# Pull the col1 and col2 columns out of the data frame as plain vectors
x <- df$col1
y <- df$col2

# Random seed (currently disabled; uncomment to fix the seed)
# set.seed(20170227)

# NOTE(review): presumably the starting value of a parameter `a` for a
# nonlinear least-squares fit later in the script (not visible here) — confirm.
a_start <- 5000

## info.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                ololobus
                / info.md
            
            
              Last active
              April 22, 2022 09:54
            
              
                GSOC'17 Application 
              
          
    Parallel COPY with error handling

Resources


https://www.postgresql.org/message-id/flat/603c8f070909141218i291bc983t501507ebc996a531%40mail.gmail.com#603c8f070909141218i291bc983t501507ebc996a531@mail.gmail.com
https://github.com/ossc-db/pg_bulkload
http://paradigm4.com/HTMLmanual/13.3/scidb_ug/ch05s02s02.html
https://wiki.postgresql.org/wiki/Error_logging_in_COPY


## create-and-fill-up-table.sql
-- Benchmark table: one integer column and two floating-point columns.
CREATE TABLE large_test (num1 bigint, num2 double precision, num3 double precision);

-- Fill the table with 20,000,000 random rows: num1 is a whole number in
-- 0..10, num2 lies in [0, 1) and num3 lies in [0, 142).
INSERT INTO large_test (num1, num2, num3)
  SELECT round(random()*10), random(), random()*142
  FROM generate_series(1, 20000000) s(i);

-- Execute the grouped aggregation and report the actual plan together with
-- timing and buffer-usage statistics.
EXPLAIN (analyse, buffers)
SELECT num1, avg(num3) as num3_avg, sum(num2) as num2_sum
FROM large_test
GROUP BY num1;
	#!/usr/bin/env spark-submit

	import operator
	import random
	import math
	import os

	from pymongo import MongoClient
	#!/usr/bin/env python

	import sys
	import json

	# Path of the recommendations file for the team whose identifier is given
	# as the first command-line argument; indexing sys.argv[1] raises
	# IndexError when the argument is missing.
	team_path = '/users/team%s/ozon_recoms.txt' % sys.argv[1]

	# Flag that starts out False.
	# NOTE(review): presumably flipped to True by later checks (not visible in
	# this excerpt) — confirm.
	badass_detected = False
	-- Populate both tsvector columns from the raw text columns. The text
	-- search configuration is taken from the row's lang*_psql column, falling
	-- back to 'simple' when it is NULL; a NULL text is treated as an empty
	-- string.
	UPDATE tus SET text1_tsvector = to_tsvector(COALESCE(lang1_psql, 'simple')::regconfig, COALESCE(text1, ''));
	UPDATE tus SET text2_tsvector = to_tsvector(COALESCE(lang2_psql, 'simple')::regconfig, COALESCE(text2, ''));

	/* CREATE INDEX text1_tsvector_idx ON tus USING gin(text1_tsvector);
	CREATE INDEX text2_tsvector_idx ON tus USING gin(text2_tsvector); */
	-- One multicolumn GIN index covering both tsvector columns; the
	-- commented-out statements above are the one-index-per-column alternative.
	CREATE INDEX text_tsvector_idx ON tus USING gin(text1_tsvector, text2_tsvector);

	CREATE FUNCTION text_tsvector_update() RETURNS TRIGGER AS $$
	BEGIN
	IF TG_OP = 'INSERT' THEN
	import re

	# Matches a digit string containing at least one pair of identical
	# adjacent digits ("00", "11", ..., "99"). The "*" quantifiers and the
	# unescaped "|" alternation are restored here (they had been lost or
	# escaped in this copy), and the literal is a raw string so that "\d" is
	# not an invalid escape sequence.
	pattern = re.compile(r'(\d*)(00|11|22|33|44|55|66|77|88|99)(\d*)')

	# Number of integers in [1, 9999] whose decimal form has a repeated
	# adjacent digit.
	n = 0

	for i in range(1, 10000):
		s = str(i)
		# match() is anchored at the start, but the leading (\d*) can absorb
		# any prefix, so a pair anywhere in the number is detected.
		if pattern.match(s):
			n += 1
	# Load the input data set from data.csv (path is relative to the working
	# directory).
	df <- read.csv("data.csv")

	# Pull the col1 and col2 columns out of the data frame as plain vectors
	x <- df$col1
	y <- df$col2

	# Random seed (currently disabled; uncomment to fix the seed)
	# set.seed(20170227)

	# NOTE(review): presumably the starting value of a parameter `a` for a
	# nonlinear least-squares fit later in the script (not visible here) —
	# confirm.
	a_start <- 5000
	-- Benchmark table: one integer column and two floating-point columns.
	CREATE TABLE large_test (num1 bigint, num2 double precision, num3 double precision);

	-- Fill the table with 20,000,000 random rows: num1 is a whole number in
	-- 0..10, num2 lies in [0, 1) and num3 lies in [0, 142). The "*"
	-- multiplication operators are restored here; without them the
	-- expressions "random()10" and "random()142" are not valid SQL.
	INSERT INTO large_test (num1, num2, num3)
	SELECT round(random()*10), random(), random()*142
	FROM generate_series(1, 20000000) s(i);

	-- Execute the grouped aggregation and report the actual plan together
	-- with timing and buffer-usage statistics.
	EXPLAIN (analyse, buffers)
	SELECT num1, avg(num3) as num3_avg, sum(num2) as num2_sum
	FROM large_test
	GROUP BY num1;