Skip to content

Instantly share code, notes, and snippets.

@FranckPachot
Last active January 6, 2024 12:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save FranckPachot/50a6a491b85b0ddb3da6399d54653085 to your computer and use it in GitHub Desktop.
Save FranckPachot/50a6a491b85b0ddb3da6399d54653085 to your computer and use it in GitHub Desktop.
1brc in SQL
-- SQL implementation of https://github.com/gunnarmorling/1brc
--
-- get the basic feed data as insert statements in weatherstations.sql
--
/* run in shell to build the 400 lines feed data that was hardcoded in CreateMeasurements.java :
curl -s https://raw.githubusercontent.com/gunnarmorling/1brc/main/src/main/java/dev/morling/onebrc/CreateMeasurements.java | awk '
# Oracle Database # BEGIN{print "begin"} END{ print "end;" ; print "/" }
# PostgreSQL or YugabyteDB # BEGIN{print "begin transaction"} END{ print "commit;" }
$0~re{gsub(q,q q);print gensub(re,"insert into weatherstations values ("q"\\1"q",\\2);",1)}
' q="'" re='^.*new WeatherStation[(]"([^"]*)", *([0-9.]*)[)][,)];?' | tee weatherstations.sql
*/
--
-- create table and load for feed data
--
create table weatherstations (
id varchar(30) not null primary key,
mean_temperature double precision not null
);
/* psql or sqlcl understand \i , sqlplus understands @ */
-- @ weatherstations.sql
\i weatherstations.sql
--
-- Load 1 million rows
--
/* psql defaults to autocommit, set it on oracle to commit each batch */
-- set autocommit on
/* using Oracle dbms_random, can be used from YugabyteDB and from PostgreSQL with orafce extension */
create extension if not exists orafce;
insert /*+ append */ into measurements(city, temperature)
with feed as (
select * from weatherstations order by dbms_random.value()
) , multiplicator as ( select null as x from feed fetch first 50 rows only) -- 50*50*400=1000000
select id, (floor(dbms_random.value() * 21) - 10) + mean_temperature + (dbms_random.value() * 0.1)
from feed cross join multiplicator x1 cross join multiplicator x2
fetch first 1000000 rows only
;
--
-- Load 1 billion rows by repeating 999 times ( sqlcl has repeat, psql has \watch )
--
-- repeat 999 1
\watch c=999
--
-- Get result
--
/* PostgreSQL and YugabyteDB */
-- explain (analyze on, dist on)
select string_agg(city_values,', ' order by city_values) from (
select format('%s=%s/%s/%s',city,min(temperature),round(avg(temperature)::numeric,1),max(temperature)) city_values
from measurements group by city
) agg_by_city
;
/* Oracle Database */
select /*+ parallel(8) gather_plan_statistics*/
select listagg(city_values, ', ') within group (order by city_values) as result
from (
select
'city=' || city || '/' || min(temperature) || '/' || round(avg(temperature), 1) || '/' || max(temperature) as city_values
from measurements
group by city
) agg_by_city;
select * from dbms_xplan.display_cursor(format=>'allstats last');
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment