Skip to content

Instantly share code, notes, and snippets.

@svilupp
Last active January 3, 2024 20:34
Show Gist options
  • Save svilupp/b2b0859dc90916fbab5c68cd5507621c to your computer and use it in GitHub Desktop.
Save svilupp/b2b0859dc90916fbab5c68cd5507621c to your computer and use it in GitHub Desktop.
1-billion-row-challenge
# Converted from https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CreateMeasurements.java
# Call via CLI (last arg is how many rows to create)
# Example: `julia create_measurements.jl 1000000`
using Random
"""
    WeatherStation(id, meanTemperature)

Immutable record describing one weather station (the Julia counterpart of the
Java record in the original generator).

# Fields
- `id::String`: station name as written to the measurements file.
- `meanTemperature::Float64`: value around which simulated readings are drawn.
"""
struct WeatherStation
    id::String
    meanTemperature::Float64
end
"""
    measurement(ws::WeatherStation)

Return one simulated reading for `ws`: a standard-normal draw scaled by 10 and
shifted by `ws.meanTemperature`, rounded to one decimal place.
"""
measurement(ws::WeatherStation) = round(10 * randn() + ws.meanTemperature; digits=1)
"""
    main()

Generate `measurements.txt` in the current working directory.

The number of rows is taken from the single command-line argument in `ARGS`.
Each row has the form `<station id>;<measurement>` where the station is picked
uniformly at random from the built-in table and the value comes from
`measurement(station)`. A progress line is printed every 50 million rows and a
summary line at the end.

If the argument is missing, is not an integer, or is negative, a usage message
is printed and the function returns without touching the file.

# Returns
`nothing`.
"""
function main()
    start = time()
    if length(ARGS) != 1
        println("Usage: create_measurements.jl <number of records to create>")
        return nothing
    end
    # `tryparse` yields `nothing` on malformed input, so no exception has to be
    # caught (a bare `catch` would also swallow unrelated errors).
    nrows = tryparse(Int, ARGS[1])
    if nrows === nothing || nrows < 0
        println("Invalid value for <number of records to create>")
        println("Usage: create_measurements.jl <number of records to create>")
        return nothing
    end
    # List of weather stations
    stations = [
        WeatherStation("Abha", 18.0),
        WeatherStation("Abidjan", 26.0),
        WeatherStation("Abéché", 29.4),
        WeatherStation("Accra", 26.4),
        WeatherStation("Addis Ababa", 16.0),
        WeatherStation("Adelaide", 17.3),
        WeatherStation("Aden", 29.1),
        WeatherStation("Ahvaz", 25.4),
        WeatherStation("Albuquerque", 14.0),
        WeatherStation("Alexandra", 11.0),
        WeatherStation("Alexandria", 20.0),
        WeatherStation("Algiers", 18.2),
        WeatherStation("Alice Springs", 21.0),
        WeatherStation("Almaty", 10.0),
        WeatherStation("Amsterdam", 10.2),
        WeatherStation("Anadyr", -6.9),
        WeatherStation("Anchorage", 2.8),
        WeatherStation("Andorra la Vella", 9.8),
        WeatherStation("Ankara", 12.0),
        WeatherStation("Antananarivo", 17.9),
        WeatherStation("Antsiranana", 25.2),
        WeatherStation("Arkhangelsk", 1.3),
        WeatherStation("Ashgabat", 17.1),
        WeatherStation("Asmara", 15.6),
        WeatherStation("Assab", 30.5),
        WeatherStation("Astana", 3.5),
        WeatherStation("Athens", 19.2),
        WeatherStation("Atlanta", 17.0),
        WeatherStation("Auckland", 15.2),
        WeatherStation("Austin", 20.7),
        WeatherStation("Baghdad", 22.77),
        WeatherStation("Baguio", 19.5),
        WeatherStation("Baku", 15.1),
        WeatherStation("Baltimore", 13.1),
        WeatherStation("Bamako", 27.8),
        WeatherStation("Bangkok", 28.6),
        WeatherStation("Bangui", 26.0),
        WeatherStation("Banjul", 26.0),
        WeatherStation("Barcelona", 18.2),
        WeatherStation("Bata", 25.1),
        WeatherStation("Batumi", 14.0),
        WeatherStation("Beijing", 12.9),
        WeatherStation("Beirut", 20.9),
        WeatherStation("Belgrade", 12.5),
        WeatherStation("Belize City", 26.7),
        WeatherStation("Benghazi", 19.9),
        WeatherStation("Bergen", 7.7),
        WeatherStation("Berlin", 10.3),
        WeatherStation("Bilbao", 14.7),
        WeatherStation("Birao", 26.5),
        WeatherStation("Bishkek", 11.3),
        WeatherStation("Bissau", 27.0),
        WeatherStation("Blantyre", 22.2),
        WeatherStation("Bloemfontein", 15.6),
        WeatherStation("Boise", 11.4),
        WeatherStation("Bordeaux", 14.2),
        WeatherStation("Bosaso", 30.0),
        WeatherStation("Boston", 10.9),
        WeatherStation("Bouaké", 26.0),
        WeatherStation("Bratislava", 10.5),
        WeatherStation("Brazzaville", 25.0),
        WeatherStation("Bridgetown", 27.0),
        WeatherStation("Brisbane", 21.4),
        WeatherStation("Brussels", 10.5),
        WeatherStation("Bucharest", 10.8),
        WeatherStation("Budapest", 11.3),
        WeatherStation("Bujumbura", 23.8),
        WeatherStation("Bulawayo", 18.9),
        WeatherStation("Burnie", 13.1),
        WeatherStation("Busan", 15.0),
        WeatherStation("Cabo San Lucas", 23.9),
        WeatherStation("Cairns", 25.0),
        WeatherStation("Cairo", 21.4),
        WeatherStation("Calgary", 4.4),
        WeatherStation("Canberra", 13.1),
        WeatherStation("Cape Town", 16.2),
        WeatherStation("Changsha", 17.4),
        WeatherStation("Charlotte", 16.1),
        WeatherStation("Chiang Mai", 25.8),
        WeatherStation("Chicago", 9.8),
        WeatherStation("Chihuahua", 18.6),
        WeatherStation("Chișinău", 10.2),
        WeatherStation("Chittagong", 25.9),
        WeatherStation("Chongqing", 18.6),
        WeatherStation("Christchurch", 12.2),
        WeatherStation("City of San Marino", 11.8),
        WeatherStation("Colombo", 27.4),
        WeatherStation("Columbus", 11.7),
        WeatherStation("Conakry", 26.4),
        WeatherStation("Copenhagen", 9.1),
        WeatherStation("Cotonou", 27.2),
        WeatherStation("Cracow", 9.3),
        WeatherStation("Da Lat", 17.9),
        WeatherStation("Da Nang", 25.8),
        WeatherStation("Dakar", 24.0),
        WeatherStation("Dallas", 19.0),
        WeatherStation("Damascus", 17.0),
        WeatherStation("Dampier", 26.4),
        WeatherStation("Dar es Salaam", 25.8),
        WeatherStation("Darwin", 27.6),
        WeatherStation("Denpasar", 23.7),
        WeatherStation("Denver", 10.4),
        WeatherStation("Detroit", 10.0),
        WeatherStation("Dhaka", 25.9),
        WeatherStation("Dikson", -11.1),
        WeatherStation("Dili", 26.6),
        WeatherStation("Djibouti", 29.9),
        WeatherStation("Dodoma", 22.7),
        WeatherStation("Dolisie", 24.0),
        WeatherStation("Douala", 26.7),
        WeatherStation("Dubai", 26.9),
        WeatherStation("Dublin", 9.8),
        WeatherStation("Dunedin", 11.1),
        WeatherStation("Durban", 20.6),
        WeatherStation("Dushanbe", 14.7),
        WeatherStation("Edinburgh", 9.3),
        WeatherStation("Edmonton", 4.2),
        WeatherStation("El Paso", 18.1),
        WeatherStation("Entebbe", 21.0),
        WeatherStation("Erbil", 19.5),
        WeatherStation("Erzurum", 5.1),
        WeatherStation("Fairbanks", -2.3),
        WeatherStation("Fianarantsoa", 17.9),
        WeatherStation("Flores, Petén", 26.4),
        WeatherStation("Frankfurt", 10.6),
        WeatherStation("Fresno", 17.9),
        WeatherStation("Fukuoka", 17.0),
        WeatherStation("Gabès", 19.5),
        WeatherStation("Gaborone", 21.0),
        WeatherStation("Gagnoa", 26.0),
        WeatherStation("Gangtok", 15.2),
        WeatherStation("Garissa", 29.3),
        WeatherStation("Garoua", 28.3),
        WeatherStation("George Town", 27.9),
        WeatherStation("Ghanzi", 21.4),
        WeatherStation("Gjoa Haven", -14.4),
        WeatherStation("Guadalajara", 20.9),
        WeatherStation("Guangzhou", 22.4),
        WeatherStation("Guatemala City", 20.4),
        WeatherStation("Halifax", 7.5),
        WeatherStation("Hamburg", 9.7),
        WeatherStation("Hamilton", 13.8),
        WeatherStation("Hanga Roa", 20.5),
        WeatherStation("Hanoi", 23.6),
        WeatherStation("Harare", 18.4),
        WeatherStation("Harbin", 5.0),
        WeatherStation("Hargeisa", 21.7),
        WeatherStation("Hat Yai", 27.0),
        WeatherStation("Havana", 25.2),
        WeatherStation("Helsinki", 5.9),
        WeatherStation("Heraklion", 18.9),
        WeatherStation("Hiroshima", 16.3),
        WeatherStation("Ho Chi Minh City", 27.4),
        WeatherStation("Hobart", 12.7),
        WeatherStation("Hong Kong", 23.3),
        WeatherStation("Honiara", 26.5),
        WeatherStation("Honolulu", 25.4),
        WeatherStation("Houston", 20.8),
        WeatherStation("Ifrane", 11.4),
        WeatherStation("Indianapolis", 11.8),
        WeatherStation("Iqaluit", -9.3),
        WeatherStation("Irkutsk", 1.0),
        WeatherStation("Istanbul", 13.9),
        WeatherStation("İzmir", 17.9),
        WeatherStation("Jacksonville", 20.3),
        WeatherStation("Jakarta", 26.7),
        WeatherStation("Jayapura", 27.0),
        WeatherStation("Jerusalem", 18.3),
        WeatherStation("Johannesburg", 15.5),
        WeatherStation("Jos", 22.8),
        WeatherStation("Juba", 27.8),
        WeatherStation("Kabul", 12.1),
        WeatherStation("Kampala", 20.0),
        WeatherStation("Kandi", 27.7),
        WeatherStation("Kankan", 26.5),
        WeatherStation("Kano", 26.4),
        WeatherStation("Kansas City", 12.5),
        WeatherStation("Karachi", 26.0),
        WeatherStation("Karonga", 24.4),
        WeatherStation("Kathmandu", 18.3),
        WeatherStation("Khartoum", 29.9),
        WeatherStation("Kingston", 27.4),
        WeatherStation("Kinshasa", 25.3),
        WeatherStation("Kolkata", 26.7),
        WeatherStation("Kuala Lumpur", 27.3),
        WeatherStation("Kumasi", 26.0),
        WeatherStation("Kunming", 15.7),
        WeatherStation("Kuopio", 3.4),
        WeatherStation("Kuwait City", 25.7),
        WeatherStation("Kyiv", 8.4),
        WeatherStation("Kyoto", 15.8),
        WeatherStation("La Ceiba", 26.2),
        WeatherStation("La Paz", 23.7),
        WeatherStation("Lagos", 26.8),
        WeatherStation("Lahore", 24.3),
        WeatherStation("Lake Havasu City", 23.7),
        WeatherStation("Lake Tekapo", 8.7),
        WeatherStation("Las Palmas de Gran Canaria", 21.2),
        WeatherStation("Las Vegas", 20.3),
        WeatherStation("Launceston", 13.1),
        WeatherStation("Lhasa", 7.6),
        WeatherStation("Libreville", 25.9),
        WeatherStation("Lisbon", 17.5),
        WeatherStation("Livingstone", 21.8),
        WeatherStation("Ljubljana", 10.9),
        WeatherStation("Lodwar", 29.3),
        WeatherStation("Lomé", 26.9),
        WeatherStation("London", 11.3),
        WeatherStation("Los Angeles", 18.6),
        WeatherStation("Louisville", 13.9),
        WeatherStation("Luanda", 25.8),
        WeatherStation("Lubumbashi", 20.8),
        WeatherStation("Lusaka", 19.9),
        WeatherStation("Luxembourg City", 9.3),
        WeatherStation("Lviv", 7.8),
        WeatherStation("Lyon", 12.5),
        WeatherStation("Madrid", 15.0),
        WeatherStation("Mahajanga", 26.3),
        WeatherStation("Makassar", 26.7),
        WeatherStation("Makurdi", 26.0),
        WeatherStation("Malabo", 26.3),
        WeatherStation("Malé", 28.0),
        WeatherStation("Managua", 27.3),
        WeatherStation("Manama", 26.5),
        WeatherStation("Mandalay", 28.0),
        WeatherStation("Mango", 28.1),
        WeatherStation("Manila", 28.4),
        WeatherStation("Maputo", 22.8),
        WeatherStation("Marrakesh", 19.6),
        WeatherStation("Marseille", 15.8),
        WeatherStation("Maun", 22.4),
        WeatherStation("Medan", 26.5),
        WeatherStation("Mek'ele", 22.7),
        WeatherStation("Melbourne", 15.1),
        WeatherStation("Memphis", 17.2),
        WeatherStation("Mexicali", 23.1),
        WeatherStation("Mexico City", 17.5),
        WeatherStation("Miami", 24.9),
        WeatherStation("Milan", 13.0),
        WeatherStation("Milwaukee", 8.9),
        WeatherStation("Minneapolis", 7.8),
        WeatherStation("Minsk", 6.7),
        WeatherStation("Mogadishu", 27.1),
        WeatherStation("Mombasa", 26.3),
        WeatherStation("Monaco", 16.4),
        WeatherStation("Moncton", 6.1),
        WeatherStation("Monterrey", 22.3),
        WeatherStation("Montreal", 6.8),
        WeatherStation("Moscow", 5.8),
        WeatherStation("Mumbai", 27.1),
        WeatherStation("Murmansk", 0.6),
        WeatherStation("Muscat", 28.0),
        WeatherStation("Mzuzu", 17.7),
        WeatherStation("N'Djamena", 28.3),
        WeatherStation("Naha", 23.1),
        WeatherStation("Nairobi", 17.8),
        WeatherStation("Nakhon Ratchasima", 27.3),
        WeatherStation("Napier", 14.6),
        WeatherStation("Napoli", 15.9),
        WeatherStation("Nashville", 15.4),
        WeatherStation("Nassau", 24.6),
        WeatherStation("Ndola", 20.3),
        WeatherStation("New Delhi", 25.0),
        WeatherStation("New Orleans", 20.7),
        WeatherStation("New York City", 12.9),
        WeatherStation("Ngaoundéré", 22.0),
        WeatherStation("Niamey", 29.3),
        WeatherStation("Nicosia", 19.7),
        WeatherStation("Niigata", 13.9),
        WeatherStation("Nouadhibou", 21.3),
        WeatherStation("Nouakchott", 25.7),
        WeatherStation("Novosibirsk", 1.7),
        WeatherStation("Nuuk", -1.4),
        WeatherStation("Odesa", 10.7),
        WeatherStation("Odienné", 26.0),
        WeatherStation("Oklahoma City", 15.9),
        WeatherStation("Omaha", 10.6),
        WeatherStation("Oranjestad", 28.1),
        WeatherStation("Oslo", 5.7),
        WeatherStation("Ottawa", 6.6),
        WeatherStation("Ouagadougou", 28.3),
        WeatherStation("Ouahigouya", 28.6),
        WeatherStation("Ouarzazate", 18.9),
        WeatherStation("Oulu", 2.7),
        WeatherStation("Palembang", 27.3),
        WeatherStation("Palermo", 18.5),
        WeatherStation("Palm Springs", 24.5),
        WeatherStation("Palmerston North", 13.2),
        WeatherStation("Panama City", 28.0),
        WeatherStation("Parakou", 26.8),
        WeatherStation("Paris", 12.3),
        WeatherStation("Perth", 18.7),
        WeatherStation("Petropavlovsk-Kamchatsky", 1.9),
        WeatherStation("Philadelphia", 13.2),
        WeatherStation("Phnom Penh", 28.3),
        WeatherStation("Phoenix", 23.9),
        WeatherStation("Pittsburgh", 10.8),
        WeatherStation("Podgorica", 15.3),
        WeatherStation("Pointe-Noire", 26.1),
        WeatherStation("Pontianak", 27.7),
        WeatherStation("Port Moresby", 26.9),
        WeatherStation("Port Sudan", 28.4),
        WeatherStation("Port Vila", 24.3),
        WeatherStation("Port-Gentil", 26.0),
        WeatherStation("Portland (OR)", 12.4),
        WeatherStation("Porto", 15.7),
        WeatherStation("Prague", 8.4),
        WeatherStation("Praia", 24.4),
        WeatherStation("Pretoria", 18.2),
        WeatherStation("Pyongyang", 10.8),
        WeatherStation("Rabat", 17.2),
        WeatherStation("Rangpur", 24.4),
        WeatherStation("Reggane", 28.3),
        WeatherStation("Reykjavík", 4.3),
        WeatherStation("Riga", 6.2),
        WeatherStation("Riyadh", 26.0),
        WeatherStation("Rome", 15.2),
        WeatherStation("Roseau", 26.2),
        WeatherStation("Rostov-on-Don", 9.9),
        WeatherStation("Sacramento", 16.3),
        WeatherStation("Saint Petersburg", 5.8),
        WeatherStation("Saint-Pierre", 5.7),
        WeatherStation("Salt Lake City", 11.6),
        WeatherStation("San Antonio", 20.8),
        WeatherStation("San Diego", 17.8),
        WeatherStation("San Francisco", 14.6),
        WeatherStation("San Jose", 16.4),
        WeatherStation("San José", 22.6),
        WeatherStation("San Juan", 27.2),
        WeatherStation("San Salvador", 23.1),
        WeatherStation("Sana'a", 20.0),
        WeatherStation("Santo Domingo", 25.9),
        WeatherStation("Sapporo", 8.9),
        WeatherStation("Sarajevo", 10.1),
        WeatherStation("Saskatoon", 3.3),
        WeatherStation("Seattle", 11.3),
        WeatherStation("Ségou", 28.0),
        WeatherStation("Seoul", 12.5),
        WeatherStation("Seville", 19.2),
        WeatherStation("Shanghai", 16.7),
        WeatherStation("Singapore", 27.0),
        WeatherStation("Skopje", 12.4),
        WeatherStation("Sochi", 14.2),
        WeatherStation("Sofia", 10.6),
        WeatherStation("Sokoto", 28.0),
        WeatherStation("Split", 16.1),
        WeatherStation("St. John's", 5.0),
        WeatherStation("St. Louis", 13.9),
        WeatherStation("Stockholm", 6.6),
        WeatherStation("Surabaya", 27.1),
        WeatherStation("Suva", 25.6),
        WeatherStation("Suwałki", 7.2),
        WeatherStation("Sydney", 17.7),
        WeatherStation("Tabora", 23.0),
        WeatherStation("Tabriz", 12.6),
        WeatherStation("Taipei", 23.0),
        WeatherStation("Tallinn", 6.4),
        WeatherStation("Tamale", 27.9),
        WeatherStation("Tamanrasset", 21.7),
        WeatherStation("Tampa", 22.9),
        WeatherStation("Tashkent", 14.8),
        WeatherStation("Tauranga", 14.8),
        WeatherStation("Tbilisi", 12.9),
        WeatherStation("Tegucigalpa", 21.7),
        WeatherStation("Tehran", 17.0),
        WeatherStation("Tel Aviv", 20.0),
        WeatherStation("Thessaloniki", 16.0),
        WeatherStation("Thiès", 24.0),
        WeatherStation("Tijuana", 17.8),
        WeatherStation("Timbuktu", 28.0),
        WeatherStation("Tirana", 15.2),
        WeatherStation("Toamasina", 23.4),
        WeatherStation("Tokyo", 15.4),
        WeatherStation("Toliara", 24.1),
        WeatherStation("Toluca", 12.4),
        WeatherStation("Toronto", 9.4),
        WeatherStation("Tripoli", 20.0),
        WeatherStation("Tromsø", 2.9),
        WeatherStation("Tucson", 20.9),
        WeatherStation("Tunis", 18.4),
        WeatherStation("Ulaanbaatar", -0.4),
        WeatherStation("Upington", 20.4),
        WeatherStation("Ürümqi", 7.4),
        WeatherStation("Vaduz", 10.1),
        WeatherStation("Valencia", 18.3),
        WeatherStation("Valletta", 18.8),
        WeatherStation("Vancouver", 10.4),
        WeatherStation("Veracruz", 25.4),
        WeatherStation("Vienna", 10.4),
        WeatherStation("Vientiane", 25.9),
        WeatherStation("Villahermosa", 27.1),
        WeatherStation("Vilnius", 6.0),
        WeatherStation("Virginia Beach", 15.8),
        WeatherStation("Vladivostok", 4.9),
        WeatherStation("Warsaw", 8.5),
        WeatherStation("Washington, D.C.", 14.6),
        WeatherStation("Wau", 27.8),
        WeatherStation("Wellington", 12.9),
        WeatherStation("Whitehorse", -0.1),
        WeatherStation("Wichita", 13.9),
        WeatherStation("Willemstad", 28.0),
        WeatherStation("Winnipeg", 3.0),
        WeatherStation("Wrocław", 9.6),
        WeatherStation("Xi'an", 14.1),
        WeatherStation("Yakutsk", -8.8),
        WeatherStation("Yangon", 27.5),
        WeatherStation("Yaoundé", 23.8),
        WeatherStation("Yellowknife", -4.3),
        WeatherStation("Yerevan", 12.4),
        WeatherStation("Yinchuan", 9.0),
        WeatherStation("Zagreb", 10.7),
        WeatherStation("Zanzibar City", 26.0),
        WeatherStation("Zürich", 9.3),
    ]
    open("measurements.txt", "w") do file
        for i in 1:nrows
            # Progress report every 50 million rows
            if i > 1 && i % 50_000_000 == 0
                println("Wrote $i measurements in $(time() - start) s")
            end
            station = rand(stations)
            # Print the pieces straight to the file instead of concatenating a
            # temporary String for every row.
            print(file, station.id, ';', measurement(station), '\n')
        end
    end
    println("Created file with $nrows measurements in $(time() - start) s")
    return nothing
end
# Run the generator when this file is executed as a script
# (e.g. `julia create_measurements.jl 1000000`).
main()
# Naive solution with DataFrames.jl
using CSV, DataFrames, Statistics
"""
    main()

Read `measurements.txt` (semicolon-separated `station;temperature` rows without
a header) into a `DataFrame`, compute the minimum, mean and maximum temperature
per station, round each to one decimal place, and print one line per station in
alphabetical order as `station=min/mean/max`.
"""
function main()
    df = CSV.read("measurements.txt", DataFrame; header=[:station, :temperature], delim=";")
    df_out = combine(
        groupby(df, :station),
        :temperature => minimum,
        :temperature => mean,
        :temperature => maximum,
    )
    stat_cols = [:temperature_minimum, :temperature_mean, :temperature_maximum]
    transform!(df_out, stat_cols .=> ByRow(x -> round(x; digits=1)), renamecols=false)
    sort!(df_out, :station)
    for row in eachrow(df_out)
        # print on new lines, instead of comma separated as the original challenge;
        # the value order is min/mean/max, as in the challenge's output format
        println(
            row.station, "=",
            row.temperature_minimum, "/", row.temperature_mean, "/", row.temperature_maximum,
        )
    end
end
# Run the DataFrames-based solution under `@time`, which prints elapsed time and
# allocation statistics for the call.
@time main()
# Timings noted by the author:
# 1M rows: 0.9s
# 10M rows: 9.3s
# generated with GPT4 to read the stream
"""
    calculate_stations(filename; io::IO=stdout)

Stream `filename` line by line and print per-station statistics to `io`.

Each non-empty line must have the form `<station>;<temperature>`; the line is
split at the first `;`. For every station the minimum, mean and maximum
temperature are rounded to one decimal place and printed, sorted by station
name, as `station=min/mean/max` (one station per line).

# Arguments
- `filename`: path of the measurements file.

# Keywords
- `io::IO=stdout`: stream the result lines are written to.

# Throws
- `ArgumentError` if a non-empty line has no `;` or its temperature cannot be
  parsed as a `Float64`.
"""
function calculate_stations(filename; io::IO=stdout)
    # station => (minimum, maximum, running total, row count); a single table
    # means one lookup and one store per row.
    stats = Dict{String,Tuple{Float64,Float64,Float64,Int}}()
    for line in eachline(filename)
        # Tolerate blank lines (e.g. an extra newline at the end of the file)
        isempty(line) && continue
        sep = findfirst(';', line)
        if sep === nothing
            throw(ArgumentError("expected `<station>;<temperature>`, got $(repr(line))"))
        end
        station = line[1:prevind(line, sep)]
        temp = parse(Float64, SubString(line, nextind(line, sep)))
        # For a station seen for the first time, `temp` itself seeds min and max
        lo, hi, total, n = get(stats, station, (temp, temp, 0.0, 0))
        stats[station] = (temp < lo ? temp : lo, temp > hi ? temp : hi, total + temp, n + 1)
    end
    # Calculate mean and print the stations in sorted order
    for station in sort!(collect(keys(stats)))
        lo, hi, total, n = stats[station]
        mean_temp = round(total / n, digits=1)
        min_temp = round(lo, digits=1)
        max_temp = round(hi, digits=1)
        println(io, "$station=$min_temp/$mean_temp/$max_temp")
    end
    return nothing
end
# Usage: run the streaming solution on the generated file under `@time`, which
# prints elapsed time and allocation statistics for the call.
@time calculate_stations("measurements.txt")
# Timings noted by the author:
# 10M rows: 5s
# 100M rows: 27s
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment