Last active
January 5, 2024 16:37
-
-
Save KenBonny/d283123af1303764b7bb447baf0ca368 to your computer and use it in GitHub Desktop.
One Billion Rows F#
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
open System
open System.Diagnostics
open System.IO

/// Relative path of the CSV file this script creates.
/// The file is opened with append = false, so an existing file is overwritten.
[<Literal>]
let file = "./measurements.csv"
/// Station names that generated rows are drawn from (one is picked at random per row).
let stations = [
    "Abha"
    "Abidjan"
    "Abéché"
    "Accra"
    "Addis Ababa"
    "Adelaide"
    "Aden"
    "Ahvaz"
    "Albuquerque"
    "Alexandra"
    "Alexandria"
    "Algiers"
    "Alice Springs"
    "Almaty"
    "Amsterdam"
    "Anadyr"
    "Anchorage"
    "Andorra la Vella"
    "Ankara"
    "Antananarivo"
    "Antsiranana"
    "Arkhangelsk"
    "Ashgabat"
    "Asmara"
    "Assab"
    "Astana"
    "Athens"
    "Atlanta"
    "Auckland"
    "Austin"
    "Baghdad"
    "Baguio"
    "Baku"
    "Baltimore"
    "Bamako"
    "Bangkok"
    "Bangui"
    "Banjul"
    "Barcelona"
    "Bata"
    "Batumi"
    "Beijing"
    "Beirut"
    "Belgrade"
    "Belize City"
    "Benghazi"
    "Bergen"
    "Berlin"
    "Bilbao"
    "Birao"
    "Bishkek"
    "Bissau"
    "Blantyre"
    "Bloemfontein"
    "Boise"
    "Bordeaux"
    "Bosaso"
    "Boston"
    "Bouaké"
    "Bratislava"
    "Brazzaville"
    "Bridgetown"
    "Brisbane"
    "Brussels"
    "Bucharest"
    "Budapest"
    "Bujumbura"
    "Bulawayo"
    "Burnie"
    "Busan"
    "Cabo San Lucas"
    "Cairns"
    "Cairo"
    "Calgary"
    "Canberra"
    "Cape Town"
    "Changsha"
    "Charlotte"
    "Chiang Mai"
    "Chicago"
    "Chihuahua"
    "Chișinău"
    "Chittagong"
    "Chongqing"
    "Christchurch"
    "City of San Marino"
    "Colombo"
    "Columbus"
    "Conakry"
    "Copenhagen"
    "Cotonou"
    "Cracow"
    "Da Lat"
    "Da Nang"
    "Dakar"
    "Dallas"
    "Damascus"
    "Dampier"
    "Dar es Salaam"
    "Darwin"
    "Denpasar"
    "Denver"
    "Detroit"
    "Dhaka"
    "Dikson"
    "Dili"
    "Djibouti"
    "Dodoma"
    "Dolisie"
    "Douala"
    "Dubai"
    "Dublin"
    "Dunedin"
    "Durban"
    "Dushanbe"
    "Edinburgh"
    "Edmonton"
    "El Paso"
    "Entebbe"
    "Erbil"
    "Erzurum"
    "Fairbanks"
    "Fianarantsoa"
    "Flores, Petén"
    "Frankfurt"
    "Fresno"
    "Fukuoka"
    "Gabès"
    "Gaborone"
    "Gagnoa"
    "Gangtok"
    "Garissa"
    "Garoua"
    "George Town"
    "Ghanzi"
    "Gjoa Haven"
    "Guadalajara"
    "Guangzhou"
    "Guatemala City"
    "Halifax"
    "Hamburg"
    "Hamilton"
    "Hanga Roa"
    "Hanoi"
    "Harare"
    "Harbin"
    "Hargeisa"
    "Hat Yai"
    "Havana"
    "Helsinki"
    "Heraklion"
    "Hiroshima"
    "Ho Chi Minh City"
    "Hobart"
    "Hong Kong"
    "Honiara"
    "Honolulu"
    "Houston"
    "Ifrane"
    "Indianapolis"
    "Iqaluit"
    "Irkutsk"
    "Istanbul"
    "İzmir"
    "Jacksonville"
    "Jakarta"
    "Jayapura"
    "Jerusalem"
    "Johannesburg"
    "Jos"
    "Juba"
    "Kabul"
    "Kampala"
    "Kandi"
    "Kankan"
    "Kano"
    "Kansas City"
    "Karachi"
    "Karonga"
    "Kathmandu"
    "Khartoum"
    "Kingston"
    "Kinshasa"
    "Kolkata"
    "Kuala Lumpur"
    "Kumasi"
    "Kunming"
    "Kuopio"
    "Kuwait City"
    "Kyiv"
    "Kyoto"
    "La Ceiba"
    "La Paz"
    "Lagos"
    "Lahore"
    "Lake Havasu City"
    "Lake Tekapo"
    "Las Palmas de Gran Canaria"
    "Las Vegas"
    "Launceston"
    "Lhasa"
    "Libreville"
    "Lisbon"
    "Livingstone"
    "Ljubljana"
    "Lodwar"
    "Lomé"
    "London"
    "Los Angeles"
    "Louisville"
    "Luanda"
    "Lubumbashi"
    "Lusaka"
    "Luxembourg City"
    "Lviv"
    "Lyon"
    "Madrid"
    "Mahajanga"
    "Makassar"
    "Makurdi"
    "Malabo"
    "Malé"
    "Managua"
    "Manama"
    "Mandalay"
    "Mango"
    "Manila"
    "Maputo"
    "Marrakesh"
    "Marseille"
    "Maun"
    "Medan"
    "Mek'ele"
    "Melbourne"
    "Memphis"
    "Mexicali"
    "Mexico City"
    "Miami"
    "Milan"
    "Milwaukee"
    "Minneapolis"
    "Minsk"
    "Mogadishu"
    "Mombasa"
    "Monaco"
    "Moncton"
    "Monterrey"
    "Montreal"
    "Moscow"
    "Mumbai"
    "Murmansk"
    "Muscat"
    "Mzuzu"
    "N'Djamena"
    "Naha"
    "Nairobi"
    "Nakhon Ratchasima"
    "Napier"
    "Napoli"
    "Nashville"
    "Nassau"
    "Ndola"
    "New Delhi"
    "New Orleans"
    "New York City"
    "Ngaoundéré"
    "Niamey"
    "Nicosia"
    "Niigata"
    "Nouadhibou"
    "Nouakchott"
    "Novosibirsk"
    "Nuuk"
    "Odesa"
    "Odienné"
    "Oklahoma City"
    "Omaha"
    "Oranjestad"
    "Oslo"
    "Ottawa"
    "Ouagadougou"
    "Ouahigouya"
    "Ouarzazate"
    "Oulu"
    "Palembang"
    "Palermo"
    "Palm Springs"
    "Palmerston North"
    "Panama City"
    "Parakou"
    "Paris"
    "Perth"
    "Petropavlovsk-Kamchatsky"
    "Philadelphia"
    "Phnom Penh"
    "Phoenix"
    "Pittsburgh"
    "Podgorica"
    "Pointe-Noire"
    "Pontianak"
    "Port Moresby"
    "Port Sudan"
    "Port Vila"
    "Port-Gentil"
    "Portland (OR)"
    "Porto"
    "Prague"
    "Praia"
    "Pretoria"
    "Pyongyang"
    "Rabat"
    "Rangpur"
    "Reggane"
    "Reykjavík"
    "Riga"
    "Riyadh"
    "Rome"
    "Roseau"
    "Rostov-on-Don"
    "Sacramento"
    "Saint Petersburg"
    "Saint-Pierre"
    "Salt Lake City"
    "San Antonio"
    "San Diego"
    "San Francisco"
    "San Jose"
    "San José"
    "San Juan"
    "San Salvador"
    "Sana'a"
    "Santo Domingo"
    "Sapporo"
    "Sarajevo"
    "Saskatoon"
    "Seattle"
    "Ségou"
    "Seoul"
    "Seville"
    "Shanghai"
    "Singapore"
    "Skopje"
    "Sochi"
    "Sofia"
    "Sokoto"
    "Split"
    "St. John's"
    "St. Louis"
    "Stockholm"
    "Surabaya"
    "Suva"
    "Suwałki"
    "Sydney"
    "Tabora"
    "Tabriz"
    "Taipei"
    "Tallinn"
    "Tamale"
    "Tamanrasset"
    "Tampa"
    "Tashkent"
    "Tauranga"
    "Tbilisi"
    "Tegucigalpa"
    "Tehran"
    "Tel Aviv"
    "Thessaloniki"
    "Thiès"
    "Tijuana"
    "Timbuktu"
    "Tirana"
    "Toamasina"
    "Tokyo"
    "Toliara"
    "Toluca"
    "Toronto"
    "Tripoli"
    "Tromsø"
    "Tucson"
    "Tunis"
    "Ulaanbaatar"
    "Upington"
    "Ürümqi"
    "Vaduz"
    "Valencia"
    "Valletta"
    "Vancouver"
    "Veracruz"
    "Vienna"
    "Vientiane"
    "Villahermosa"
    "Vilnius"
    "Virginia Beach"
    "Vladivostok"
    "Warsaw"
    "Washington, D.C."
    "Wau"
    "Wellington"
    "Whitehorse"
    "Wichita"
    "Willemstad"
    "Winnipeg"
    "Wrocław"
    "Xi'an"
    "Yakutsk"
    "Yangon"
    "Yaoundé"
    "Yellowknife"
    "Yerevan"
    "Yinchuan"
    "Zagreb"
    "Zanzibar City"
    "Zürich"
]
// Random() seeds itself; the original seed `int DateTime.Now.Ticks` only
// truncated a 64-bit tick count to 32 bits and added nothing.
let rnd = Random()

/// Random temperature in steps of 0.1, from -10.0 up to 49.9
/// (the upper bound of Random.Next is exclusive).
let temp () = decimal (rnd.Next(-100, 500)) / 10m

// Parse the arguments BEFORE opening the output file, so that an invalid row
// count fails without truncating an existing file or leaking the writer.
let args = fsi.CommandLineArgs |> Array.tail
// First script argument = number of rows to write; defaults to one billion.
let rows = if args.Length > 0 then int args[0] else 1_000_000_000

// Copy the names into an array once: indexing an F# list is O(n) per lookup,
// which would be paid once per generated row.
let stationNames = Seq.toArray stations

let writer = new StreamWriter(file, false)
let stopwatch = Stopwatch.StartNew ()
try
    // A plain loop: building `[1..rows]` and mapping over it would materialise
    // `rows` integers plus a `unit list` of the same length, only to discard both.
    for _ in 1..rows do
        let station = stationNames[rnd.Next(stationNames.Length)]
        // One "<station>;<temperature>" record per line.
        writer.WriteLine($"{station};%g{temp ()}")
finally
    // Flush and close the file even when writing fails part-way.
    writer.Dispose ()
stopwatch.Stop ()
printfn $"Time to create data: {stopwatch.Elapsed}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// Running statistics for a single station. The fields are mutable so that an
/// entry can be updated in place as measurements are read.
type Numbers = {
    /// Smallest measurement seen so far.
    mutable Min: decimal
    /// Largest measurement seen so far.
    mutable Max: decimal
    /// Sum of all measurements seen so far.
    mutable Total: decimal
    /// Number of measurements seen so far (kept as a decimal so the mean is a
    /// plain decimal division).
    mutable Count: decimal
}
/// Folds one measurement into `numbers`, mutating it in place:
/// lowers Min or raises Max when `newNumber` falls outside the current range,
/// then always adds it to Total and increments Count.
let update newNumber numbers =
    // A value cannot be both below Min and above Max, so one branch suffices.
    if newNumber < numbers.Min then
        numbers.Min <- newNumber
    elif newNumber > numbers.Max then
        numbers.Max <- newNumber
    // These two run for every measurement, independent of the range check.
    numbers.Total <- numbers.Total + newNumber
    numbers.Count <- numbers.Count + 1m
/// Average of the measurements accumulated in `numbers` (Total / Count),
/// rounded to two decimal places with System.Math.Round.
/// Raises System.DivideByZeroException when Count is zero.
let mean numbers = System.Math.Round (numbers.Total / numbers.Count, 2)
// Input path: every argument after the script name, concatenated with no separator.
// NOTE(review): an unquoted path containing spaces arrives as several arguments
// and is glued back together WITHOUT the spaces — confirm whether " " was intended.
let file = fsi.CommandLineArgs |> Array.tail |> String.concat ""
let reader = new System.IO.StreamReader(file)
let stopwatch = System.Diagnostics.Stopwatch.StartNew()
// Running statistics per station name.
let stations = System.Collections.Generic.Dictionary<string, Numbers>()
try
    while not reader.EndOfStream do
        let line = reader.ReadLine()
        // Skip blank lines (e.g. a trailing empty line): they have no ';', so
        // Substring(0, -1) would throw. Other malformed lines still raise.
        if line.Length > 0 then
            // Each record is "<station>;<temperature>".
            let semicolonIndex = line.IndexOf ';'
            let key = line.Substring(0, semicolonIndex)
            let temp = decimal (line.Substring(semicolonIndex + 1))
            // Single hash lookup instead of ContainsKey followed by the indexer.
            match stations.TryGetValue key with
            | true, numbers -> update temp numbers
            | _ -> stations.Add(key, {Min = temp; Max = temp; Total = temp; Count = 1m})
finally
    // Release the file handle even when a line fails to parse.
    reader.Dispose()
// Report "<station> = <min> / <mean> / <max>", ordered by station name.
for item in stations |> Seq.sortBy (_.Key) do
    let numbers = item.Value
    printfn $"{item.Key} = {numbers.Min} / {mean numbers} / {numbers.Max}"
stopwatch.Stop()
printfn $"Elapsed: {stopwatch.Elapsed}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Step 1: generate the data set. The argument is the number of rows to write.
dotnet fsi .\generate-rows.fsx 1000000000
# Step 2: aggregate the generated file. The argument is the path of the file to read.
dotnet fsi .\pretty-slow-solution-to-parsing-one-billion-rows.fsx measurements.csv
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment