Skip to content

Instantly share code, notes, and snippets.

@tonsky
Last active October 3, 2021 07:11
Show Gist options
  • Star 5 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tonsky/881d5d8c4fbed818fe2905a7591a91e0 to your computer and use it in GitHub Desktop.
Save tonsky/881d5d8c4fbed818fe2905a7591a91e0 to your computer and use it in GitHub Desktop.
let fs = require('fs')
let parse = require('csv-parse/lib/sync')
/**
 * Sums the `Salary` column of the CSV file at `path`.
 *
 * The file is read synchronously and parsed with a header row
 * (`columns: true`), so every record is an object keyed by column name.
 *
 * @param {string} path - location of the CSV file.
 * @returns {number} sum of all salaries; NaN if any cell is not numeric.
 */
function totalSalary(path) {
  const rows = parse(fs.readFileSync(path), { columns: true })
  // parseFloat takes a single argument: unlike parseInt it has no radix
  // parameter, so the former second argument was silently ignored.
  return rows.reduce((total, row) => total + parseFloat(row.Salary), 0)
}
def total_salary(path: str) -> list[float]:
    """Return the ten largest values of the ``Salary`` column, highest first.

    Args:
        path: Location of a CSV file whose header row contains ``Salary``.

    Returns:
        Up to ten salaries as floats, in descending order.

    Raises:
        KeyError: If the file has no ``Salary`` column.
        ValueError: If a salary cell is not a valid float.
    """
    # The csv module requires files to be opened with newline='' so that
    # newlines embedded in quoted fields are handed to the parser untouched.
    with open(path, newline='') as fp:
        salaries = (float(row['Salary']) for row in csv.DictReader(fp))
        return sorted(salaries, reverse=True)[:10]
def total_salary(path: str) -> list[float]:
    """Return the ten largest values of the ``Salary`` column, highest first.

    Uses ``heapq.nlargest`` so only ten values are retained while streaming
    through the file.

    Args:
        path: Location of a CSV file whose header row contains ``Salary``.

    Returns:
        Up to ten salaries as floats, in descending order.

    Raises:
        KeyError: If the file has no ``Salary`` column.
        ValueError: If a salary cell is not a valid float.
    """
    # The csv module requires files to be opened with newline='' so that
    # newlines embedded in quoted fields are handed to the parser untouched.
    with open(path, newline='') as fp:
        salaries = (float(row['Salary']) for row in csv.DictReader(fp))
        return heapq.nlargest(10, salaries)
import java.nio.file.*
import kotlin.io.*
/**
 * Prints the ten largest values of the "Salary" column of the CSV file at
 * [path], one per line, in ascending order.
 *
 * Rows whose salary cell is missing or not a number are skipped. Nothing is
 * printed when the file is empty or has no "Salary" header.
 */
fun top10Salaries(path: Path) =
    path.toFile().readLines().let { rows ->
        val salaryColumn = rows.firstOrNull()?.split(',')?.indexOf("Salary")
        salaryColumn?.let { column ->
            val salaries = rows.drop(1).mapNotNull { row ->
                row.split(',').getOrNull(column)?.toFloatOrNull()
            }
            // Natural ascending order, so the ten largest are the last ten.
            salaries.sorted().takeLast(10).forEach { salary -> println(salary) }
        }
    }
open System.IO
/// Splits `s` into substrings at every occurrence of the separator `c`.
let splitBy (c: char) (s: string) = s.Split([| c |])
/// Prints the `number` highest values of the "Salary" column of the CSV file
/// at `path`, highest first, as "Top <rank>: <salary> $$$".
/// Prints fewer lines when the file holds fewer data rows than `number`.
/// Raises if the file is empty, has no "Salary" header, or a salary cell is
/// not numeric.
let showTopSalaries number path =
    // Cached so the header lookup and the body scan share one pass over the file.
    let lines = File.ReadLines path |> Seq.cache
    let index =
        lines
        |> Seq.head
        |> splitBy ','
        |> Seq.findIndex (fun s -> s = "Salary")
    lines
    |> Seq.tail
    // A trailing blank line would otherwise fail the column lookup or the parse.
    |> Seq.filter (fun line -> not (System.String.IsNullOrWhiteSpace line))
    |> Seq.map (splitBy ',' >> fun i -> i.[index] |> float)
    |> Seq.sortDescending
    // Seq.take throws when fewer than `number` rows exist; Seq.truncate does not.
    |> Seq.truncate number
    |> Seq.indexed
    |> Seq.iter (fun (x,y) -> printfn "Top %d: %.2f $$$" (x+1) y)
(*
The CSV file should have the following headers:
.__________+_____+________.
| FullName | Age | Salary |
!__________!_____!________!
*)
/// Program entry point: prints the top 10 salaries of the CSV file named by
/// the first command-line argument and exits with code 0.
[<EntryPoint>]
let main argv =
    showTopSalaries 10 argv.[0]
    0
# Load the CSV (first row is the header) and show the ten largest salaries,
# highest first. Straight quotes are required: typographic quotes are a
# syntax error in R. TRUE is used instead of T because T is an ordinary
# variable that can be reassigned.
x = read.csv("~/[path]", header = TRUE)
head(sort(x$Salary, decreasing = TRUE), n = 10)
-- | Reads "input.csv", finds the "Salary" column in the header row and
-- returns the ten largest salaries, highest first.
--
-- Fails with a pattern-match error when the file is empty or has no
-- "Salary" header; 'read' throws when a cell is not a valid 'Double'.
prostaya_programma = do
  headers : rows <- map (splitOn ",") . lines <$> readFile "input.csv"
  let Just salaryColumn = elemIndex "Salary" headers
  -- The final statement of an IO do-block must itself be an IO action, so
  -- the pure result list is lifted with 'pure'.
  pure $ rows
    & map (!! salaryColumn)
    & map (read @Double)
    & sortOn negate
    & take 10
// Prints the ten largest values of the "Salary" column, highest first.
// Files.lines keeps the file open until its stream is closed, so each pass
// runs inside try-with-resources.
final Path csvPath = Path.of("path/to/file");
final int idx;
try (var header = Files.lines(csvPath)) {
    idx = header
        .limit(1)
        .map(line -> List.of(line.split(",")).indexOf("Salary"))
        .findFirst().get();
}
if (idx < 0) {
    throw new IllegalStateException("Salary column not found");
}
try (var rows = Files.lines(csvPath)) {
    rows
        // Skip the header row: parsing the literal "Salary" as a double throws.
        .skip(1)
        .map(line -> line.split(",")[idx])
        .map(Double::parseDouble)
        .sorted(Comparator.reverseOrder())
        .limit(10)
        .forEach(System.out::println);
}
import (
"encoding/csv"
"fmt"
"os"
"sort"
"strconv"
)
func topSalariesCSV(path string) ([]int, error) {
f, err := os.Open(path)
if err != nil {
return nil, err
}
defer func() {
if err := f.Close(); err != nil {
panic(err)
}
}()
reader := csv.NewReader(f)
lines, err := reader.ReadAll()
if err != nil {
return nil, fmt.Errorf("parse csv | %v", err)
}
col := sort.SearchStrings(lines[0], "Salary")
top := []int{}
for _, line := range lines[1:] {
v, err := strconv.Atoi(line[col])
if err != nil {
return nil, fmt.Errorf("atoi | %v", err)
}
top = append(top, v)
}
sort.Sort(sort.Reverse(sort.IntSlice(top)))
if len(top) > 10 {
top = top[0:10]
}
return top, nil
}
use bigdecimal::BigDecimal;
use serde::Deserialize;
use std::collections::BinaryHeap;
use std::cmp::Reverse;
/// Number of top salaries to keep.
const MAX: usize = 10;
/// One CSV row; only the salary column is deserialized.
/// `rename_all = "PascalCase"` maps the `salary` field to a `Salary` header.
#[derive(Debug, Deserialize)]
#[serde(rename_all(deserialize = "PascalCase"))]
struct Record {
salary: BigDecimal,
}
/// Reads CSV records from stdin and debug-prints the `MAX` largest salaries
/// to stderr, largest first.
///
/// Returns an error as soon as a record fails to deserialize.
fn main() -> Result<(), anyhow::Error> {
    let mut reader = csv::Reader::from_reader(std::io::stdin());
    // `Reverse` turns the max-heap into a min-heap, so `pop` evicts the
    // smallest salary currently held.
    let mut smallest_on_top = BinaryHeap::with_capacity(MAX + 1);
    for row in reader.deserialize::<Record>() {
        smallest_on_top.push(Reverse(row?.salary));
        if smallest_on_top.len() > MAX {
            smallest_on_top.pop();
        }
    }
    let vec = smallest_on_top.into_sorted_vec();
    eprintln!("{:#?}", vec);
    Ok(())
}
// Prints the ten largest values of the "Salary" column of the CSV at $path,
// highest first, one per line.

// file() returns the file as an array of lines; file_get_contents() returns
// one string, which can neither be shifted nor iterated line by line.
$csv = @file ($path, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
if ($csv === false || count ($csv) === 0) die ('Cannot read CSV file');
// array_shift() removes and returns the header row (array_unshift() prepends).
$headings = explode (',', array_shift ($csv));
$headings_ords = array_flip ($headings);
// isset() rather than a truthiness test: "Salary" may be column 0, which is falsy.
isset ($headings_ords['Salary']) or die ('Salary column not found');
$salary_ord = $headings_ords['Salary'];
$salaries = [];
foreach ($csv as $csv_line) {
    $line_values = explode (',', $csv_line);
    $salaries[] = $line_values[$salary_ord];
}
// Compare as numbers so that "9" does not sort above "10000".
rsort ($salaries, SORT_NUMERIC);
$top_salaries = array_splice ($salaries, 0, 10);
echo implode ("\n", $top_salaries);
# Print the ten largest values of the "Salary" column, highest first.
# The header is split around the word "Salary"; the number of commas before
# it gives the column index. FS is then set to "," for the data rows.
awk -FSalary 'NR==1{i=gsub(",",0,$1)+1;FS=","} NR>1{print($i)}' /path/to/csv.csv \
| sort -rn \
| head -n10
# Print the ten largest values of the "salary" column of the
# semicolon-separated file "table.tsv", in ascending order.
with open("table.tsv", "r") as f:
    # The first line is the header; it gives the position of the salary column.
    sal_col = next(f).rstrip().split(";").index("salary")
    print(sorted(int(line.rstrip().split(";")[sal_col]) for line in f)[-10:])
# Print the ten largest values of the "salary" column of the
# semicolon-separated file "table.tsv", highest first.
# The file is opened once inside a `with` block so the handle is always
# closed; previously it was opened twice and never closed.
with open("table.tsv") as f:
    # The first line is the header; the remaining lines are data rows.
    sal_col = next(f).rstrip().split(";").index("salary")
    print(sorted((int(line.rstrip().split(";")[sal_col]) for line in f), reverse=True)[:10])
const fs = require('fs')
// Reads the CSV at `path` and computes the ten largest values of the
// "Salary" column, highest first, into `top10`.
fs.readFile(path, 'utf8' , (err, data) => {
  // Surface read failures instead of crashing later on `undefined.split`.
  if (err) throw err
  // Drop empty lines (e.g. after a trailing newline) so they do not become NaN.
  let lines = data.split('\n').filter(line => line.trim() !== '')
  let col = lines[0].split(',').indexOf('Salary')
  let salaries = lines.slice(1).map(line => Number(line.split(',')[col]))
  // Array.prototype.sort compares elements as strings by default
  // (so 9 would sort above 10000); a numeric comparator is required.
  let top10 = salaries.sort((a, b) => b - a).slice(0, 10)
})
perl -MText::CSV=csv -le 'print join "\n", (sort { $b <=> $a } map { $_->{Salary} } @{csv (in => $ARGV[0], headers => "auto")})[0..9]' path
require 'csv'
# Ten largest values of the :salary column, highest first.
salary_column = CSV.table('path/to/data.csv')[:salary]
salary_column.max(10)
// Sums the ten largest values of the "salary" column of the CSV file `fn`.
// The rows are materialized once: File.ReadLines is lazy, so enumerating it
// for both the header and the body would read the file twice.
var splitLines = File.ReadLines(fn).Select(l => l.Split(',')).ToList();
var idx = Array.IndexOf(splitLines.First(), "salary");
var salary = splitLines.Skip(1)
    .Select(l => int.Parse(l[idx]))
    // The LINQ operator is OrderByDescending; OrderByDesc does not exist.
    .OrderByDescending(_ => _)
    .Take(10)
    .Sum();
# Reads salaries.csv as raw lines (-nR with `inputs`), splits every line on
# commas, looks up the position of "salary" in the first row, converts that
# column of the remaining rows to numbers, and emits the ten largest values
# in descending order, one per line.
jq -nR '[inputs | split(",")] | [.[1:][][first | index("salary")] | tonumber] | sort | reverse[:10][]' salaries.csv
use std::cmp::Reverse;
use std::collections::BinaryHeap;
use std::fs::File;
use std::io::{prelude::*, BufReader};
use std::str;
/// Returns up to ten of the largest values in the `Salary` column of the
/// comma-separated file `filename`, largest first.
///
/// Returns `None` when the file cannot be opened, the header line cannot be
/// read, or the header has no `Salary` column. Rows whose salary cell is
/// missing, not UTF-8, or not a `u64` are skipped.
fn best_salaries(filename: &str) -> Option<Vec<u64>> {
    let mut rows = BufReader::new(File::open(filename).ok()?).split(b'\n');
    let header = rows.next()?.ok()?;
    let salary_pos = header
        .split(|&byte| byte == b',')
        .position(|name| name == b"Salary")?;

    // `Reverse` makes this a min-heap: `pop` evicts the smallest salary kept.
    let mut smallest_first = BinaryHeap::new();
    for row in rows {
        let salary = row.ok().and_then(|bytes| {
            let cell = bytes.split(|&byte| byte == b',').nth(salary_pos)?;
            str::from_utf8(cell).ok()?.parse::<u64>().ok()
        });
        if let Some(salary) = salary {
            smallest_first.push(Reverse(salary));
            if smallest_first.len() > 10 {
                smallest_first.pop();
            }
        }
    }

    // Ascending order of `Reverse<u64>` is descending order of the salaries.
    let ranked = smallest_first.into_sorted_vec();
    Some(ranked.into_iter().map(|Reverse(salary)| salary).collect())
}
/// Entry point: debug-prints the result of `best_salaries` for "a.csv".
fn main() {
dbg!(best_salaries("a.csv"));
}
/// Returns up to ten of the largest values in the `Salary` column of the
/// comma-separated file `fname`, largest first.
///
/// Panics if the file cannot be read, has no non-empty line, lacks a
/// `Salary` header, or contains a salary that is not a `u64`.
fn total_salary(fname: &str) -> Vec<u64> {
    let contents = std::fs::read_to_string(fname).unwrap();
    // Non-empty lines only, each split into its cells.
    let table: Vec<Vec<&str>> = contents
        .split('\n')
        .filter(|row| !row.is_empty())
        .map(|row| row.split(',').collect())
        .collect();
    let column = table[0].iter().position(|&name| name == "Salary").unwrap();

    let mut salaries: Vec<u64> = Vec::new();
    for row in &table[1..] {
        salaries.push(row[column].parse::<u64>().unwrap());
    }
    salaries.sort_by(|left, right| right.cmp(left));
    salaries.truncate(10);
    salaries
}
-- | Prints the ten largest values of the "Salary" column of the CSV file at
-- the given path, highest first, one per line.
--
-- Fails with a pattern-match error when the file is empty or has no
-- "Salary" header; 'read' throws when a cell is not a valid 'Int'.
top10Salaries :: FilePath -> IO ()
top10Salaries path = do
  Just (header, rows) <- uncons . T.lines <$> T.readFile path
  let
    cells = T.splitOn ","
    Just column = elemIndex "Salary" (cells header)
    salaryOf row = read (T.unpack (cells row !! column)) :: Int
    best = take 10 (sortBy (flip compare) (map salaryOf rows))
  mapM_ print best
import scala.io.Source
/** Returns up to ten of the largest values in the "Salary" column of the
  * CSV file at `path`, highest first.
  *
  * Throws a `MatchError` when the file is empty and a
  * `NumberFormatException` when a salary cell is not numeric.
  */
def topTenSalaries(path: String): List[Double] = {
  val source = Source.fromFile(path)
  // Source holds an open file handle; close it even when parsing fails.
  try {
    val header +: body = source.getLines().toList
    val column = header.split(",").indexOf("Salary")
    body
      .map(s => s.split(",")(column).toDouble)
      .sorted(Ordering[Double].reverse)
      .take(10)
  } finally {
    source.close()
  }
}
using CSV, DataFrames
# Load foo.csv into a DataFrame, sort its Salary column in place in
# descending order, and print the first ten entries.
df = CSV.read("foo.csv", DataFrame; delim=",")
top_ten = first(sort!(df.Salary; rev=true), 10)
println(top_ten)
#import <Foundation/Foundation.h>
/// Logs the ten largest integer values of the "Salary" column of the CSV
/// file named by the single command-line argument, highest first.
///
/// Returns 1 on a wrong argument count, an unreadable file, a file without
/// data rows, a missing "Salary" header, or a row too short to hold the
/// salary column; returns 0 otherwise.
int main(int argc, char **argv)
{
    if (argc != 2) {
        return 1;
    }
    NSString *inputPath = @(argv[1]);
    NSError *error = nil;
    NSString *inputString = [NSString stringWithContentsOfFile:inputPath encoding:NSUTF8StringEncoding error:&error];
    // Cocoa convention: failure is signalled by the nil result, not by `error`.
    if (inputString == nil) {
        return 1;
    }
    NSArray<NSString *> *lines = [inputString componentsSeparatedByString:@"\n"];
    const NSUInteger lineCount = lines.count;
    if (lineCount < 2) {
        return 1;
    }
    NSArray<NSString *> *columnNames = [lines[0] componentsSeparatedByString:@","];
    const NSUInteger salaryColumnIndex = [columnNames indexOfObject:@"Salary"];
    if (salaryColumnIndex == NSNotFound) {
        return 1;
    }
    // Kept sorted in descending order and capped at ten entries.
    NSMutableArray<NSNumber *> *topSalaries = [NSMutableArray new];
    for (NSUInteger lineIndex = 1; lineIndex < lineCount; lineIndex++) {
        NSString *line = lines[lineIndex];
        if (line.length == 0) {
            continue;
        }
        NSArray<NSString *> *values = [line componentsSeparatedByString:@","];
        if (values.count <= salaryColumnIndex) {
            return 1;
        }
        const NSInteger salary = values[salaryColumnIndex].integerValue;
        // Binary search for the first position whose value is <= salary.
        // An equal value must narrow the range rather than stop the search:
        // stopping left `lo` stale, so the insert could land ahead of larger
        // values and break the descending order.
        NSUInteger lo = 0;
        NSUInteger hi = topSalaries.count;
        while (lo < hi) {
            const NSUInteger salaryIndex = lo + (hi - lo) / 2;
            const NSInteger topSalary = topSalaries[salaryIndex].integerValue;
            if (salary < topSalary) {
                lo = salaryIndex + 1;
            } else {
                hi = salaryIndex;
            }
        }
        [topSalaries insertObject:@(salary) atIndex:lo];
        while (topSalaries.count > 10) {
            [topSalaries removeLastObject];
        }
    }
    for (NSUInteger idx = 0; idx < topSalaries.count; idx++) {
        NSLog(@"%lu: %ld", idx + 1, topSalaries[idx].integerValue);
    }
    return 0;
}
require 'csv'
# Print the ten largest values of the 'Salary' column of foo.csv,
# highest first, one per line.
CSV.foreach('foo.csv', headers: true)
   .map { |row| row['Salary'].to_f }
   .max(10)
   .each { |salary| puts salary }
// Collect the ten largest values of the "Salary" column of data.csv,
// highest first, into $sortedSalary.
$csv = array_map('str_getcsv', file("data.csv"));
$i = array_search("Salary", $csv[0]);
// Reduce every row (header included) to its Salary cell, then drop the header.
foreach ($csv as &$row) {
    $row = $row[$i];
}
unset($row);
array_shift($csv);
arsort($csv);
$sortedSalary = array_slice($csv, 0, 10);
/**
 * Resolves to the ten largest values of the `Salary` column of the CSV file
 * at `path`, highest first.
 *
 * @param {string} path - location of the CSV file.
 * @returns {Promise<number[]>} up to ten salaries in descending order; an
 *   empty array when the file has no lines. Rejects when the file cannot be
 *   read or the header has no `Salary` column.
 */
function totalSalary(path) {
  return require('fs/promises')
    // Read the requested file as text; without an encoding readFile yields a
    // Buffer, which has no split().
    .readFile(path, 'utf8')
    .then(content => {
      // Ignore blank lines such as the one after a trailing newline.
      const lines = content.split(/\r?\n/).filter(line => line !== '');
      if (lines.length === 0) {
        return [];
      }
      // indexOf looks a value up; findIndex expects a predicate function.
      const column = lines[0].split(',').indexOf('Salary');
      if (column === -1) {
        throw new Error('Salary column not found');
      }
      return lines
        .slice(1)
        .map(line => Number(line.split(',')[column]))
        .filter(salary => !Number.isNaN(salary))
        // Descending, so the first ten entries are the largest.
        .sort((a, b) => b - a)
        .slice(0, 10)
      ;
    })
  ;
}
sqlite> .import --csv salaries.csv salaries
sqlite> -- .import creates every column as TEXT, so the salary must be cast to a
sqlite> -- number; otherwise '9' sorts above '10000'.
sqlite> select * from salaries order by cast(salary as real) desc limit 10;
import Data.Char (isSpace)
import Data.List (elemIndex, sort)
import Data.List.Split (splitOn)
-- | Reads the CSV file at the given path and returns the ten largest values
-- of its "Salary" column, highest first, or 'Nothing' when the header has no
-- such column.
--
-- Fails in 'IO' when the file is empty; 'read' throws when a cell is not a
-- valid 'Int'.
topSalaries :: String -> IO (Maybe [Int])
topSalaries path = do
  header : body <- lines <$> readFile path
  let cells = splitOn ","
      topTen column = take 10 . reverse . sort $ map (read . (!! column) . cells) body
  return (topTen <$> elemIndex "Salary" (cells header))
# Returns up to ten of the largest values in the "Salary" column of the CSV
# file at `path`, as floats, highest first.
#
# Raises when the file cannot be read, when the header has no "Salary"
# column, or when a salary cell does not start with a number.
def total_salary(path) do
  [head | tail] =
    path
    |> File.read!()
    # trim: true drops the empty string after a trailing newline, which
    # would otherwise become a row without a salary.
    |> String.split(["\r\n", "\n"], trim: true)
    |> Enum.map(fn line -> String.split(line, ",") end)

  col = Enum.find_index(head, fn name -> name == "Salary" end)

  tail
  |> Enum.map(fn row ->
    # Parse to a number: sorting the raw strings is lexicographic, so "9"
    # would rank above "10000".
    {salary, _rest} = row |> Enum.at(col) |> Float.parse()
    salary
  end)
  |> Enum.sort(&>=/2)
  |> Enum.take(10)
end
require 'csv'
# Parse salaries.csv with a header row and print (via `p`) the ten largest
# values of the 'Salary' column as floats, highest first.
salaries = CSV.parse(File.read('./salaries.csv'), headers: true)
p(salaries['Salary'].map { |raw| raw.to_f }.max(10))
(defn total-salary
  "Returns the ten largest values of the \"Salary\" column of the CSV file at
  `path` as doubles, highest first. The header row gives the column position."
  [path]
  (with-open [rdr (io/reader path)]
    (let [[header & body] (line-seq rdr)
          col (.indexOf (str/split header #",") "Salary")
          salary-of (fn [line]
                      (Double/parseDouble (nth (str/split line #",") col)))]
      ;; `sort` realizes every row while the reader is still open.
      (take 10 (reverse (sort (map salary-of body)))))))
(defn top-10-salaries
  "Returns a java.util.PriorityQueue holding the ten largest values of the
  \"Salary\" column of the CSV file at `path`. The queue's head is the
  smallest salary kept, so it is the one replaced when a larger value arrives."
  [path]
  (with-open [rdr (io/reader path)]
    (let [[header & body] (line-seq rdr)
          col (.indexOf (str/split header #",") "Salary")
          keep-largest (fn [heap line]
                         (let [salary (Double/parseDouble
                                        (nth (str/split line #",") col))]
                           (if (< (count heap) 10)
                             (.add heap salary)
                             (when (> salary (.peek heap))
                               (.poll heap)
                               (.add heap salary)))
                           heap))]
      (reduce keep-largest (java.util.PriorityQueue. 10) body))))
import pandas as pd
# Ten largest values of the 'Salary' column, highest first.
pd.read_csv("/../you_path")['Salary'].sort_values(ascending=False).head(10)
&AtServerNoContext
// Returns a ValueList with up to ten of the largest values of the "Salary"
// column of the comma-separated file FilePath, highest first.
//
// Parameters:
//  FilePath - String - path to the CSV file; its first line is the header.
//
// Returns:
//  ValueList - salaries as numbers, sorted in descending order.
//
// Any read or conversion error is re-raised after the reader is closed.
Function Top10Salaries(FilePath)
    TextReader = New TextReader(FilePath);
    Salaries = New ValueList;
    Try
        String = TextReader.ReadLine();
        If String <> Undefined Then
            ColumnIndex = StrSplit(String, ",").Find("Salary");
            String = TextReader.ReadLine();
            While String <> Undefined Do
                If Not IsBlankString(String) Then
                    // Convert to a number: string values sort lexicographically.
                    Salary = Number(StrSplit(String, ",")[ColumnIndex]);
                    Salaries.Add(Salary);
                EndIf;
                String = TextReader.ReadLine();
            EndDo;
        EndIf;
        Salaries.SortByValue(SortDirection.Desc);
        While Salaries.Count() > 10 Do
            Salaries.Delete(Salaries.Count() - 1);
        EndDo;
    Except
        // Release the file, then let the caller see the failure instead of
        // silently returning a partial, unsorted list.
        TextReader.Close();
        Raise;
    EndTry;
    // Close on the success path as well; previously the reader was closed
    // only when an exception occurred.
    TextReader.Close();
    Return Salaries;
EndFunction
/// <summary>
/// Returns up to ten of the largest values in the "Salary" column of the CSV
/// file at <paramref name="filePath"/>, highest first.
/// </summary>
/// <param name="filePath">Path to a comma-separated file with a header row.</param>
/// <returns>
/// A lazily evaluated sequence of salaries in descending order; empty when
/// the file has no non-blank lines.
/// </returns>
/// <exception cref="InvalidDataException">The header has no "Salary" column.</exception>
public static async Task<IEnumerable<double>> TotalSalary(string filePath)
{
    var cells = (await File.ReadAllLinesAsync(filePath))
        // Blank lines (e.g. a trailing newline) carry no salary cell.
        .Where(l => !string.IsNullOrWhiteSpace(l))
        .Select(l => l.Split(","))
        .ToArray();
    if (cells.Length == 0)
    {
        return Enumerable.Empty<double>();
    }
    var salaryIndex = Array.IndexOf(cells[0], "Salary");
    if (salaryIndex < 0)
    {
        throw new InvalidDataException("Salary column not found");
    }
    return cells
        .Skip(1)
        // Parse with the invariant culture so "1234.5" reads the same on
        // machines whose locale uses a decimal comma.
        .Select(c => double.Parse(c[salaryIndex], System.Globalization.CultureInfo.InvariantCulture))
        .OrderByDescending(s => s)
        .Take(10);
}
Salary<10$#^%&#$%&#$%dat.csv
@pirj
Copy link

pirj commented Oct 3, 2021

Is it worth mentioning that sort.take(10) and max(10) aren't exactly of the same complexity?
Sorting implies O(n log n) time complexity and O(n) space complexity, while max can do the same job in O(n) time and O(1) space, with a single pass over the data set and only slightly more than 10 memory slots.

There is a data structure, whose name I have forgotten, that is O(1) efficient for keeping the M largest values (M = 10 in our case).

I have no certainty that even languages with inherently lazy sorting can optimize subsequent picking of 10 elements to use the approach similar to what max does.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment