Skip to content

Instantly share code, notes, and snippets.

@ivan-ushakov
Last active November 21, 2019 15:46
Show Gist options
  • Save ivan-ushakov/b86c0267cda35f85da9d49c6b4682b8d to your computer and use it in GitHub Desktop.
Save ivan-ushakov/b86c0267cda35f85da9d49c6b4682b8d to your computer and use it in GitHub Desktop.
import Foundation
import JSON
/// Size, in bytes, of the buffer used for each read from an input file.
let FILE_BUFFER_SIZE: Int = 50_000
// source data
/// One debt record as extracted from a single input JSON object.
struct DebtRec {
    /// Company name the debt is owed to.
    var company: String
    /// Phone numbers attached to the record, rendered as strings.
    var phones: [String]
    /// Amount of the debt.
    var debt: Double
}
// result data
/// Aggregated view of one debtor: everything collected from the records attributed to them.
final class Debtor {
    /// Distinct company names seen in this debtor's records.
    var companies: Set<String> = []
    /// Distinct phone numbers seen in this debtor's records.
    var phones: Set<String> = []
    /// Running total of the debt amounts.
    var debt = 0.0
}
/// Aggregation state shared across all processed input.
final class Debtors {
    /// Every debtor created so far; a debtor's position in this array is its index.
    var all = ContiguousArray<Debtor>()
    /// Maps a phone string to the index in `all` of the debtor it is attributed to.
    var index_by_phone = [String: Int]()
}
/// Command-line driver: aggregates debtors from every file given as `-f <file>` and prints them.
///
/// Each `-f` marks the next argument as an input file; arguments that do not follow a `-f`
/// are ignored. After all arguments are consumed, every aggregated debtor is printed.
/// The usage line is printed last when no file was processed or when the final `-f`
/// has no file name after it.
func main() {
    var res = Debtors()
    // True right after a "-f" flag, until the file name that follows it is consumed.
    var expectsFileName = false
    // True once at least one "-f <file>" pair has been handled.
    var processedAnyFile = false

    for arg in CommandLine.arguments {
        if arg == "-f" {
            expectsFileName = true
        } else if expectsFileName {
            expectsFileName = false
            processedAnyFile = true
            print("\(arg):")
            // The per-file (object count, error count) tuple is deliberately not reported;
            // capture it here (and time the call with Date()) to print processing statistics.
            _ = process_file(fname: arg, res: &res)
        }
    }

    for (index, debtor) in res.all.enumerated() {
        print("-------------------------------")
        print("#\(index): debt: \(debtor.debt)")
        print("companies: \(debtor.companies)\nphones: \(debtor.phones)")
    }

    if expectsFileName || !processedAnyFile {
        print("USAGE: fastpivot -f \"file 1\" -f \"file 2\" ...")
    }
}
/// Streams the file at `fname`, cuts it into outermost `{ ... }` JSON objects and hands each
/// one to `process_object`.
///
/// The file is read in chunks of `FILE_BUFFER_SIZE` bytes. Bytes outside any object are skipped.
/// Inside an object the scanner tracks brace depth, ignoring braces that appear inside JSON
/// string literals (including after backslash escapes), so values such as `"a{b"` do not break
/// the framing. An object left unterminated at end of file is dropped without being counted.
///
/// - Parameters:
///   - fname: Path of the file to read.
///   - res: Aggregation state updated by `process_object` for every object that parses.
/// - Returns: `(count, errcount)`: the number of complete objects found (including those that
///   failed to parse) and the number of objects that failed to parse.
func process_file(fname: String, res: inout Debtors) -> (Int, Int) {
    let openBrace: UInt8 = 123   // {
    let closeBrace: UInt8 = 125  // }
    let quote: UInt8 = 34        // double quote
    let backslash: UInt8 = 92    // escape character inside JSON strings

    var count = 0
    var errcount = 0
    guard let inputStream = InputStream(fileAtPath: fname) else {
        print("ERROR: Unable to open file")
        return (count, errcount)
    }
    inputStream.open()
    // Release the file handle on every exit path.
    defer { inputStream.close() }

    var obj = [UInt8]()          // bytes of the object currently being collected
    var depth = 0                // nesting depth of braces outside string literals
    var inString = false         // currently inside a JSON string literal
    var escaped = false          // previous byte inside the string was a backslash
    var buffer = [UInt8](repeating: 0, count: FILE_BUFFER_SIZE)

    while true {
        let r = inputStream.read(&buffer, maxLength: buffer.count)
        if r < 0 {
            // A negative result is a read failure (e.g. the file does not exist), not EOF.
            print("ERROR: Unable to read file")
            break
        }
        if r == 0 {
            break // EOF
        }
        for b in buffer[0..<r] {
            if depth == 0 {
                // Outside any object: only an opening brace is meaningful. Everything else,
                // including a stray closing brace, is skipped so the depth never goes negative.
                if b == openBrace {
                    depth = 1
                    obj.append(b)
                }
                continue
            }

            obj.append(b)

            if inString {
                if escaped {
                    escaped = false
                } else if b == backslash {
                    escaped = true
                } else if b == quote {
                    inString = false
                }
                continue
            }

            if b == quote {
                inString = true
            } else if b == openBrace {
                depth += 1
            } else if b == closeBrace {
                depth -= 1
                if depth == 0 { // outermost object complete
                    do {
                        let o = try JSON.Parser.parse(obj, options: [])
                        process_object(o: o, res: &res)
                    } catch {
                        print("JSON ERROR")
                        errcount += 1
                    }
                    count += 1
                    obj.removeAll(keepingCapacity: true)
                }
            }
        }
    }
    return (count, errcount)
}
/// Folds one parsed JSON object into the aggregation state.
///
/// The record is attributed to the debtor owning the first of its phones that is already
/// indexed; if none is indexed, a new debtor is appended. In both cases the record's company
/// and phones are added to that debtor, its debt is accumulated, and every phone of the record
/// is (re)pointed at that debtor's index.
///
/// - Parameters:
///   - o: Parsed JSON object describing one debt record.
///   - res: Aggregation state to update.
func process_object(o: JSON, res: inout Debtors) {
    let record = extract_data(o)

    // First phone of the record that already belongs to a known debtor, if any.
    let knownPhone = record.phones.first(where: { res.index_by_phone[$0] != nil })

    let debtor: Debtor
    let debtorIndex: Int
    if let phone = knownPhone, let found = res.index_by_phone[phone] {
        // Existing debtor: accumulate on top of what is already there.
        debtorIndex = found
        debtor = res.all[found]
        debtor.debt += record.debt
    } else {
        // New debtor: it takes the next free slot in `all`.
        debtorIndex = res.all.count
        debtor = Debtor()
        debtor.debt = record.debt
        res.all.append(debtor)
    }

    debtor.companies.insert(record.company)
    debtor.phones.formUnion(record.phones)
    for phone in record.phones {
        res.index_by_phone[phone] = debtorIndex
    }
}
/// Renders a scalar JSON value as text.
///
/// - Parameter v: The JSON value to render.
/// - Returns: The string itself for a `.string`, the decimal text for an `.integer`,
///   and the literal text `"null"` for every other kind of value.
func val2str(_ v: JSON) -> String {
    if case .string(let text) = v {
        return text
    }
    if case .integer(let number) = v {
        return String(number)
    }
    return "null"
}
/// Builds a `DebtRec` from one parsed JSON object, tolerating missing or malformed members.
///
/// - `company`: either a scalar, or an object whose `name` member holds the scalar. A missing
///   `company` (or missing `name`) yields `"null"`, the same text `val2str` produces for
///   values it cannot render.
/// - `phones`: an array of scalars or a single scalar; an optional `phone` member is appended.
/// - `debt`: a double, an integer, or a numeric string. Anything else, including a string that
///   does not parse as a number, leaves the debt at `0.0`.
///
/// - Parameter object: Parsed JSON object describing one debt record.
/// - Returns: The extracted record; this function never traps on malformed input.
func extract_data(_ object: JSON) -> DebtRec {
    // Company name, with "null" as the fallback instead of a force-unwrap crash.
    var companyResult = "null"
    if let c = object["company"] {
        if case .object(let fields) = c {
            if let name = fields["name"] {
                companyResult = val2str(name)
            }
        } else {
            companyResult = val2str(c)
        }
    }

    // Phones: "phones" may be a list or a single value; "phone" adds one more entry.
    var phonesResult: [String] = []
    if let pp = object["phones"] {
        if case .array(let items) = pp {
            phonesResult = items.map { val2str($0) }
        } else {
            phonesResult = [val2str(pp)]
        }
    }
    if let p = object["phone"] {
        phonesResult.append(val2str(p))
    }

    // Debt: accept every numeric representation the parser can produce.
    var debtResult = 0.0
    if let d = object["debt"] {
        switch d {
        case .double(let value):
            debtResult = value
        case .integer(let value):
            // Whole-number debts arrive as integers; without this case they were recorded as 0.
            debtResult = Double(value)
        case .string(let value):
            // A non-numeric string is treated like any other unsupported value.
            debtResult = Double(value) ?? 0.0
        default:
            break
        }
    }

    return DebtRec(company: companyResult, phones: phonesResult, debt: debtResult)
}
// Top-level statement: start the program by calling main().
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment