Skip to content

Instantly share code, notes, and snippets.

@ivan-ushakov
Last active May 15, 2019 19:02
Show Gist options
  • Save ivan-ushakov/ee8257e5bb3e8c872f2f07c712e76972 to your computer and use it in GitHub Desktop.
Save ivan-ushakov/ee8257e5bb3e8c872f2f07c712e76972 to your computer and use it in GitHub Desktop.
main.swift
import Foundation
// Size, in bytes, of the buffer used for each read from the input stream.
let FILE_BUFFER_SIZE = 50000
// source data
/// One debt record as extracted from a single JSON object of the input.
struct DebtRec {
    /// Company name associated with the record.
    var company: String
    /// Phone numbers found in the record, as strings.
    var phones: [String]
    /// Debt amount carried by the record.
    var debt: Double
}
// result data
/// Aggregated information about one debtor, built up from the records
/// that were attributed to it.
class Debtor {
    /// Names of all companies seen in the debtor's records.
    var companies: Set<String> = []
    /// All phone numbers seen in the debtor's records.
    var phones: Set<String> = []
    /// Sum of the debts of the debtor's records.
    var debt = 0.0
}
/// The accumulated result: every debtor found so far plus a phone lookup table.
class Debtors {
    /// All debtors, in the order in which they were first created.
    var all: [Debtor] = []
    /// Maps a phone number to the position of its debtor inside `all`.
    var index_by_phone: [String: Int] = [:]
}
/// Command-line driver: processes every file named after a `-f` flag and
/// prints the resulting debtors.
///
/// Only the first argument following each `-f` is treated as a file name;
/// any further arguments before the next `-f` are ignored. After the
/// results, a usage line is printed when `-f` was never given or when the
/// last `-f` was not followed by a file name.
func main() {
    // Where the argument scan currently stands.
    enum ArgumentState {
        case idle           // no `-f` seen yet
        case expectingFile  // the next argument is a file name
        case fileConsumed   // the most recent `-f` received its file name
    }

    var res = Debtors()
    var state = ArgumentState.idle
    for arg in CommandLine.arguments {
        if arg == "-f" {
            state = .expectingFile
        } else if state == .expectingFile {
            state = .fileConsumed
            print("\(arg):")
            // The (object count, error count) result is not reported.
            _ = process_file(fname: arg, res: &res)
        }
    }

    for (position, debtor) in res.all.enumerated() {
        print("-------------------------------")
        print("#\(position): debt: \(debtor.debt)")
        print("companies: \(debtor.companies)\nphones: \(debtor.phones)")
    }

    if state != .fileConsumed {
        print("USAGE: fastpivot -f \"file 1\" -f \"file 2\" ...")
    }
}
/// Reads `fname` as a byte stream, cuts it into top-level `{ ... }` JSON
/// objects and feeds each successfully parsed object to `process_object`.
///
/// Objects are located by tracking brace depth. Braces that occur inside
/// a JSON string value are not counted, and a closing brace that appears
/// outside any object is ignored. Bytes outside objects (array brackets,
/// commas, whitespace) are skipped.
///
/// - Parameters:
///   - fname: Path of the file to read.
///   - res: Accumulated result that receives the parsed objects.
/// - Returns: `(count, errcount)`: the number of complete objects found
///   and how many of them failed JSON parsing.
func process_file(fname: String, res: inout Debtors) -> (Int, Int) {
    // ASCII codes of the bytes that drive the splitter. None of them can
    // occur inside a multi-byte UTF-8 sequence, so scanning raw bytes is safe.
    let openBrace: UInt8 = 123   // {
    let closeBrace: UInt8 = 125  // }
    let quote: UInt8 = 34        // "
    let backslash: UInt8 = 92    // \

    var count = 0
    var errcount = 0
    guard let inputStream = InputStream(fileAtPath: fname) else {
        print("ERROR: Unable to open file")
        return (count, errcount)
    }
    inputStream.open()
    // Release the stream on every exit path.
    defer { inputStream.close() }

    var obj = [UInt8]()      // bytes of the object currently being collected
    var braces = 0           // current brace nesting depth
    var inString = false     // true while inside a JSON string of the object
    var escaped = false      // true right after a backslash inside a string
    var buffer = [UInt8](repeating: 0, count: FILE_BUFFER_SIZE)

    while true {
        let r = inputStream.read(&buffer, maxLength: buffer.count)
        if r < 0 {
            // A negative result signals a failed read (including a stream
            // that could not be opened), not the end of the file.
            print("ERROR: Unable to read file")
            break
        }
        if r == 0 {
            break // EOF
        }

        for b in buffer[0..<r] {
            if braces == 0 {
                // Outside any object only an opening brace is significant.
                if b == openBrace {
                    braces = 1
                    obj.append(b)
                }
                continue
            }

            obj.append(b)

            if inString {
                if escaped {
                    escaped = false
                } else if b == backslash {
                    escaped = true
                } else if b == quote {
                    inString = false
                }
                continue
            }

            if b == quote {
                inString = true
            } else if b == openBrace {
                braces += 1
            } else if b == closeBrace {
                braces -= 1
                if braces == 0 { // object formed
                    do {
                        let o = try JSONSerialization.jsonObject(with: Data(obj))
                        process_object(o: o, res: &res)
                    } catch {
                        print("JSON ERROR")
                        errcount += 1
                    }
                    count += 1
                    obj.removeAll(keepingCapacity: true)
                }
            }
        }
    }
    return (count, errcount)
}
/// Folds one parsed JSON object into the accumulated result.
///
/// The record is attributed to the debtor registered for the first of its
/// phones that is already known; when none is known, a new debtor is
/// appended. In both cases every phone of the record is (re)mapped to that
/// debtor in `res.index_by_phone`.
///
/// - Parameters:
///   - o: The parsed JSON object.
///   - res: Accumulated result to update.
func process_object(o: Any, res: inout Debtors) {
    let record = extract_data(o)

    // Position of the debtor owning the first already-registered phone.
    var knownIndex: Int? = nil
    for phone in record.phones {
        guard let hit = res.index_by_phone[phone] else { continue }
        knownIndex = hit
        break
    }

    let debtor: Debtor
    let position: Int
    if let hit = knownIndex {
        // existing debtor
        position = hit
        debtor = res.all[hit]
        debtor.debt += record.debt
    } else {
        // new debtor
        position = res.all.count
        debtor = Debtor()
        debtor.debt = record.debt
        res.all.append(debtor)
    }

    debtor.companies.insert(record.company)
    for phone in record.phones {
        debtor.phones.insert(phone)
        res.index_by_phone[phone] = position
    }
}
/// Converts a decoded JSON value to a string.
///
/// - Parameter v: The value to convert.
/// - Returns: The value itself when it is a `String`, its decimal form when
///   it casts to `Int`, and the literal `"null"` for anything else.
func val2str(_ v: Any) -> String {
    switch v {
    case let text as String:
        return text
    case let number as Int:
        return String(number)
    default:
        return "null"
    }
}
/// Builds a `DebtRec` from one decoded JSON object.
///
/// Recognised keys:
/// - `"company"`: either a plain value, or an object whose `"name"` field
///   holds the company. A missing company or name yields `"null"`.
/// - `"phones"`: an array of values or a single value.
/// - `"phone"`: a single value, appended after the `"phones"` entries.
/// - `"debt"`: a number, or a string containing a number. A missing,
///   non-numeric or otherwise unusable debt yields `0.0`.
///
/// Individual values are converted to strings with `val2str`.
///
/// - Parameter o: The decoded JSON value. A value that is not an object is
///   treated like an object without keys.
/// - Returns: The extracted record.
func extract_data(_ o: Any) -> DebtRec {
    let object = o as? [String: Any] ?? [:]

    // Company: nested object with a "name", or a plain value.
    let companyResult: String
    let rawCompany = object["company"]
    if let nested = rawCompany as? [String: Any] {
        if let name = nested["name"] {
            companyResult = val2str(name)
        } else {
            companyResult = "null"
        }
    } else if let plain = rawCompany {
        companyResult = val2str(plain)
    } else {
        companyResult = "null"
    }

    // Phones: the entries of "phones" first, then the single "phone".
    var phonesResult = [String]()
    if let rawPhones = object["phones"] {
        if let list = rawPhones as? [Any] {
            for entry in list {
                phonesResult.append(val2str(entry))
            }
        } else {
            phonesResult.append(val2str(rawPhones))
        }
    }
    if let single = object["phone"] {
        phonesResult.append(val2str(single))
    }

    // Debt: numeric value, or a string that parses as a number.
    var debtResult = 0.0
    if let rawDebt = object["debt"] {
        if let number = rawDebt as? Double {
            debtResult = number
        } else if let text = rawDebt as? String, let parsed = Double(text) {
            debtResult = parsed
        }
    }

    return DebtRec(company: companyResult, phones: phonesResult, debt: debtResult)
}
// Script entry point: run the command-line driver.
main()
@modestman
Copy link

Еще чуть-чуть можно ускорить, заменив Data(obj) на

let data = Data(bytesNoCopy: &obj, count: obj.count, deallocator: .none)

@ivankolesnik
Copy link

К сожалению, не будет работать на Linux из-за NSArray/NSDictionary.

@ivan-ushakov
Copy link
Author

ivan-ushakov commented May 15, 2019

Я сделал версию с https://github.com/vdka/JSON вместо JSONSerialization. В ней нет NS-классов и оператора as?.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment