Skip to content

Instantly share code, notes, and snippets.

@ivan-ushakov
Created May 6, 2019 18:27
Show Gist options
  • Save ivan-ushakov/65ad074216e33f222b6a572069e4e8f5 to your computer and use it in GitHub Desktop.
Save ivan-ushakov/65ad074216e33f222b6a572069e4e8f5 to your computer and use it in GitHub Desktop.
import Foundation
/// Number of bytes requested from the input stream per read while scanning a file.
let FILE_BUFFER_SIZE: Int = 50_000
// source data
/// One debt record as read from a single input object.
///
/// Kept as a class: callers fill the fields in through a `let` reference.
class DebtRec {
    /// Company name; empty until set.
    var company = ""
    /// Phone numbers attached to the record, in insertion order.
    var phones = [String]()
    /// Debt amount; zero until set.
    var debt = 0.0
}
// result data
/// Aggregated debtor: the distinct companies and phones seen for it plus the summed debt.
///
/// Kept as a class: instances stored in a collection are mutated through their reference.
class Debtor {
    /// Distinct company names associated with this debtor.
    var companies = Set<String>()
    /// Distinct phone numbers associated with this debtor.
    var phones = Set<String>()
    /// Accumulated debt; zero until set.
    var debt = 0.0
}
/// Collection of all debtors together with a phone-number lookup table.
class Debtors {
    /// Every debtor created so far; positions in this array serve as debtor indices.
    var all = [Debtor]()
    /// Maps a phone number to the index in `all` of the debtor it is assigned to.
    var index_by_phone = [String: Int]()
}
/// Command-line driver.
///
/// Walks `CommandLine.arguments`; every argument that directly follows a `-f` flag is treated
/// as a file name, echoed, and passed to `process_file`. Afterwards all accumulated debtors
/// are printed, and a usage line is printed if no file name was ever consumed.
func main() {
    var res = Debtors()
    // Argument-parsing state:
    //   0 – no "-f" seen yet
    //   1 – "-f" was just seen, the next argument is a file name
    //   2 – at least one file name has been consumed
    var fflag = 0
    for arg in CommandLine.arguments {
        if arg == "-f" {
            fflag = 1
        } else if fflag == 1 {
            fflag = 2
            print("\(arg):")
            // The (object count, error count) result is not reported, so discard it
            // explicitly instead of binding locals that are never read.
            _ = process_file(fname: arg, res: &res)
        }
    }

    for (di, d) in res.all.enumerated() {
        print("-------------------------------")
        print("#\(di): debt: \(d.debt)")
        print("companies: \(d.companies)\nphones: \(d.phones)")
    }

    if fflag < 2 {
        print("USAGE: fastpivot -f \"file 1\" -f \"file 2\" ...")
    }
}
/// Streams the file at `fname`, splits it into top-level `{ ... }` objects by brace matching,
/// parses each one with `JSONSerialization` and hands the resulting dictionary to
/// `process_object`.
///
/// Bytes outside of any object (array brackets, commas, whitespace, stray closing braces)
/// are skipped. Braces that appear inside JSON string literals do not affect the nesting depth.
///
/// - Parameters:
///   - fname: Path of the file to read.
///   - res: Accumulator that `process_object` updates for every parsed object.
/// - Returns: `(count, errcount)` – the number of complete objects found and how many of
///   them could not be parsed into a dictionary.
func process_file(fname: String, res: inout Debtors) -> (Int, Int) {
    var count = 0
    var errcount = 0
    guard let inputStream = InputStream(fileAtPath: fname) else {
        print("ERROR: Unable to open file")
        return (count, errcount)
    }
    inputStream.open()
    // Release the underlying file on every exit path.
    defer { inputStream.close() }

    let openBrace = UInt8(ascii: "{")
    let closeBrace = UInt8(ascii: "}")
    let quote = UInt8(ascii: "\"")
    let backslash = UInt8(ascii: "\\")

    var obj: [UInt8] = []       // bytes of the object currently being collected
    var braces = 0              // current brace nesting depth; 0 means "outside any object"
    var inString = false        // scanner is inside a JSON string literal
    var escaped = false         // previous byte inside the string was a backslash
    var buffer = [UInt8](repeating: 0, count: FILE_BUFFER_SIZE)

    while true {
        let r = inputStream.read(&buffer, maxLength: buffer.count)
        if r < 0 {
            // A negative result signals a stream error, not end of file.
            print("ERROR: Unable to read file")
            break
        }
        if r == 0 {
            break // EOF
        }

        for b in buffer[0..<r] {
            if braces == 0 {
                // Outside any object only an opening brace matters; in particular a stray
                // closing brace must not push the depth below zero.
                if b == openBrace {
                    braces = 1
                    obj.append(b)
                }
                continue
            }

            obj.append(b)

            if inString {
                // Within a string only the terminating quote and escapes are significant.
                if escaped {
                    escaped = false
                } else if b == backslash {
                    escaped = true
                } else if b == quote {
                    inString = false
                }
                continue
            }

            if b == quote {
                inString = true
            } else if b == openBrace {
                braces += 1
            } else if b == closeBrace {
                braces -= 1
                if braces == 0 { // object formed
                    count += 1
                    do {
                        let parsed = try JSONSerialization.jsonObject(with: Data(obj))
                        if let dict = parsed as? [String: Any] {
                            process_object(o: dict, res: &res)
                        } else {
                            print("JSON ERROR")
                            errcount += 1
                        }
                    } catch {
                        print("JSON ERROR")
                        errcount += 1
                    }
                    obj.removeAll(keepingCapacity: true)
                }
            }
        }
    }
    return (count, errcount)
}
/// Folds one parsed input object into the accumulated result.
///
/// The record's phones are looked up in `res.index_by_phone` in order; the first phone that
/// is already known decides which existing debtor the record belongs to (later phones are
/// not consulted for that decision). If no phone is known, a new debtor is appended to
/// `res.all`. In both cases the company and all phones are added to the debtor, every phone
/// of the record is mapped to that debtor's index, and the debt is added.
///
/// - Parameters:
///   - o: Dictionary for one input object, passed on to `extract_data`.
///   - res: Accumulator that is updated in place.
func process_object(o: Dictionary<String, Any>, res: inout Debtors) {
    let record = extract_data(o)

    // Index of the debtor owning the first already-known phone, if any.
    let knownIndex = record.phones
        .first(where: { res.index_by_phone[$0] != nil })
        .flatMap { res.index_by_phone[$0] }

    let debtor: Debtor
    let debtorIndex: Int
    if let found = knownIndex {
        // existing debtor
        debtorIndex = found
        debtor = res.all[found]
        debtor.debt += record.debt
    } else {
        // new debtor
        debtorIndex = res.all.count
        debtor = Debtor()
        debtor.debt = record.debt
        res.all.append(debtor)
    }

    debtor.companies.insert(record.company)
    for phone in record.phones {
        debtor.phones.insert(phone)
        res.index_by_phone[phone] = debtorIndex
    }
}
/// Builds a `DebtRec` from one parsed input dictionary.
///
/// Recognised keys:
/// - `"company"`: either a nested dictionary whose `"name"` is used, or a plain value.
/// - `"phones"`: either an array of values or a single value.
/// - `"phone"`: a single value, appended after anything taken from `"phones"`.
/// - `"debt"`: a `Double`, or a string that parses as a `Double`.
///
/// String values are used as they are, integers are rendered in decimal, and anything else –
/// including a missing `"company"` or company `"name"` – becomes the text `"null"`. A missing
/// or unparsable `"debt"` leaves the debt at its default of zero instead of crashing.
///
/// - Parameter o: Dictionary for one input object.
/// - Returns: The populated record.
func extract_data(_ o: Dictionary<String, Any>) -> DebtRec {
    // Renders a loosely typed value as text; absent or unsupported values map to "null".
    func val2str(_ v: Any?) -> String {
        if let text = v as? String {
            return text
        }
        if let number = v as? Int {
            return String(number)
        }
        return "null"
    }

    let dr = DebtRec()

    if let company = o["company"] as? [String: Any] {
        dr.company = val2str(company["name"])
    } else {
        dr.company = val2str(o["company"])
    }

    if let list = o["phones"] as? [Any] {
        dr.phones.append(contentsOf: list.map { val2str($0) })
    } else if let single = o["phones"] {
        dr.phones.append(val2str(single))
    }

    if let phone = o["phone"] {
        dr.phones.append(val2str(phone))
    }

    if let amount = o["debt"] as? Double {
        dr.debt = amount
    } else if let text = o["debt"] as? String, let amount = Double(text) {
        // A non-numeric string is ignored rather than force-unwrapped.
        dr.debt = amount
    }

    return dr
}
// Top-level entry: run the command-line driver defined above.
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment