Skip to content

Instantly share code, notes, and snippets.

@taylorbrooks
Created September 16, 2016 20:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save taylorbrooks/315e98cd1d536cd81191aab9921e7676 to your computer and use it in GitHub Desktop.
Save taylorbrooks/315e98cd1d536cd81191aab9921e7676 to your computer and use it in GitHub Desktop.
# Benchmark driver: read every line of the input file, parse each one with
# `process`, and report how many parsed records carry an amount above 100000,
# followed by the elapsed time. `compact_map` drops the nil results that
# `process` returns for lines it rejects.
started_at = Time.now
records = File.read_lines("./files/2014.txt").compact_map { |row| process(row) }
large_records = records.select { |record| record[:amount] > 100000 }
finished_at = Time.now
p large_records.size
p finished_at - started_at
# Parses one fixed-width record line into a set of named fields.
#
# Every field is taken from a fixed, zero-based, inclusive column range and
# stripped of surrounding whitespace. `amount` is additionally converted to an
# integer; a blank amount column yields 0.
#
# Returns nil (no record) when the line is shorter than 450 characters.
#
# NOTE(review): the last field reads columns 442..450, which needs 451
# characters, while the guard accepts 450 — confirm the intended record width.
def process(line)
  return if line.size < 450

  # Handle the blank case explicitly so an empty column never reaches to_i;
  # this mirrors the blank-means-zero rule used by the Elixir implementation.
  amount_text = line[248..257].strip
  amount = amount_text.empty? ? 0 : amount_text.to_i

  {
    # last_name spans the full 40 columns that precede first_name (0..39);
    # the former 0..5 slice cut names off after six characters.
    last_name:        line[0..39].strip,
    first_name:       line[40..69].strip,
    address_1:        line[70..99].strip,
    address_2:        line[100..129].strip,
    address_3:        line[130..159].strip,
    city:             line[160..189].strip,
    state:            line[190..191].strip,
    zip:              line[192..200].strip,
    tax_id:           line[201..209].strip,
    amount:           amount,
    holder_contact:   line[280..319].strip,
    holder_address_1: line[320..349].strip,
    holder_city:      line[410..439].strip,
    holder_state:     line[440..441].strip,
    holder_zip:       line[442..450].strip,
  }
end
defmodule Mix.Tasks.ProcessFile do
  @moduledoc """
  Mix task that streams `./files/2014.txt`, parses every fixed-width line with
  `process/1`, and prints the number of records whose amount exceeds 100,000,
  followed by the elapsed wall-clock time in whole seconds.
  """

  use Mix.Task
  alias Experimental.Flow

  @shortdoc "ingests file"

  # Fixed-width record layout: field name => zero-based, inclusive character
  # range within a line.
  @fields [
    last_name: 0..39,
    first_name: 40..69,
    address_1: 70..99,
    address_2: 100..129,
    address_3: 130..159,
    city: 160..189,
    state: 190..191,
    zip: 192..200,
    tax_id: 201..209,
    amount: 248..257,
    holder_contact: 280..319,
    holder_address_1: 320..349,
    holder_city: 410..439,
    holder_state: 440..441,
    holder_zip: 442..450
  ]

  @doc """
  Runs the task. The command-line arguments are ignored.

  Prints the count of records with `amount > 100_000`, then the elapsed time
  in seconds (difference of two Unix timestamps).
  """
  def run(_args) do
    start = DateTime.to_unix(DateTime.utc_now())

    "./files/2014.txt"
    |> File.stream!(read_ahead: 100_000)
    |> Flow.from_enumerable()
    |> Flow.map(&process/1)
    # No Flow.partition/1 here: filtering and counting are stateless per
    # record, so nothing needs to be routed to a particular stage by key.
    |> Flow.filter(&(&1.amount > 100_000))
    |> Enum.count()
    |> IO.inspect()

    fin = DateTime.to_unix(DateTime.utc_now())
    IO.puts(fin - start)
  end

  @doc """
  Parses one fixed-width `line` into a map with one atom key per field.

  Each field is sliced from its character range and trimmed of surrounding
  whitespace. `:amount` is additionally converted to an integer through
  `process_amount/1`. Ranges that fall beyond the end of a short line yield
  `""` (and therefore an amount of `0`).
  """
  @spec process(String.t()) :: map()
  def process(line) do
    @fields
    |> Map.new(fn {name, range} ->
      {name, line |> String.slice(range) |> String.trim()}
    end)
    |> Map.update!(:amount, &process_amount/1)
  end

  @doc """
  Converts a trimmed amount string to an integer; a blank string counts as `0`.

  Raises `ArgumentError` (from `String.to_integer/1`) when a non-blank string
  is not a valid integer.
  """
  @spec process_amount(String.t()) :: integer()
  def process_amount(""), do: 0
  def process_amount(num), do: String.to_integer(num)
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment