Skip to content

Instantly share code, notes, and snippets.

@ktunprasert
Last active September 20, 2023 15:06
Show Gist options
  • Save ktunprasert/e4c21617c688f3572c36a31723bac5d5 to your computer and use it in GitHub Desktop.
Save ktunprasert/e4c21617c688f3572c36a31723bac5d5 to your computer and use it in GitHub Desktop.
# Script dependencies: Explorer for CSV/data frames, Jason for JSON encoding,
# HTTPoison for the HTTP calls.
Mix.install([
  {:explorer, "~> 0.7.0"},
  {:jason, "~> 1.4"},
  {:httpoison, "~> 2.1.0"}
])

# Tab-separated exports to import; comment entries in or out as needed.
files = [
  "input/2023/august/csv/6602.csv"
  # "input/2023/august/csv/6603.csv"
]

HTTPoison.start()

# NOTE(review): `dtypes` is not referenced anywhere in the visible script.
dtypes = [cost: :float]

# Column names applied, in order, to every imported data frame.
columns = ~w(
  id
  date
  name
  notes
  telephone
  address1
  address2
  taxcode
  item
  amount
  cost
  total_products
  total_before_discount
  discount
  total
  vat
  baht
)
defmodule Main do
  @moduledoc """
  Imports invoice rows from tab-separated CSV exports, posts every row to the
  local invoice API as JSON and stores the PDF rendered for it on disk.
  """

  require Explorer.DataFrame
  alias Explorer.DataFrame, as: DF
  alias Explorer.Series, as: S

  @tenant "avocado"
  @base_url "http://localhost:8090"
  @output_dir "elixir_output"
  @json_headers [{"Content-Type", "application/json"}]

  # Columns holding money amounts; each one is converted to integer minor units.
  @money_columns [
    :cost,
    :total,
    :vat,
    :baht,
    :total_before_discount,
    :total_products,
    :discount
  ]

  @doc """
  Reads every file in `files`, renames its columns to `columns`, then uploads
  each row as an invoice and saves its PDF under `#{@output_dir}/`.

  Rows are processed concurrently (at most 5 at a time, 10 s per row).
  Returns the list of `Task.async_stream/3` results.
  """
  def main(files, columns) do
    # Without the directory every write would fail with :enoent.
    File.mkdir_p!(@output_dir)

    files
    |> parse_files(columns)
    |> to_map()
    |> Enum.concat()
    |> Task.async_stream(&upload_and_render/1,
      max_concurrency: 5,
      # The option is `:timeout`; a misspelt key is ignored and the default applies.
      timeout: 10_000
    )
    |> Enum.to_list()
  end

  @doc """
  Converts each data frame in `dfs` into a list of invoice payload maps.

  Returns one list of payloads per data frame, in the same order.
  """
  def to_map(dfs) do
    for df <- dfs do
      df
      |> DF.to_rows_stream()
      |> Enum.map(&invoice_payload/1)
    end
  end

  # Builds the JSON-ready invoice payload for a single data frame row.
  defp invoice_payload(row) do
    %{
      "_id" => "#{@tenant}:#{row["id"]}",
      "tenant_id" => @tenant,
      "invoice_id" => row["id"],
      "issued_date" => row["date"],
      "items" => [
        %{
          :code => "01",
          :description => row["item"],
          :amount => row["amount"],
          :cost => row["cost"],
          # NOTE(review): the item total is taken from "total" although
          # "total_products" is also converted — confirm which one is intended.
          :total => row["total"]
        }
      ],
      "customer" => %{
        :name => row["name"],
        :address1 => row["address1"],
        :address2 => row["address2"],
        :telephone => row["telephone"],
        :taxcode => row["taxcode"] || ""
      },
      "discount" => row["discount"] || 0,
      "total" => row["total"],
      "total_before_discount" => row["total_before_discount"],
      "novat" => row["baht"],
      "vat" => row["vat"]
    }
  end

  # Posts one invoice payload, then downloads and stores its rendered PDF.
  defp upload_and_render(%{"invoice_id" => invoice_id} = invoice) do
    json = Jason.encode!(invoice)

    %{status_code: code} =
      HTTPoison.post!("#{@base_url}/api/invoice/", json, @json_headers)

    IO.puts("#{code}: #{json}")
    IO.puts("writing to PDF...")

    pdf_url = "#{@base_url}/gotenberg/?tenant=#{@tenant}&invoice=#{invoice_id}"

    case HTTPoison.get!(pdf_url) do
      %{status_code: 200, body: pdf} ->
        # Raise on failure instead of reporting success for a file never written.
        File.write!(Path.join(@output_dir, "#{invoice_id}.pdf"), pdf)
        IO.puts("done writing #{invoice_id}.pdf")

      %{status_code: status} ->
        # Never store an error body under a .pdf name.
        IO.puts("skipping #{invoice_id}.pdf: PDF request returned #{status}")
    end
  end

  # Loads each tab-separated file, applies the column names and normalises
  # the money columns.
  defp parse_files(files, columns) do
    for file <- files do
      file
      |> DF.from_csv!(delimiter: "\t")
      |> DF.rename(columns)
      |> normalize_money_columns()
    end
  end

  # Replaces every money column with its integer minor-unit equivalent.
  defp normalize_money_columns(df) do
    Enum.reduce(@money_columns, df, fn column, acc ->
      DF.put(acc, column, to_minor_units(acc[Atom.to_string(column)]))
    end)
  end

  # Converts a series of amounts into integer minor units (1/100 of the unit).
  defp to_minor_units(series) do
    series
    |> S.transform(&minor_units/1)
    |> S.cast(:integer)
  end

  # "1,234.5" -> "123450": drop thousands separators and force exactly two
  # fraction digits, so that removing the decimal point cannot shift the scale.
  defp minor_units(text) when is_binary(text) do
    case text |> String.trim() |> String.replace(",", "") do
      "" ->
        # Blank cells stay missing so callers can apply their own default.
        nil

      cleaned ->
        {whole, fraction} =
          case String.split(cleaned, ".", parts: 2) do
            [whole] -> {whole, ""}
            [whole, fraction] -> {whole, fraction}
          end

        whole <> (fraction |> String.pad_trailing(2, "0") |> String.slice(0, 2))
    end
  end

  # Columns without thousands separators may already be numeric; scale them
  # instead of letting the integer cast drop the fraction.
  defp minor_units(amount) when is_float(amount), do: round(amount * 100)
  defp minor_units(amount) when is_integer(amount), do: amount * 100
  defp minor_units(other), do: other
end
# Run the import for the files and column names configured at the top of the script.
Main.main(files, columns)
# NOTE(review): exits the calling process with reason :end — presumably a leftover
# "stop here" marker; confirm whether a non-normal exit reason is intended.
exit(:end)
import pandas as pd
import requests
def decToInt(string):
    """Split a formatted amount into its integer and fractional parts.

    Args:
        string: Amount such as ``"1,234.50"``; any value is converted with
            ``str()`` first, so numbers are accepted too.

    Returns:
        A tuple ``(integer, decimal)`` where ``integer`` is the whole part as
        a string with thousands separators removed and ``decimal`` is the
        fractional part as an ``int`` (``0`` when there is none).

    Raises:
        ValueError: If the fractional part is not a plain integer.

    Note:
        Converting the fraction to ``int`` drops leading zeros
        (``".05"`` -> ``5``), so callers must not use the returned number to
        reconstruct the exact fraction digits.
    """
    # partition() tolerates values without a decimal point, where unpacking
    # the result of split(".") would raise.
    integer, _, decimal = str(string).strip().partition(".")
    integer = integer.replace(',', '')
    return integer, int(decimal) if decimal else 0
# Tab-separated exports to process. Earlier batches are kept commented out so
# they can be re-enabled for a re-run.
files = [
    # "input/6506.csv",
    # "input/6507.csv",
    # "input/6508.csv",
    # "input/6509.csv",
    # "input/6510.csv",
    # "input/651117.csv",
    # "input/2023/6506.csv",
    # "input/2023/6507.csv",
    # "input/2023/6508.csv",
    # "input/2023/6509.csv",
    # "input/2023/6510.csv",
    # "input/2023/6511.csv",
    # "input/2023/6512.csv",
    # "input/2023/6601.csv", "input/2023/6602.csv", "input/2023/6603.csv",
    # "input/2023/6603.csv"
    # "input/2023/6603-1.csv"
    # "input/2023/new/6512.csv",
    # "input/2023/new/6601.csv",
    # "input/2023/new/6603.csv",
    # "input/2023/new/6604.csv",
    # "input/2023/new/6605.csv",
    # "input/2023/new/6606.csv",
    # "input/2023/new/6607.csv",
    "input/2023/august/csv/6602.csv",
    # "input/2023/august/csv/6603.csv",
    # "input/2023/august/csv/6604.csv",
    # "input/2023/august/csv/6605.csv",
    # "input/2023/august/csv/6606.csv",
    # "input/2023/august/csv/6607.csv",
]

# Column names assigned, in order, to every loaded data frame.
# An alternative layout with "date" before "id" was used previously:
# "date", "id", "name", "notes", "telephone", "address1", "address2", "taxcode", "item", "amount", "cost", "total_products", "total_before_discount", "discount", "total", "vat", "baht"
columns = [
    "id",
    "date",
    "name",
    "notes",
    "telephone",
    "address1",
    "address2",
    "taxcode",
    "item",
    "amount",
    "cost",
    "total_products",
    "total_before_discount",
    "discount",
    "total",
    "vat",
    "baht",
]
def _to_minor_units(value, default=None):
    """Convert an amount such as ``"1,234.5"`` to integer minor units (123450).

    The fraction is taken from the text itself and padded or truncated to two
    digits, so ``".05"`` and ``".5"`` keep their scale. Blank values return
    ``default``, or raise ``ValueError`` when no default is given.
    """
    text = str(value).strip().replace(",", "")
    if not text:
        if default is None:
            raise ValueError("missing amount")
        return default
    whole, _, fraction = text.partition(".")
    return int(whole + fraction.ljust(2, "0")[:2])


def readFromCsvPrintToOutput(fn):
    """Upload every row of a tab-separated export as an invoice and save its PDF.

    Args:
        fn: Path of the tab-separated CSV file. Its columns are renamed to the
            module-level ``columns`` list.

    For each row the invoice JSON is posted to the local invoice API, then the
    rendered PDF is fetched and written to ``output/avocado_<id>.pdf``.

    Raises:
        ValueError: If a required amount is blank or not numeric.
    """
    df = pd.read_csv(fn, sep="\t")
    df.fillna("", inplace=True)
    df.columns = columns

    tenant = "avocado"

    for _, row in df.iterrows():
        payload = dict(row)
        print(payload)

        j = {
            "_id": f"{tenant}:{payload['id']}",
            "tenant_id": tenant,
            "invoice_id": str(payload['id']),
            "issued_date": payload['date'],
            "items": [
                {
                    "code": "01",
                    "description": payload['item'],
                    "amount": int(payload['amount']),
                    "cost": _to_minor_units(payload['cost']),
                    "total": _to_minor_units(payload['total_products']),
                }
            ],
            "customer": {
                "name": payload['name'],
                "address1": payload['address1'],
                "address2": str(payload['address2']),
                "telephone": payload['telephone'],
                "taxcode": payload["taxcode"],
            },
            # A blank discount means "no discount"; any other value is parsed,
            # whether or not it contains a thousands separator.
            "discount": _to_minor_units(payload['discount'], default=0),
            "total": _to_minor_units(payload['total']),
            "total_before_discount": _to_minor_units(payload['total_before_discount']),
            "novat": _to_minor_units(payload['baht']),
            "vat": _to_minor_units(payload['vat']),
        }

        res = requests.post("http://localhost:8090/api/invoice/", json=j)
        if not res.ok:
            # Report but keep going: the PDF may still be renderable, e.g. when
            # the invoice already exists from an earlier run.
            print(f"invoice {payload['id']}: upload returned HTTP {res.status_code}")

        pdf_url = f"http://localhost:8090/gotenberg/?tenant={tenant}&invoice={payload['id']}"
        pdf = requests.get(pdf_url)
        if not pdf.ok:
            # Never store an error body under a .pdf name.
            print(f"invoice {payload['id']}: PDF request returned HTTP {pdf.status_code}, skipping")
            continue

        out_path = f"output/{tenant}_{payload['id']}.pdf"
        print("writing to " + out_path)
        with open(out_path, "wb") as f:
            f.write(pdf.content)
        print("done")
# readFromCsvPrintToOutput("input/2023/6506.csv")
# Process every configured export in list order.
for csv_path in files:
    readFromCsvPrintToOutput(csv_path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment