Created
May 30, 2024 19:54
-
-
Save jster1357/3acdf74a08584fc2cd42d06c15515de9 to your computer and use it in GitHub Desktop.
We can make this file beautiful and searchable if this error is corrected: No commas found in this CSV file in line 0.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
5994000001|53532433|O|176627.30|1997-03-30|1-URGENT|Clerk#000632485|0|ly bold sentiments integrate doggedly? furious
5994000002|9601419|F|292586.12|1993-10-16|3-MEDIUM|Clerk#000171252|0|special pinto beans; furiously even ideas sle
5994000003|41037313|F|218489.85|1994-07-28|2-HIGH|Clerk#000218085|0|accounts may cajole. final dinos wake f
5994000004|14091148|O|52208.87|1998-02-22|2-HIGH|Clerk#000348944|0|ct furiously around the care
5994000005|129915457|F|201126.75|1993-07-18|1-URGENT|Clerk#000687363|0|ainst the slyly special courts. quickly ironi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pyarrow.csv as csv
import pyarrow as pa
import pyarrow.fs
import gcsfs
import pyarrow.parquet as pq

# Read a pipe-delimited orders file from GCS into an Arrow table and write it
# back to GCS as Parquet.

## set read/write paths
gcs_read_path = 'my object path'
gcs_write_path = 'my object path'

## set file service to read from
fs = gcsfs.GCSFileSystem(project='my-project')

# Read options: column names are supplied explicitly, so the first row of the
# input is read as data rather than as a header.
read_options = csv.ReadOptions(
    column_names=[
        'o_orderkey',
        'o_custkey',
        'o_orderstatus',
        'o_totalprice',
        "o_orderdate",
        "o_orderpriority",
        "o_clerk",
        "o_shippriority",
        "o_comment",
    ]
)

# Parse options: fields are separated by "|" instead of the default comma.
parse_options = csv.ParseOptions(
    delimiter="|"
)

# Conversion options: pin the types of the price and date columns; every
# other column is left to pyarrow's type inference.
convert_options = csv.ConvertOptions(
    column_types={
        "o_totalprice": pa.decimal128(10, 2),
        "o_orderdate": pa.date32(),
    }
)

# Open the source object and parse it into an Arrow table. The `with` block
# closes the file on exit, so no explicit close() is needed.
with fs.open(gcs_read_path, 'rb') as f:
    table = csv.read_csv(
        f,
        read_options=read_options,
        parse_options=parse_options,
        convert_options=convert_options,
    )

# Write the table to the destination object as Parquet; the file is closed
# when the `with` block exits.
with fs.open(gcs_write_path, 'wb') as f:
    pq.write_table(table, f)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment