-
-
Save alexm1309/40887d00a2090c11101fb73101142c56 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# In[1]: | |
import civis | |
import civis.utils | |
import pandas as pd | |
client = civis.APIClient() | |
# # 1. Create a CSV for Importing | |
# | |
# We write a CSV to disk and copy it to Civis, using Pandas to facilitate formatting and data representation. | |
# In[2]: | |
df = pd.DataFrame([[1, 2.3, 'four'], [2, 3.4, 'five']], columns=['foo', 'bar', 'baz']) | |
with open('test_import.csv', 'w+') as f: | |
df.to_csv(f, index=False) | |
f.seek(0) | |
file_id = civis.io.file_to_civis(f, 'test_import.csv') | |
print(f'File id is {file_id}') | |
# # 2. Configure and Create A CSV Import | |
# | |
# Here, we set up the required parameters for a CSV Import: | |
# | |
# * `source` - contains a list, containing the `file_id` from above. | |
# * `destination` - outlining the schema, table, cluster, and credential to use for importing. | |
# * `first_row_is_header` - True here, since we wrote the headers as part of the Pandas call. | |
# | |
# Since we have not created the table ahead of time, we need to provide column information as well. | |
# In[3]: | |
# Import expects a list of file IDs, since it could have more than one | |
file_ids = [file_id] | |
source = { | |
'file_ids': file_ids | |
} | |
schema = 'scratch' | |
table = 'test_csv_import' | |
destination = { | |
'schema': schema, | |
'table': table, | |
'credential_id': client.default_credential, | |
'remote_host_id': client.get_database_id('redshift-general'), | |
} | |
first_row_is_header = True | |
# In this import, we are providing the table columns | |
table_columns = [ | |
{'name': 'foo', 'sql_type': 'INTEGER'}, | |
{'name': 'bar', 'sql_type': 'FLOAT'}, | |
{'name': 'baz', 'sql_type': 'VARCHAR(5)'} | |
] | |
print('Posting import...') | |
import_response = client.imports.post_files_csv( | |
source, | |
destination, | |
first_row_is_header, | |
table_columns=table_columns, | |
) | |
print(f'Success! import id is {import_response.id}') | |
# # 3. Run the Import | |
# | |
# We now run the import to move the data into the corresponding table, and wait on its result. We could also check the job's status in the UI manually by visiting `https://platform.civisanalytics.com/spa/jobs/{import_response.id}` | |
# In[5]: | |
future = civis.utils.run_job(import_response.id, client=client) | |
future.result() | |
# # 4. Validate Import Success | |
# | |
# The import has succeeded, and the table is ready for use. We verify that it contains our expected results. | |
# In[13]: | |
df = civis.io.read_civis(f'{schema}.{table}', 'redshift-general', use_pandas=True) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment