-
-
Save jitsejan/2735e8ed80a70a67d1fa40a2d969c7eb to your computer and use it in GitHub Desktop.
import pandas as pd | |
from azure.cosmosdb.table.tableservice import TableService | |
# Connection string handed to TableService in set_table_service()
# (dummy value; replace with the real one before running).
CONNECTION_STRING = "DUMMYSTRING"
# Table queried by get_data_from_table_storage_table() (dummy value).
SOURCE_TABLE = "DUMMYTABLE"
def set_table_service():
    """Set the Azure Table Storage service.

    Returns:
        A ``TableService`` constructed from the module-level
        ``CONNECTION_STRING``.
    """
    return TableService(connection_string=CONNECTION_STRING)
def get_dataframe_from_table_storage_table(table_service, filter_query):
    """Create a dataframe from Table Storage data.

    Args:
        table_service: Service object forwarded to
            ``get_data_from_table_storage_table``.
        filter_query: Filter expression forwarded unchanged.

    Returns:
        A ``pandas.DataFrame`` built from every record yielded by
        ``get_data_from_table_storage_table``.
    """
    records = get_data_from_table_storage_table(table_service, filter_query)
    # NOTE: the DataFrame constructor consumes the whole generator, so every
    # matching record ends up in memory at once; narrow ``filter_query`` (and
    # concatenate partial frames) when the table is large.
    return pd.DataFrame(records)
def get_data_from_table_storage_table(table_service, filter_query):
    """Retrieve data from Table Storage.

    Args:
        table_service: Object exposing ``query_entities(table, filter=...)``.
        filter_query: Filter expression passed as the ``filter`` keyword.

    Yields:
        Each record produced by ``table_service.query_entities`` for
        ``SOURCE_TABLE``, in the order it is returned.
    """
    # Delegating keeps this function a generator, so records are handed to the
    # caller one at a time instead of being collected here first.
    yield from table_service.query_entities(SOURCE_TABLE, filter=filter_query)
# Example usage: load every entity whose PartitionKey equals '12345'.
fq = "PartitionKey eq '12345'"
ts = set_table_service()
df = get_dataframe_from_table_storage_table(table_service=ts, filter_query=fq)
Plus 1 for cliffeby above.
Hi,
I tried this code to extract nearly 1 TB of data, but it runs out of memory. Can you suggest how to optimize this code?
Typo in: df = get_data_dataframe_from_table_storage_table(table_service=ts,
filter_query=fq)
Should be: df = get_dataframe_from_table_storage_table(table_service=ts,
filter_query=fq)
Thanks @cliffeby and @nigelainscoe. Fixed it within 1 year!
Hi,
I tried this code to extract nearly 1 TB of data, but it runs out of memory. Can you suggest how to optimize this code?
Hi @Aatmaj1,
I have not tried this with big data sets. In one of my bigger projects, however, I used the above code, but instead of writing the whole table to a Pandas dataframe at once, I modified the `fq` filter to iterate through the table by `month` and `year`, and concatenated the resulting Pandas dataframes with `pandas.concat` to get a single dataframe in the end.
If you have a more specific issue, please let me know.
Typo in: df = get_data_dataframe_from_table_storage_table(table_service=ts,
filter_query=fq)
Should be: df = get_dataframe_from_table_storage_table(table_service=ts,
filter_query=fq)