Skip to content

Instantly share code, notes, and snippets.

@eng-rodrigocunha
Created March 17, 2023 02:33
Show Gist options
  • Save eng-rodrigocunha/23e2bdf89548d6c8e5952697cca0c8be to your computer and use it in GitHub Desktop.
Save eng-rodrigocunha/23e2bdf89548d6c8e5952697cca0c8be to your computer and use it in GitHub Desktop.
Realiza download de bucket no GCS e procura quais arquivos possuem determinada condição
import basedosdados as bd
import pandas as pd
import glob
# Script: download the staged partitions for hours 14-23 of 2023-03-08 and
# report which of the downloaded CSV files contain at least one row matching
# the target vehicle ("ordem") and latitude.

# Set the basedosdados project config path before any Storage object is built.
bd.config.project_config_path = "D:\\basedosdados\\staging"

# A single Storage handle is reused; only the partition differs per download.
st = bd.Storage(dataset_id="br_rj_riodejaneiro_onibus_gps", table_id="registros")
for hour in range(14, 24):
    print(hour)
    st.download(savepath=".", partitions=f"data=2023-03-08/hora={hour}", mode="staging")

# NOTE(review): the download above saves under "." while the pattern below is
# an absolute path; presumably the script is run from the matching working
# directory -- confirm.
# glob returns paths in arbitrary order; sorting makes "last match" and the
# optional truncation below reproducible between runs.
files_list = sorted(
    glob.glob(
        r"E:\SMTR\smtr-sandbox\staging\br_rj_riodejaneiro_onibus_gps\registros\data=2023-03-08\**\*.csv",
        recursive=True,
    )
)

# Optional resume marker: set this to a path present in files_list (e.g. the
# match printed by a previous run) to search only the files that come before
# it. None searches every file.
stop_before_file = None
if stop_before_file is not None:
    files_list = files_list[: files_list.index(stop_before_file)]

# Last file, in list order, that contained a matching row. It is initialised
# once, outside the loop, so a later non-matching file does not erase it.
last_file = None
for file in files_list:
    df = pd.read_csv(file)
    # Latitude is compared against a string with a decimal comma; presumably
    # the staged CSV stores it as text in that format -- TODO confirm.
    cond = (df["ordem"] == "A27556") & (df["latitude"] == "-22,94376")
    if cond.any():
        print(file)
        last_file = file
print(last_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment