Last active
August 15, 2023 17:45
-
-
Save dewitt4/26c8dddab913a2457465ce026c245542 to your computer and use it in GitHub Desktop.
A Python function that uses the Google Drive API to search for files, ingests the raw data from those files into pandas DataFrames, and then analyzes the data using OpenAI's API for natural language processing.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import io
import os
import pickle

import openai
import pandas as pd
from google.auth.transport.requests import Request
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
# Google Drive API settings: read-only access is all this script requests.
SCOPES_DRIVE = ['https://www.googleapis.com/auth/drive.readonly']

# OpenAI API key. Taken from the OPENAI_API_KEY environment variable when it
# is set so that a real secret never has to be written into this file; the
# original placeholder is kept as the fallback for backward compatibility.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY', 'YOUR_OPENAI_API_KEY')
def authenticate_google_drive():
    """Return Google Drive credentials, authenticating if necessary.

    Cached credentials are read from ``token_drive.pickle`` in the current
    working directory. When they are missing or not valid they are either
    refreshed (expired, with a refresh token) or obtained through a
    local-server OAuth flow built from ``credentials_drive.json`` and
    ``SCOPES_DRIVE``. Newly obtained or refreshed credentials are written
    back to ``token_drive.pickle``.

    Returns:
        The credentials object loaded from the cache or produced by the
        refresh / OAuth flow.
    """
    creds = None
    if os.path.exists('token_drive.pickle'):
        # SECURITY: pickle.load can execute arbitrary code embedded in the
        # file. Only use a token cache that this script wrote itself.
        try:
            with open('token_drive.pickle', 'rb') as token:
                creds = pickle.load(token)
        except (pickle.UnpicklingError, EOFError):
            # A truncated or corrupt cache is treated as "no cached
            # credentials" so the user is re-authenticated instead of the
            # script crashing before it can do anything.
            creds = None

    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                'credentials_drive.json', SCOPES_DRIVE)
            # port=0 is passed through unchanged from the original call.
            creds = flow.run_local_server(port=0)
        # Persist the new/refreshed credentials for the next run.
        with open('token_drive.pickle', 'wb') as token:
            pickle.dump(creds, token)

    return creds
def _list_drive_files(drive_service, query):
    """Return every file (id, name) matching *query*, following all pages."""
    matches = []
    page_token = None
    while True:
        response = drive_service.files().list(
            q=query,
            fields="nextPageToken, files(id, name)",
            pageToken=page_token).execute()
        matches.extend(response.get('files', []))
        page_token = response.get('nextPageToken')
        if not page_token:
            return matches


def search_and_analyze_files(query):
    """Search Drive for files, load each as CSV and print an OpenAI analysis.

    For every file matching ``query`` the function prints a progress line,
    downloads the file content through the authenticated Drive service,
    parses it with ``pd.read_csv``, sends a prompt to the OpenAI completion
    endpoint and prints the returned text followed by ``df.head()``.

    Errors are reported with ``print`` rather than raised: a failure while
    listing files ends the run, while a failure on one file is reported and
    the remaining files are still processed.

    Args:
        query: Drive search expression passed as ``q`` to ``files().list``.

    Returns:
        None. All results are written to standard output.
    """
    creds = authenticate_google_drive()
    drive_service = build('drive', 'v3', credentials=creds)

    try:
        # The original requested nextPageToken but never used it, so only
        # the first page of results was ever processed.
        files = _list_drive_files(drive_service, query)
    except Exception as e:
        print(f'An error occurred: {e}')
        return

    if not files:
        print('No files found.')
        return

    openai.api_key = OPENAI_API_KEY

    for drive_file in files:
        # Handled per file so one unreadable file does not abort the rest.
        try:
            print(f"Processing {drive_file['name']} ({drive_file['id']})")

            # Download with the OAuth credentials. The public
            # "drive.google.com/uc?id=" URL used previously is fetched
            # without authentication, so it does not work for private files.
            # NOTE(review): Google-native documents (Docs/Sheets) presumably
            # need an export call instead of get_media — confirm if needed.
            content = drive_service.files().get_media(
                fileId=drive_file['id']).execute()
            df = pd.read_csv(io.BytesIO(content))  # Adjust for your file type

            # Analyze data using OpenAI
            # NOTE(review): the prompt carries only the file name, so the
            # model never sees the DataFrame contents. Left unchanged because
            # sending file data to a third party should be an explicit choice.
            data_analysis_prompt = (
                f"Analyze the data in the file {drive_file['name']}."
            )
            # NOTE(review): openai.Completion looks like the legacy (pre-1.0)
            # client interface — confirm against the installed openai version.
            response = openai.Completion.create(
                engine="davinci", prompt=data_analysis_prompt, max_tokens=100
            )
            analysis_result = response.choices[0].text

            print("Data Analysis:")
            print(analysis_result)

            # You can further process or visualize the DataFrame and
            # analysis result as needed
            print(df.head())  # Print the first few rows of the DataFrame
        except Exception as e:
            print(f'An error occurred: {e}')
if __name__ == '__main__':
    # Script entry point: run one search-and-analyze pass.
    # Replace the placeholder with your own search query before running.
    search_query = "Your search query here"
    search_and_analyze_files(search_query)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment