Skip to content

Instantly share code, notes, and snippets.

@dewitt4
Last active August 15, 2023 17:45
Show Gist options
  • Save dewitt4/26c8dddab913a2457465ce026c245542 to your computer and use it in GitHub Desktop.
Save dewitt4/26c8dddab913a2457465ce026c245542 to your computer and use it in GitHub Desktop.
Python function that uses the Google Drive API to search for files, ingests the raw data from those files into Pandas dataframes, and then analyzes the data using OpenAI's API for natural language processing
import os
import pickle
import pandas as pd
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
import openai
# Google Drive API settings: the read-only scope is enough because files are
# only listed and downloaded.
SCOPES_DRIVE = ['https://www.googleapis.com/auth/drive.readonly']
# OpenAI API key: taken from the OPENAI_API_KEY environment variable so the
# secret does not have to be committed to source; falls back to the
# placeholder, which must be replaced before real use.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY') or 'YOUR_OPENAI_API_KEY'
def authenticate_google_drive(token_path='token_drive.pickle',
                              credentials_path='credentials_drive.json'):
    """Return Google Drive credentials, reusing a cached token when possible.

    A cached token is loaded from ``token_path`` if that file exists. If the
    cached credentials are still valid they are returned as-is. Otherwise
    they are refreshed (when expired and a refresh token is available) or a
    new local-server OAuth flow is run using ``credentials_path``; the
    resulting credentials are then written back to ``token_path``.

    Args:
        token_path: Pickle file used to cache credentials between runs.
        credentials_path: OAuth client-secrets JSON file used for a new flow.

    Returns:
        The credentials object to pass to the Drive service builder.
    """
    creds = None
    if os.path.exists(token_path):
        # NOTE(security): pickle.load can execute arbitrary code from a
        # crafted file. Only point token_path at a cache this program wrote.
        with open(token_path, 'rb') as token:
            creds = pickle.load(token)

    # Fast path: the cached credentials are usable, nothing to persist.
    if creds and creds.valid:
        return creds

    if creds and creds.expired and creds.refresh_token:
        creds.refresh(Request())
    else:
        flow = InstalledAppFlow.from_client_secrets_file(
            credentials_path, SCOPES_DRIVE)
        # port=0 is passed through to run_local_server unchanged.
        creds = flow.run_local_server(port=0)

    # Persist the new or refreshed credentials for the next run.
    with open(token_path, 'wb') as token:
        pickle.dump(creds, token)
    return creds
def _list_matching_files(drive_service, query):
    """Return every file (id and name) matching *query*, following pagination."""
    matches = []
    page_token = None
    while True:
        response = drive_service.files().list(
            q=query,
            fields="nextPageToken, files(id, name)",
            pageToken=page_token,
        ).execute()
        matches.extend(response.get('files', []))
        page_token = response.get('nextPageToken')
        if not page_token:
            return matches


def _download_dataframe(drive_service, file_id):
    """Download a Drive file through the authenticated service and parse it as CSV."""
    import io

    from googleapiclient.http import MediaIoBaseDownload

    buffer = io.BytesIO()
    request = drive_service.files().get_media(fileId=file_id)
    downloader = MediaIoBaseDownload(buffer, request)
    done = False
    while not done:
        _, done = downloader.next_chunk()
    buffer.seek(0)
    return pd.read_csv(buffer)  # Adjust for your file type


def _build_analysis_prompt(file_name, df, preview_rows=10):
    """Build a completion prompt that embeds a small CSV preview of *df*."""
    columns = ', '.join(map(str, df.columns))
    preview = df.head(preview_rows).to_csv(index=False)
    return (
        f"Analyze the data in the file {file_name}.\n\n"
        f"Columns: {columns}\n"
        f"First rows (CSV):\n{preview}\n"
        "Analysis:"
    )


def search_and_analyze_files(query):
    """Search Google Drive for files and print an OpenAI analysis of each one.

    Every file matching ``query`` is downloaded with the authenticated Drive
    service, parsed as CSV into a DataFrame, summarised by an OpenAI
    completion whose prompt contains a preview of the data, and printed
    together with the first rows of the DataFrame.

    Args:
        query: Search expression passed as ``q`` to the Drive ``files.list``
            call.

    Returns:
        None. Results and errors are printed. A failure on one file is
        reported and the remaining files are still processed.
    """
    creds = authenticate_google_drive()
    drive_service = build('drive', 'v3', credentials=creds)

    try:
        files = _list_matching_files(drive_service, query)
    except Exception as e:
        print(f'An error occurred: {e}')
        return

    if not files:
        print('No files found.')
        return

    openai.api_key = OPENAI_API_KEY
    for file in files:
        print(f"Processing {file['name']} ({file['id']})")
        try:
            df = _download_dataframe(drive_service, file['id'])
            # Analyze data using OpenAI
            # NOTE(review): openai.Completion looks like the legacy (pre-1.0)
            # interface of the openai package -- confirm the installed version.
            response = openai.Completion.create(
                engine="davinci",
                prompt=_build_analysis_prompt(file['name'], df),
                max_tokens=100,
            )
            analysis_result = response.choices[0].text
        except Exception as e:
            # Best-effort: report the failure and move on to the next file.
            print(f"An error occurred while processing {file['name']}: {e}")
            continue
        print("Data Analysis:")
        print(analysis_result)
        # You can further process or visualize the DataFrame and analysis result as needed
        print(df.head())  # Print the first few rows of the DataFrame
if __name__ == '__main__':
    # Script entry point: run one search-and-analyze pass.
    # Replace with your search query
    search_query = "Your search query here"
    search_and_analyze_files(query=search_query)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment