Skip to content

Instantly share code, notes, and snippets.

@jordigg
Created July 12, 2023 12:01
Show Gist options
  • Save jordigg/3ede57b1dbe08167f73004ef9607cc91 to your computer and use it in GitHub Desktop.
Save jordigg/3ede57b1dbe08167f73004ef9607cc91 to your computer and use it in GitHub Desktop.
Download expense attachment PDFs from the Holded ERP API. Expenses are stored in a folder structure by year > month > invoice.pdf, and a CSV file is generated to log the progress and the files exported.
import csv
import requests
import json
import base64
import os
from datetime import datetime
import re
from tqdm import tqdm
# CSV log of documents already handled; main() reads it to skip them on
# later runs and appends one row per newly processed document.
processed_docs_file = "processed_docs.csv"
def sanitize_filename(filename):
    """Return *filename* reduced to characters that are safe in a file name.

    Only word characters, spaces and hyphens are kept. Any run of
    whitespace (including tabs and newlines, which the pattern's ``\\s``
    would otherwise let through) collapses to a single space, and leading
    and trailing whitespace is removed.

    Args:
        filename: The raw name. ``None`` is treated as an empty name and
            other non-string values are converted with ``str()``.

    Returns:
        The sanitized name, possibly empty.
    """
    if filename is None:
        return ''
    # Drop every character that isn't a word character, whitespace, or hyphen
    cleaned = re.sub(r'[^\w\s-]', '', str(filename))
    # split()/join normalizes tabs/newlines/repeated blanks and strips the ends
    return ' '.join(cleaned.split())
# Seconds to wait for the API before giving up; without a timeout a stalled
# connection would block the export indefinitely.
_REQUEST_TIMEOUT = 30


def _load_processed_ids(log_path):
    """Return the set of document IDs already recorded in the CSV log.

    When the log does not exist yet it is created with its header row and
    an empty set is returned.
    """
    # Encoding is left at the platform default so that logs written by
    # earlier runs of this script remain readable.
    if not os.path.exists(log_path):
        with open(log_path, 'w', newline='') as f:
            writer = csv.writer(f)
            writer.writerow(['doc_id', 'status', 'doc_number', 'date',
                             'contact_name', 'desc', 'filename', 'file_path'])
        return set()

    with open(log_path, 'r', newline='') as f:
        # Blank rows are skipped (they have no first column). The header's
        # 'doc_id' cell also lands in the set, which is harmless.
        return {row[0] for row in csv.reader(f) if row}


def _fetch_pdf_bytes(doc_id, headers):
    """Request the PDF of one purchase document.

    Returns:
        A ``(ok, pdf_bytes)`` tuple. ``ok`` is False when the request or
        the decoding failed (an error is printed and nothing should be
        logged, so the document is retried on the next run). When ``ok``
        is True, ``pdf_bytes`` is the decoded PDF, or ``None`` if the
        response carried no ``'data'`` field (attachment missing).
    """
    get_pdf_url = f'https://api.holded.com/api/invoicing/v1/documents/purchase/{doc_id}/pdf'
    try:
        pdf_response = requests.get(
            get_pdf_url, headers=headers, timeout=_REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        print(f'Error downloading PDF for document {doc_id}: {exc}')
        return False, None

    if pdf_response.status_code != 200:
        print(
            f'Error downloading PDF for document {doc_id}: {pdf_response.status_code}')
        return False, None

    try:
        pdf_data = pdf_response.json()
        if not isinstance(pdf_data, dict) or 'data' not in pdf_data:
            return True, None
        # The PDF content is delivered Base64-encoded inside the JSON body
        return True, base64.b64decode(pdf_data['data'])
    except (ValueError, TypeError) as exc:
        # ValueError covers both invalid JSON and invalid Base64
        print(f'Error downloading PDF for document {doc_id}: {exc}')
        return False, None


def main():
    """Export the PDF attachment of every purchase document from Holded.

    Lists the purchase documents in a fixed date range, downloads the PDF
    of each document not yet present in the CSV log, stores it under
    ``expenses/<year>/<month>/`` and appends one row per handled document
    to the log. Documents whose download fails are not logged, so they are
    retried on the next run. Errors are printed rather than raised.
    """
    # API token for authentication; the HOLDED_API_TOKEN environment
    # variable takes precedence over the placeholder when it is set
    api_token = os.environ.get('HOLDED_API_TOKEN', 'YOUR_API_TOKEN_HERE')
    # Define the API endpoint for listing documents
    list_documents_url = 'https://api.holded.com/api/invoicing/v1/documents/purchase'
    # Define the start and end timestamps
    start_timestamp = '1514761200'  # Unix time 31/12/2017 23:00 UTC (01/01/2018 00:00 CET)
    end_timestamp = '1704063600'  # Unix time 31/12/2023 23:00 UTC
    # Define the headers for the API request
    headers = {
        'accept': 'application/json',
        'key': api_token
    }

    # IDs of already processed documents, as a set for O(1) lookups
    processed_ids = _load_processed_ids(processed_docs_file)

    # Make the API request to list the documents
    try:
        response = requests.get(
            list_documents_url,
            headers=headers,
            params={'starttmp': start_timestamp, 'endtmp': end_timestamp},
            timeout=_REQUEST_TIMEOUT,
        )
    except requests.RequestException as exc:
        print(f'Error listing documents: {exc}')
        return

    if response.status_code != 200:
        print(f'Error listing documents: {response.status_code}')
        return

    try:
        documents = response.json()
    except ValueError as exc:
        print(f'Error listing documents: {exc}')
        return

    for doc in tqdm(documents, desc="Processing records"):
        doc_id = doc['id']
        # Skip documents handled in an earlier run (or earlier in this one)
        if doc_id in processed_ids:
            continue

        ok, pdf_bytes = _fetch_pdf_bytes(doc_id, headers)
        if not ok:
            continue

        # The document date is converted using the local timezone
        date = datetime.fromtimestamp(doc['date'])

        if pdf_bytes is None:
            filename = ""
            file_path = ""
            status = "Attachment missing"
        else:
            # Folder layout: expenses/<year>/<zero-padded month>/
            directory_path = os.path.join(
                'expenses', str(date.year), str(date.month).zfill(2))
            # Created only when there is a file to store, so missing
            # attachments don't leave empty folders behind
            os.makedirs(directory_path, exist_ok=True)
            # contactName may be absent or null; fall back to an empty name
            contact_name = sanitize_filename(str(doc.get('contactName') or ''))
            filename = f'{doc_id} {contact_name}'.strip() + '.pdf'
            file_path = os.path.join(directory_path, filename)
            with open(file_path, 'wb') as pdf_file:
                pdf_file.write(pdf_bytes)
            status = "Downloaded"

        # Write the document details to the CSV file
        with open(processed_docs_file, 'a', newline='') as f:
            writer = csv.writer(f)
            writer.writerow([doc_id, status, doc.get('docNumber'),
                             date.strftime('%d/%m/%Y'), doc.get('contactName'),
                             doc.get('desc'), filename, file_path])
        # Remember it so a duplicate entry in the listing is not re-downloaded
        processed_ids.add(doc_id)
# Run the export only when executed as a script, not when imported
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment