Skip to content

Instantly share code, notes, and snippets.

@groupdocs-cloud-gists
Last active March 19, 2021 10:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save groupdocs-cloud-gists/571396380ccfc4dd4e89371320d0452e to your computer and use it in GitHub Desktop.
Save groupdocs-cloud-gists/571396380ccfc4dd4e89371320d0452e to your computer and use it in GitHub Desktop.
Extract images from PDF documents programmatically using a REST API in Python.
Extract Images from PDF Documents
1. Programmatically upload a PDF file to the cloud
2. Extract Images from a PDF document programmatically using Python.
3. Download the Image files from the cloud.
import os

# NOTE(review): groupdocs_parser_cloud is used below but no import of it is
# visible in this file; it must be imported before this point.

# Client credentials for the GroupDocs Cloud API. Each value can be overridden
# through an environment variable so that real secrets do not have to be
# committed to source; the literals are only the fallback.
client_id = os.environ.get(
    "GROUPDOCS_CLIENT_ID", "112f0f38-9dae-42d5-b4fc-cc84ae644972"
)
client_secret = os.environ.get(
    "GROUPDOCS_CLIENT_SECRET", "16ad3fe0bdc39c910f57d2fd48a5d618"
)

# Shared SDK configuration; the API clients below are built from it.
configuration = groupdocs_parser_cloud.Configuration(client_id, client_secret)
configuration.api_base_url = "https://api.groupdocs.cloud"

# Storage name handed to the file requests.
# NOTE(review): an empty name presumably selects the default storage - confirm.
my_storage = ""
# Extract images from a page range of "sample.pdf" and print, page by page,
# where each extracted image was stored.

# api initialization
parseApi = groupdocs_parser_cloud.ParseApi.from_config(configuration)

# define image options
options = groupdocs_parser_cloud.ImagesOptions()
options.file_info = groupdocs_parser_cloud.FileInfo()
options.file_info.file_path = "sample.pdf"

# define page range
options.start_page_number = 1
options.count_pages_to_extract = 1

# create request
request = groupdocs_parser_cloud.ImagesRequest(options)
result = parseApi.images(request)

# With a page range set, the images are read from result.pages, one entry per
# page, each carrying its own list of images.
for page in result.pages:
    print(f"Images from {page.page_index} page.")
    for image in page.images:
        print(
            f"Image path in storage: {image.path}. "
            f"Download url: {image.download_url}"
        )
        print(
            f"Image format: {image.file_format}. "
            f"Page index: {image.page_index}"
        )
import shutil

# Download an extracted image from cloud storage and move it into a local
# folder.
# NOTE(review): `image` is not defined in this snippet; presumably it is the
# loop variable left over from a preceding extraction loop - confirm.

# api initialization
file_api = groupdocs_parser_cloud.FileApi.from_config(configuration)
my_storage = ""

# Download image
request = groupdocs_parser_cloud.DownloadFileRequest(image.path, my_storage)
response = file_api.download_file(request)

# Move downloaded file to your working directory; the download response is
# used directly as the source path of the move.
shutil.move(response, "C:\\Files\\Images")
# Extract images from a document stored inside a password-protected container
# PDF and print, page by page, where each extracted image was stored.

# api initialization
parseApi = groupdocs_parser_cloud.ParseApi.from_config(configuration)

# define image options
options = groupdocs_parser_cloud.ImagesOptions()
options.file_info = groupdocs_parser_cloud.FileInfo()
options.file_info.file_path = "PDF_with_attachements.pdf"
options.file_info.password = "password"

# set container item: the document inside the container to read images from
container_info = groupdocs_parser_cloud.ContainerItemInfo()
container_info.relative_path = "template-document.pdf"
options.container_item_info = container_info

# define page range
options.start_page_number = 2
options.count_pages_to_extract = 1

# create request
request = groupdocs_parser_cloud.ImagesRequest(options)
result = parseApi.images(request)

# With a page range set, the images are read from result.pages, one entry per
# page, each carrying its own list of images.
for page in result.pages:
    print(f"Images from {page.page_index} page.")
    for image in page.images:
        print(
            f"Image path in storage: {image.path}. "
            f"Download url: {image.download_url}"
        )
        print(
            f"Image format: {image.file_format}. "
            f"Page index: {image.page_index}"
        )
# Extract the images of "sample.pdf" without a page range and print where each
# extracted image was stored.

# api initialization
parseApi = groupdocs_parser_cloud.ParseApi.from_config(configuration)

# define image options
options = groupdocs_parser_cloud.ImagesOptions()
options.file_info = groupdocs_parser_cloud.FileInfo()
options.file_info.file_path = "sample.pdf"

# create request
request = groupdocs_parser_cloud.ImagesRequest(options)
result = parseApi.images(request)

# No page range was requested, so the images are read from the flat
# result.images list rather than from result.pages.
for image in result.images:
    print(
        f"Image path in storage: {image.path}. "
        f"Download url: {image.download_url}"
    )
    print(
        f"Image format: {image.file_format}. "
        f"Page index: {image.page_index}"
    )
# Upload the local file C:\Files\sample.pdf to cloud storage as "sample.pdf".
my_storage = ""

# File API client built from the shared configuration
file_api = groupdocs_parser_cloud.FileApi.from_config(configuration)

# Arguments in order: path in storage, local file, storage name
request = groupdocs_parser_cloud.UploadFileRequest(
    "sample.pdf",
    "C:\\Files\\sample.pdf",
    my_storage,
)
response = file_api.upload_file(request)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment