Document Processing documentprocessing

## render-or-view-pdf-document-in-browser-using-pdfjs-javascript-library.html
<!-- This example contains the HTML and JavaScript code needed to demonstrate the PDF.js library
     by rendering a PDF document in the browser -->
<html>
<head>

  <!-- Link to PDF.js library -->
  <script src="../build/pdf.js"></script>

</head>
<body>

## convert-html-to-pdf-via-web-url-in-python-using-weasyprint-library.html
# Import the HTML class from the WeasyPrint library
from weasyprint import HTML

# Instantiate the HTML class and call its write_pdf() method to convert a website URL to PDF
HTML('https://www.groupdocs.com/').write_pdf('groupdocs-weasyprint.pdf')

## add-annotations-to-images-in-javascript-using-annotorious-library.html
<html>
  <head>
    <!-- Linking Annotorious Stylesheet -->
    <link rel="stylesheet" href="dist/annotorious.min.css">

    <!-- Integrating Annotorious JavaScript Library -->
    <script type="text/javascript" src="dist/annotorious.min.js"></script>
  </head>

  <body>

## extract-images-from-pdf-in-python-using-pymupdf-library.py
# Import PyMuPDF
import fitz

# File path you want to extract images from
file = "data.pdf"

# Open the file
pdf_file = fitz.open(file)

# Iterate over PDF pages

## combine-or-join-multiple-pdfs-in-python-using-pymupdf-library.py
# Import PyMuPDF
import fitz

# Open first document
doc1 = fitz.open("documentprocessing.pdf")

# Open second document
doc2 = fitz.open("data.pdf")

# Append document 2 after document 1

## add-rotate-and-crop-pdf-pages-in-python-using-pypdf-library.py
# Import the PdfWriter & PdfReader classes from the pypdf library
from pypdf import PdfWriter, PdfReader

# Open PDF document and instantiate writer object for performing operations on the PDF
reader = PdfReader("documentprocessing.pdf")
writer = PdfWriter()

# Add page 1 from reader to output document, unchanged:
writer.add_page(reader.pages[0])

## extract-attachments-from-pdf-in-python-using-pypdf-library.py
# Import the PdfReader class from the pypdf library
from pypdf import PdfReader

# Open a PDF file
reader = PdfReader("data.pdf")

# Iterate through the attachments in the PDF
for name, content_list in reader.attachments:

    # Iterate through the contents in each attachment

## extract-font-information-from-pdf-document-in-python-using-pdfminersix-library.py
# Import required classes from the pdfminer.six library
from pdfminer.pdfparser import PDFParser
from pdfminer.pdfdocument import PDFDocument
from pdfminer.pdfpage import PDFPage
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.converter import PDFPageAggregator

# Open the PDF file
with open('documentprocessing.pdf', 'rb') as pdf_file:


## convert-pdf-to-html-in-python-using-pdfminersix-library.py
# Import extract_text_to_fp function from pdfminer.high_level module
from pdfminer.high_level import extract_text_to_fp

# Import BytesIO class from io module
from io import BytesIO

# Specify the PDF file you want to convert to HTML
pdf_file = 'documentprocessing.pdf'

# Create an in-memory buffer to store the HTML output

## add-crossed-out-text-to-pdf-in-javascript-using-pdfkit.js
// Include pdfkit library and fs module of Node.js
const PDFDocument = require('pdfkit');
const fs = require('fs');

// Create a new PDF document
const doc = new PDFDocument();

// Create a writable stream to save the PDF
const stream = fs.createWriteStream('annotations.pdf');
	<!-- This example contains the HTML and JavaScript code needed to demonstrate the PDF.js library
	     by rendering a PDF document in the browser -->
	<html>
	<head>

	<!-- Link to PDF.js library -->
	<script src="../build/pdf.js"></script>

	</head>
	<body>
	# Import the HTML class from the WeasyPrint library
	from weasyprint import HTML

	# Instantiate the HTML class and call its write_pdf() method to convert a website URL to PDF
	HTML('https://www.groupdocs.com/').write_pdf('groupdocs-weasyprint.pdf')
	<html>
	<head>
	<!-- Linking Annotorious Stylesheet -->
	<link rel="stylesheet" href="dist/annotorious.min.css">

	<!-- Integrating Annotorious JavaScript Library -->
	<script type="text/javascript" src="dist/annotorious.min.js"></script>
	</head>

	<body>
	# Import PyMuPDF
	import fitz

	# File path you want to extract images from
	file = "data.pdf"

	# Open the file
	pdf_file = fitz.open(file)

	# Iterate over PDF pages
	# Import PyMuPDF
	import fitz

	# Open first document
	doc1 = fitz.open("documentprocessing.pdf")

	# Open second document
	doc2 = fitz.open("data.pdf")

	# Append document 2 after document 1
	# Import the PdfWriter & PdfReader classes from the pypdf library
	from pypdf import PdfWriter, PdfReader

	# Open PDF document and instantiate writer object for performing operations on the PDF
	reader = PdfReader("documentprocessing.pdf")
	writer = PdfWriter()

	# Add page 1 from reader to output document, unchanged:
	writer.add_page(reader.pages[0])
	# Import the PdfReader class from the pypdf library
	from pypdf import PdfReader

	# Open a PDF file
	reader = PdfReader("data.pdf")

	# Iterate through the attachments in the PDF
	for name, content_list in reader.attachments:

	# Iterate through the contents in each attachment
	# Import required classes from the pdfminer.six library
	from pdfminer.pdfparser import PDFParser
	from pdfminer.pdfdocument import PDFDocument
	from pdfminer.pdfpage import PDFPage
	from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
	from pdfminer.converter import PDFPageAggregator

	# Open the PDF file
	with open('documentprocessing.pdf', 'rb') as pdf_file:
	# Import extract_text_to_fp function from pdfminer.high_level module
	from pdfminer.high_level import extract_text_to_fp

	# Import BytesIO class from io module
	from io import BytesIO

	# Specify the PDF file you want to convert to HTML
	pdf_file = 'documentprocessing.pdf'

	# Create an in-memory buffer to store the HTML output
	// Include pdfkit library and fs module of Node.js
	const PDFDocument = require('pdfkit');
	const fs = require('fs');

	// Create a new PDF document
	const doc = new PDFDocument();

	// Create a writable stream to save the PDF
	const stream = fs.createWriteStream('annotations.pdf');