Skip to content

Instantly share code, notes, and snippets.

@aspose-com-gists
Created January 19, 2021 22:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aspose-com-gists/21b0333293862a8730041a424ce1c5cc to your computer and use it in GitHub Desktop.
Save aspose-com-gists/21b0333293862a8730041a424ce1c5cc to your computer and use it in GitHub Desktop.
Convert PDF to TXT or Text to PDF Programmatically using C# or VB.NET
// Extract the text of every page of MultiColumnPdf.pdf using the "Pure"
// text formatting mode and write the combined result to PDF_to_TXT_Pure.txt.

// Accumulates the text of all pages, in page order
StringBuilder builder = new StringBuilder();

// The extraction options are the same for every page, so create them once
TextExtractionOptions options =
    new TextExtractionOptions(TextExtractionOptions.TextFormattingMode.Pure);

// Open the document; the using statement disposes it even if extraction throws
using (Document pdfDocument = new Document(dataDir + "MultiColumnPdf.pdf"))
{
    foreach (Page pdfPage in pdfDocument.Pages)
    {
        // The using statement disposes the stream; no explicit Close() is needed
        using (MemoryStream textStream = new MemoryStream())
        {
            // Create a text device configured with the extraction options
            TextDevice textDevice = new TextDevice();
            textDevice.ExtractionOptions = options;

            // Convert the page and write its text into the stream
            textDevice.Process(pdfPage, textStream);

            // Decode the stream contents as UTF-16 (Encoding.Unicode) and
            // append them to the overall result
            builder.Append(Encoding.Unicode.GetString(textStream.ToArray()));
        }
    }
}

// NOTE: dataDir is reassigned to the full output file path here (kept from the
// original sample); after this line it no longer names a directory.
dataDir = dataDir + "PDF_to_TXT_Pure.txt";

// Save the text file
File.WriteAllText(dataDir, builder.ToString());
// Extract the text of every page of MultiColumnPdf.pdf using the "Raw"
// text formatting mode and write the combined result to PDF_to_TXT_Raw.txt.

// Accumulates the text of all pages, in page order
StringBuilder builder = new StringBuilder();

// The extraction options are the same for every page, so create them once
TextExtractionOptions options =
    new TextExtractionOptions(TextExtractionOptions.TextFormattingMode.Raw);

// Open the document; the using statement disposes it even if extraction throws
using (Document pdfDocument = new Document(dataDir + "MultiColumnPdf.pdf"))
{
    foreach (Page pdfPage in pdfDocument.Pages)
    {
        // The using statement disposes the stream; no explicit Close() is needed
        using (MemoryStream textStream = new MemoryStream())
        {
            // Create a text device configured with the extraction options
            TextDevice textDevice = new TextDevice();
            textDevice.ExtractionOptions = options;

            // Convert the page and write its text into the stream
            textDevice.Process(pdfPage, textStream);

            // Decode the stream contents as UTF-16 (Encoding.Unicode) and
            // append them to the overall result
            builder.Append(Encoding.Unicode.GetString(textStream.ToArray()));
        }
    }
}

// NOTE: dataDir is reassigned to the full output file path here (kept from the
// original sample); after this line it no longer names a directory.
dataDir = dataDir + "PDF_to_TXT_Raw.txt";

// Save the text file
File.WriteAllText(dataDir, builder.ToString());
// Convert Test.txt into TexttoPDF.pdf: each input line becomes one
// TextFragment, and a new page is started whenever the current page is full.

// X coordinate used for every line
const double LineX = 100;
// Y coordinate of the first line on each page
const double StartY = 100;
// Distance added to Y after each line
const double LineStep = 15;
// A page is considered full once Y reaches the page height minus this margin
const double PageLimitMargin = 72;

// Read the input TXT file as UTF-8 (with byte-order-mark detection enabled).
// The using statements dispose the reader and the document even if an
// exception is thrown part-way through the conversion.
using (TextReader reader = new StreamReader(dataDir + "Test.txt", Encoding.UTF8, true))
using (Document doc = new Document())
{
    // Start with one blank page and a TextBuilder bound to it
    Page page = doc.Pages.Add();
    TextBuilder builder = new TextBuilder(page);

    double y = StartY;
    string line;
    while ((line = reader.ReadLine()) != null)
    {
        // Decide which page the line belongs to BEFORE positioning it, so the
        // first line of a new page starts at StartY instead of inheriting the
        // overflow coordinate of the previous page.
        if (y >= page.PageInfo.Height - PageLimitMargin)
        {
            page = doc.Pages.Add();
            builder = new TextBuilder(page);
            y = StartY;
        }

        TextFragment text = new TextFragment(line);
        text.Position = new Position(LineX, y);
        builder.AppendText(text);

        // NOTE(review): Y increases with every line, as in the original sample.
        // If Position is measured from the bottom edge of the page, successive
        // lines are stacked upward - confirm against the Aspose.PDF coordinate
        // system before changing the direction.
        y += LineStep;
    }

    // Save output PDF file
    doc.Save(dataDir + "TexttoPDF.pdf");
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment