You can read all the details in: Convert PDF to TXT or TXT to PDF using C# or VB.NET
Created
January 19, 2021 22:14
-
-
Save aspose-com-gists/21b0333293862a8730041a424ce1c5cc to your computer and use it in GitHub Desktop.
Convert PDF to TXT or TXT to PDF Programmatically using C# or VB.NET
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Extract the text of every page of MultiColumnPdf.pdf using the "Pure"
// formatting mode and write the concatenated result to PDF_to_TXT_Pure.txt.
// Accumulates the text of each page, in page order
StringBuilder builder = new StringBuilder();
// Open document; the using statement releases it as soon as extraction is done
using (Document pdfDocument = new Document(dataDir + "MultiColumnPdf.pdf"))
{
    foreach (Page pdfPage in pdfDocument.Pages)
    {
        using (MemoryStream textStream = new MemoryStream())
        {
            // Create text device
            TextDevice textDevice = new TextDevice();
            // Set different options
            textDevice.ExtractionOptions =
                new TextExtractionOptions(TextExtractionOptions.TextFormattingMode.Pure);
            // Convert the page and save text to the stream
            textDevice.Process(pdfPage, textStream);
            // Get text from memory stream. ToArray() returns the whole buffer
            // regardless of the stream position, so no explicit Close() is needed.
            // NOTE(review): decoding as UTF-16 assumes the device writes Unicode text — confirm.
            builder.Append(Encoding.Unicode.GetString(textStream.ToArray()));
        }
    }
}
// Build the output path in its own local so dataDir keeps naming the directory
string outputPath = dataDir + "PDF_to_TXT_Pure.txt";
// Save the text file
File.WriteAllText(outputPath, builder.ToString());
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Extract the text of every page of MultiColumnPdf.pdf using the "Raw"
// formatting mode and write the concatenated result to PDF_to_TXT_Raw.txt.
// Accumulates the text of each page, in page order
StringBuilder builder = new StringBuilder();
// Open document; the using statement releases it as soon as extraction is done
using (Document pdfDocument = new Document(dataDir + "MultiColumnPdf.pdf"))
{
    foreach (Page pdfPage in pdfDocument.Pages)
    {
        using (MemoryStream textStream = new MemoryStream())
        {
            // Create text device
            TextDevice textDevice = new TextDevice();
            // Set different options
            textDevice.ExtractionOptions =
                new TextExtractionOptions(TextExtractionOptions.TextFormattingMode.Raw);
            // Convert the page and save text to the stream
            textDevice.Process(pdfPage, textStream);
            // Get text from memory stream. ToArray() returns the whole buffer
            // regardless of the stream position, so no explicit Close() is needed.
            // NOTE(review): decoding as UTF-16 assumes the device writes Unicode text — confirm.
            builder.Append(Encoding.Unicode.GetString(textStream.ToArray()));
        }
    }
}
// Build the output path in its own local so dataDir keeps naming the directory
string outputPath = dataDir + "PDF_to_TXT_Raw.txt";
// Save the text file
File.WriteAllText(outputPath, builder.ToString());
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Convert Test.txt to TexttoPDF.pdf: each input line becomes one TextFragment,
// placed at x = 100 with y advancing by 15 per line; a new page is started
// whenever y reaches the page height minus 72.
// Read input TXT file; both using statements guarantee cleanup even if a
// read or save throws part-way through.
using (System.IO.TextReader tr = new StreamReader(dataDir + "Test.txt", Encoding.UTF8, true))
using (Document doc = new Document())
{
    // Add blank page
    Page page = doc.Pages.Add();
    // Initiate TextBuilder object
    TextBuilder builder = new TextBuilder(page);
    double x = 100; double y = 100;
    String strLine;
    while ((strLine = tr.ReadLine()) != null)
    {
        // Check for overflow BEFORE positioning the fragment, so a line that
        // does not fit starts the new page at the initial y instead of being
        // appended to the new page with the previous page's y.
        if (y >= page.PageInfo.Height - 72)
        {
            page = doc.Pages.Add();
            builder = new TextBuilder(page);
            y = 100;
        }
        TextFragment text = new TextFragment(strLine);
        text.Position = new Position(x, y);
        builder.AppendText(text);
        // NOTE(review): y grows with each line; if the page origin is the
        // bottom-left corner, lines stack upward — confirm the intended direction.
        y += 15;
    }
    // Save output PDF file
    doc.Save(dataDir + "TexttoPDF.pdf");
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment