conholdate-gists/ExtractTextfromDOCX_csharp

## ExtractTextfromDOCX_csharp
Extract Text from DOC or DOCX using C#
1. Extract Text from DOCX using C#
2. Get Formatted Text from DOCX using C#
3. Extract Formatted Text from Pages using C#

## ExtractTextfromDOCX_csharp_FormattedText.cs
// Create an instance of Parser class; the using statement disposes the parser
// once extraction is finished instead of leaving it open
using (Parser parser = new Parser(@"C:\Files\sample.docx"))
{
    // Extract a formatted text (HTML mode) into the reader
    using (TextReader reader = parser.GetFormattedText(new FormattedTextOptions(FormattedTextMode.Html)))
    {
        // Print a formatted text from the document
        // If formatted text extraction isn't supported, a reader is null
        // (a using statement tolerates a null resource, so the check happens here)
        Console.WriteLine(reader == null ? "Formatted text extraction isn't supported" : reader.ReadToEnd());
    }
}

## ExtractTextfromDOCX_csharp_FormattedTextFromPages.cs
// Open the document; the parser is disposed when the using block ends
using (Parser docParser = new Parser(@"C:\Files\sample.docx"))
{
    // Bail out early when the document can't provide formatted text
    bool formattedTextSupported = docParser.Features.FormattedText;
    if (!formattedTextSupported)
    {
        Console.WriteLine("Document isn't supports formatted text extraction.");
        return;
    }

    // Bail out early when the document reports no pages
    IDocumentInfo info = docParser.GetDocumentInfo();
    if (info.PageCount == 0)
    {
        Console.WriteLine("Document hasn't pages.");
        return;
    }

    // Walk the pages with a one-based page number for display;
    // the extraction call below receives the zero-based index
    for (int pageNumber = 1; pageNumber <= info.PageCount; pageNumber++)
    {
        // Print the "current/total" page header
        Console.WriteLine("Page {0}/{1}", pageNumber, info.PageCount);

        // Request the page content as HTML
        FormattedTextOptions htmlOptions = new FormattedTextOptions(FormattedTextMode.Html);
        using (TextReader pageReader = docParser.GetFormattedText(pageNumber - 1, htmlOptions))
        {
            // No null check here: formatted text support was verified above
            Console.WriteLine(pageReader.ReadToEnd());
        }
    }
}

## ExtractTextfromDOCX_csharp_PlainText.cs
// Create an instance of Parser class; the using statement disposes the parser
// once extraction is finished instead of leaving it open
using (Parser parser = new Parser(@"C:\Files\sample.docx"))
{
    // Extract a text into the reader
    using (TextReader reader = parser.GetText())
    {
        // Print a text from the document
        // If text extraction isn't supported, a reader is null
        // (a using statement tolerates a null resource, so the check happens here)
        Console.WriteLine(reader == null ? "Text extraction isn't supported" : reader.ReadToEnd());
    }
}
	Extract Text from DOC or DOCX using C#
	1. Extract Text from DOCX using C#
	2. Get Formatted Text from DOCX using C#
	3. Extract Formatted Text from Pages using C#
	// Create an instance of Parser class; the using statement disposes the parser
	// once extraction is finished instead of leaving it open
	using (Parser parser = new Parser(@"C:\Files\sample.docx"))
	{
		// Extract a formatted text (HTML mode) into the reader
		using (TextReader reader = parser.GetFormattedText(new FormattedTextOptions(FormattedTextMode.Html)))
		{
			// Print a formatted text from the document
			// If formatted text extraction isn't supported, a reader is null
			// (a using statement tolerates a null resource, so the check happens here)
			Console.WriteLine(reader == null ? "Formatted text extraction isn't supported" : reader.ReadToEnd());
		}
	}
	// Open the document; the parser is disposed when the using block ends
	using (Parser docParser = new Parser(@"C:\Files\sample.docx"))
	{
		// Bail out early when the document can't provide formatted text
		bool formattedTextSupported = docParser.Features.FormattedText;
		if (!formattedTextSupported)
		{
			Console.WriteLine("Document isn't supports formatted text extraction.");
			return;
		}

		// Bail out early when the document reports no pages
		IDocumentInfo info = docParser.GetDocumentInfo();
		if (info.PageCount == 0)
		{
			Console.WriteLine("Document hasn't pages.");
			return;
		}

		// Walk the pages with a one-based page number for display;
		// the extraction call below receives the zero-based index
		for (int pageNumber = 1; pageNumber <= info.PageCount; pageNumber++)
		{
			// Print the "current/total" page header
			Console.WriteLine("Page {0}/{1}", pageNumber, info.PageCount);

			// Request the page content as HTML
			FormattedTextOptions htmlOptions = new FormattedTextOptions(FormattedTextMode.Html);
			using (TextReader pageReader = docParser.GetFormattedText(pageNumber - 1, htmlOptions))
			{
				// No null check here: formatted text support was verified above
				Console.WriteLine(pageReader.ReadToEnd());
			}
		}
	}
	// Create an instance of Parser class; the using statement disposes the parser
	// once extraction is finished instead of leaving it open
	using (Parser parser = new Parser(@"C:\Files\sample.docx"))
	{
		// Extract a text into the reader
		using (TextReader reader = parser.GetText())
		{
			// Print a text from the document
			// If text extraction isn't supported, a reader is null
			// (a using statement tolerates a null resource, so the check happens here)
			Console.WriteLine(reader == null ? "Text extraction isn't supported" : reader.ReadToEnd());
		}
	}