pierre3/Get-WebText.ps1

## Get-WebText.ps1
<#
.Synopsis
    Gets the text content of a web page.
.DESCRIPTION
    Requests each given URL with Invoke-WebRequest, walks the returned
    ParsedHtml document recursively, and writes the value of every text node
    that is not null or whitespace to the pipeline in document order.
    Nodes named "script" are not descended into.
.PARAMETER InputUrl
    One or more URLs to fetch. Accepted positionally or from the pipeline.
.OUTPUTS
    The nodeValue of each non-blank text node found.
.NOTES
    Depends on the ParsedHtml property of the Invoke-WebRequest response.
    NOTE(review): ParsedHtml is believed to be available only in Windows
    PowerShell (not PowerShell 6+) - confirm for the target environment.
.EXAMPLE
    Get-WebText "http://example.com"
.EXAMPLE
    "http://example1.com","http://example2.com","http://example3.com" | Get-WebText
#>
function Get-WebText
{
    [CmdletBinding()]
    Param(
        # URL(s) of the page(s) to read
        [Parameter(Mandatory=$true,
                   ValueFromPipeline=$true,
                   Position=0)]
        [string[]]
        $InputUrl
    )

    Begin
    {
        # Recursively walks the children of $nodes and emits the value of
        # every non-blank text node.
        function Get-Descendants
        {
            Param([object]$nodes)

            # NOTE(review): on DOM objects hasChildNodes is commonly a method;
            # confirm that this property-style access behaves as intended.
            if(-not $nodes.hasChildNodes){ return }

            # Skip script elements entirely (-eq is case-insensitive).
            if($nodes.nodeName -eq "script"){ return }

            foreach($child in $nodes.ChildNodes)
            {
                if($child.nodeName -eq "#text")
                {
                    if(-not [String]::IsNullOrWhiteSpace($child.nodeValue))
                    {
                        # Emit the text and keep iterating. A 'return' here
                        # would leave the function and drop every following
                        # sibling together with its subtree.
                        $child.nodeValue
                    }
                    # Text nodes have nothing to descend into.
                    continue
                }
                Get-Descendants $child
            }
        }
    }

    Process
    {
        foreach($Url in $InputUrl)
        {
            Invoke-WebRequest $Url | ForEach-Object { Get-Descendants $_.ParsedHtml }
        }
    }
    End {}
}
	<#
	.Synopsis
	    Gets the text content of a web page.
	.DESCRIPTION
	    Requests each given URL with Invoke-WebRequest, walks the returned
	    ParsedHtml document recursively, and writes the value of every text node
	    that is not null or whitespace to the pipeline in document order.
	    Nodes named "script" are not descended into.
	.PARAMETER InputUrl
	    One or more URLs to fetch. Accepted positionally or from the pipeline.
	.OUTPUTS
	    The nodeValue of each non-blank text node found.
	.NOTES
	    Depends on the ParsedHtml property of the Invoke-WebRequest response.
	    NOTE(review): ParsedHtml is believed to be available only in Windows
	    PowerShell (not PowerShell 6+) - confirm for the target environment.
	.EXAMPLE
	    Get-WebText "http://example.com"
	.EXAMPLE
	    "http://example1.com","http://example2.com","http://example3.com" | Get-WebText
	#>
	# NOTE(review): a function with this same name is already defined earlier in
	# this file; this later definition is the one in effect once the file has
	# been loaded. Consider keeping only one copy.
	function Get-WebText
	{
	    [CmdletBinding()]
	    Param(
	        # URL(s) of the page(s) to read
	        [Parameter(Mandatory=$true,
	                   ValueFromPipeline=$true,
	                   Position=0)]
	        [string[]]
	        $InputUrl
	    )

	    Begin
	    {
	        # Recursively walks the children of $nodes and emits the value of
	        # every non-blank text node.
	        function Get-Descendants
	        {
	            Param([object]$nodes)

	            # NOTE(review): on DOM objects hasChildNodes is commonly a method;
	            # confirm that this property-style access behaves as intended.
	            if(-not $nodes.hasChildNodes){ return }

	            # Skip script elements entirely (-eq is case-insensitive).
	            if($nodes.nodeName -eq "script"){ return }

	            foreach($child in $nodes.ChildNodes)
	            {
	                if($child.nodeName -eq "#text")
	                {
	                    if(-not [String]::IsNullOrWhiteSpace($child.nodeValue))
	                    {
	                        # Emit the text and keep iterating. A 'return' here
	                        # would leave the function and drop every following
	                        # sibling together with its subtree.
	                        $child.nodeValue
	                    }
	                    # Text nodes have nothing to descend into.
	                    continue
	                }
	                Get-Descendants $child
	            }
	        }
	    }

	    Process
	    {
	        foreach($Url in $InputUrl)
	        {
	            # The pipe must be a bare '|'; an escaped '\|' passes a stray
	            # '\' argument to Invoke-WebRequest.
	            Invoke-WebRequest $Url | ForEach-Object { Get-Descendants $_.ParsedHtml }
	        }
	    }
	    End {}
	}