Return to Snippet

Revision: 50500
at August 25, 2011 00:15 by danfsmith


Initial Code
<#
.SYNOPSIS
Lists every <a> element whose href contains "pdf" in the *.htm / *.aspx files
found recursively under a folder.

.PARAMETER path
Root folder to scan. It is also the text that gets swapped out of each file's
full name when building the FileName column.

.PARAMETER urlpath
Text substituted for $path in each file's full name (backslashes are then
turned into forward slashes).

.PARAMETER agilityPackPath
Location of HtmlAgilityPack.dll. Defaults to the path the script originally
had hard-coded, so existing invocations keep working.

.OUTPUTS
One PSObject per matching link with the properties PdfLink, FileName and
LineNumber, sorted by PdfLink. Progress messages go to the host via Write-Host.
#>
param ($path, $urlpath, $agilityPackPath = 'f:\dan\tools\html-agility-pack\HtmlAgilityPack.dll')

Add-Type -Path $agilityPackPath

$files = Get-ChildItem -Include *.htm,*.aspx -Path $path -Recurse
# A single HtmlDocument instance is reused; each Load() call below re-parses into it.
$doc = New-Object HtmlAgilityPack.HtmlDocument

# Windows paths are case-insensitive, so the path -> URL substitution must be too.
# -ireplace is regex based: escape the pattern so the path is matched literally and
# double any '$' in the replacement so it is not treated as a group reference.
$pathPattern = [regex]::Escape($path)
$urlReplacement = "$urlpath".Replace('$', '$$')

$result = $files | ForEach-Object {
	Write-Host "Checking $_"
	$name = ($_.FullName -ireplace $pathPattern, $urlReplacement).Replace("\", "/")
	# Load() has no return value, so there is nothing to capture.
	$doc.Load($_.FullName)
	$linknodes = $doc.DocumentNode.SelectNodes("//a")
	# SelectNodes yields $null rather than an empty collection when nothing matches.
	if ($linknodes) {
		foreach ($node in $linknodes) {
			# Read the attribute once; "" is the fallback when href is absent.
			$pdflink = $node.GetAttributeValue("href", "")
			if ($pdflink.ToLower().Contains("pdf"))
			{
				Write-Host "Found" $pdflink
				New-Object PsObject -Property @{PdfLink = $pdflink; FileName = $name; LineNumber = $node.Line;}
			}
		}
	}
}
$result | Sort-Object PdfLink

Initial URL

                                

Initial Description

                                

Initial Title
Get All PDF links from HTML files

Initial Tags
html

Initial Language
Windows PowerShell