PowerShell Team c748652c34 Copy all mapped files from [SD:725290]
commit 8cec8f150da7583b7af5efbe2853efee0179750c
2016-07-28 23:23:03 -07:00

509 lines
17 KiB

Copyright (c) Microsoft Corporation. All rights reserved.
using System;
using System.Management.Automation;
using System.Text.RegularExpressions;
using System.Collections.Generic;
using mshtml;
using System.Diagnostics;
using System.Threading;
using ExecutionContext = System.Management.Automation.ExecutionContext;
namespace Microsoft.PowerShell.Commands
/// <summary>
/// Response object for html content
/// </summary>
public partial class HtmlWebResponseObject : WebResponseObject, IDisposable
#region Properties
/// <summary>
/// Gets the response content as a string. The setter is private, so the value can only be
/// assigned from inside this class. Declared with 'new', so it hides the inherited Content member.
/// </summary>
public new string Content { get; private set; }
// The HTML document. Assigned by LoadDocumentInMtaThread.
private IHTMLDocument2 _parsedHtml;
// The reset event for synchronizing the 'IHTMLDocument2.write()' call.
// LoadDocumentInMtaThread waits on it in 100ms slices.
private ManualResetEventSlim _stateChangeResetEvent;
// The reset event for synchronizing loading the document.
// EnsureHtmlParser waits on it in 500ms slices.
private ManualResetEventSlim _loadDocumentResetEvent;
// The handler for the 'onreadystatechange' event (wraps ReadyStateChanged)
private HTMLDocumentEvents2_onreadystatechangeEventHandler _onreadystatechangeEventHandler;
// The exception thrown during the parsing. Recorded by the worker thread and rethrown by EnsureHtmlParser.
private Exception _parsingException;
// The current execution context. Used to poll CurrentPipelineStopping while waiting for the parser.
private readonly ExecutionContext _executionContext;
// The flag that notifies the worker thread to stop loading the document.
// Set by EnsureHtmlParser when the pipeline is stopping; polled by the worker's wait loop.
private bool _stopWorkerThread;
// The flag that indicates the html is parsed. Set once EnsureHtmlParser completes without error.
private bool _htmlParsed = false;
/// <summary>
/// Gets the parsed HTML document. The content is parsed on first access.
/// </summary>
public IHTMLDocument2 ParsedHtml
{
    get
    {
        // _parsedHtml is only assigned by the parser worker, so make sure the parser has run
        // before handing the document out. EnsureHtmlParser is a no-op once parsing succeeded.
        EnsureHtmlParser();
        return _parsedHtml;
    }
}
// Backing store for Forms; stays null until the property is first read
private FormObjectCollection _forms;
/// <summary>
/// Gets the forms of the document. The collection is built by BuildFormsCollection on the
/// first read and the same instance is returned afterwards.
/// </summary>
public FormObjectCollection Forms
{
    get { return _forms ?? (_forms = BuildFormsCollection()); }
}
// Backing store for InputFields; stays null until the property is first read
private WebCmdletElementCollection _inputFields;
/// <summary>
/// Gets the INPUT elements of the document, each wrapped in a PSObject. The collection is
/// built on the first read and cached.
/// </summary>
public WebCmdletElementCollection InputFields
{
    get
    {
        if (_inputFields == null)
        {
            // The document must be parsed before _parsedHtml can be enumerated
            EnsureHtmlParser();

            List<PSObject> parsedFields = new List<PSObject>();
            foreach (IHTMLElement element in _parsedHtml.all)
            {
                // Static Equals tolerates an element that reports no tag name
                if (String.Equals(element.tagName, "INPUT", StringComparison.OrdinalIgnoreCase))
                {
                    parsedFields.Add(CreateHtmlObject(element, true));
                }
            }

            _inputFields = new WebCmdletElementCollection(parsedFields);
        }

        return _inputFields;
    }
}
// Backing store for Links; stays null until the property is first read
private WebCmdletElementCollection _links;
/// <summary>
/// Gets the elements of the document's links collection, each wrapped in a PSObject.
/// The collection is built on the first read and cached.
/// </summary>
public WebCmdletElementCollection Links
{
    get
    {
        if (_links == null)
        {
            // The document must be parsed before _parsedHtml can be enumerated
            EnsureHtmlParser();

            List<PSObject> parsedLinks = new List<PSObject>();
            foreach (IHTMLElement element in _parsedHtml.links)
            {
                parsedLinks.Add(CreateHtmlObject(element, true));
            }

            _links = new WebCmdletElementCollection(parsedLinks);
        }

        return _links;
    }
}
// Backing store for Images; stays null until the property is first read
private WebCmdletElementCollection _images;
/// <summary>
/// Gets the elements of the document's images collection, each wrapped in a PSObject.
/// The collection is built on the first read and cached.
/// </summary>
public WebCmdletElementCollection Images
{
    get
    {
        if (_images == null)
        {
            // The document must be parsed before _parsedHtml can be enumerated
            EnsureHtmlParser();

            List<PSObject> parsedImages = new List<PSObject>();
            foreach (IHTMLElement element in _parsedHtml.images)
            {
                parsedImages.Add(CreateHtmlObject(element, true));
            }

            _images = new WebCmdletElementCollection(parsedImages);
        }

        return _images;
    }
}
// Backing store for Scripts; stays null until the property is first read
private WebCmdletElementCollection _scripts;
/// <summary>
/// Gets the elements of the document's scripts collection, each wrapped in a PSObject.
/// The collection is built on the first read and cached.
/// </summary>
public WebCmdletElementCollection Scripts
{
    get
    {
        if (_scripts == null)
        {
            // The document must be parsed before _parsedHtml can be enumerated
            EnsureHtmlParser();

            List<PSObject> parsedScripts = new List<PSObject>();
            foreach (IHTMLElement element in _parsedHtml.scripts)
            {
                parsedScripts.Add(CreateHtmlObject(element, true));
            }

            _scripts = new WebCmdletElementCollection(parsedScripts);
        }

        return _scripts;
    }
}
// Backing store for AllElements; stays null until the property is first read
private WebCmdletElementCollection _allElements;
/// <summary>
/// Gets every element of the document's 'all' collection, each wrapped in a PSObject.
/// The collection is built on the first read and cached.
/// </summary>
public WebCmdletElementCollection AllElements
{
    get
    {
        if (_allElements == null)
        {
            // The document must be parsed before _parsedHtml can be enumerated
            EnsureHtmlParser();

            List<PSObject> parsedElements = new List<PSObject>();
            foreach (IHTMLElement element in _parsedHtml.all)
            {
                parsedElements.Add(CreateHtmlObject(element, true));
            }

            _allElements = new WebCmdletElementCollection(parsedElements);
        }

        return _allElements;
    }
}
#endregion Properties
#region Private Fields
// Shared across instances and created on demand by EnsureHtmlParser (each is assigned only while null).
// Matches the opening tag of an HTML element; ParseAttributes applies it to an element's outerHTML.
private static Regex _tagRegex;
// Matches each attribute specification inside an opening tag.
private static Regex _attribsRegex;
// Splits one attribute specification into its name (group 1) and its value
// (group 2, 3 or 4 depending on double quotes, single quotes or no quotes).
private static Regex _attribNameValueRegex;
#endregion Private Fields
#region Methods
// The "onreadystatechange" event handler. Checks whether the document's readyState has
// become "complete" (ordinal, case-insensitive comparison).
private void ReadyStateChanged(IHTMLEventObj obj)
// NOTE(review): the statement guarded by this check is not visible in this view. Presumably it
// signals _stateChangeResetEvent, which LoadDocumentInMtaThread waits on - confirm against the full file.
if (String.Equals("complete", _parsedHtml.readyState, StringComparison.OrdinalIgnoreCase))
// Load the document in a worker thread. Queued to the thread pool by EnsureHtmlParser.
// The 'state' argument is not used by the code visible here.
private void LoadDocumentInMtaThread(Object state)
// Create a new IHTMLDocument2 object
_parsedHtml = (IHTMLDocument2)new HTMLDocument();
// Attach the event handler
var events = (HTMLDocumentEvents2_Event)_parsedHtml;
events.onreadystatechange += _onreadystatechangeEventHandler;
// Write the content and close the document
// NOTE(review): the write/close statements this comment describes are not visible in this view - confirm.
// Wait for the onReadyStateChange event to be fired. On IE9, this never happens
// so we check the readyState directly as well.
bool wait = true;
// The loop also ends when EnsureHtmlParser raises _stopWorkerThread
while (wait && !_stopWorkerThread)
// Direct readyState poll, repeated on every pass of the loop
// NOTE(review): the statement guarded by this check is not visible in this view - confirm.
if (String.Equals("complete", _parsedHtml.readyState, StringComparison.OrdinalIgnoreCase))
// Wait up to 100ms for the reset event; Wait returns true once it is signalled, which ends the loop
wait = !_stateChangeResetEvent.Wait(100);
// Detach the event handler
events.onreadystatechange -= _onreadystatechangeEventHandler;
// Failures are recorded instead of escaping on the worker thread; EnsureHtmlParser rethrows them
catch (Exception e)
_parsingException = e;
// Makes sure the HTML content has been parsed and the attribute regexes exist.
// Parsing is attempted only while _htmlParsed is false; the flag is set after a run that
// neither was cancelled nor recorded an exception, so later calls skip straight to the regex checks.
// Throws PipelineStoppedException when the pipeline is stopping, or the exception recorded
// by the worker thread when parsing failed.
private void EnsureHtmlParser()
if (_htmlParsed == false)
// Initialization
_stopWorkerThread = false;
_parsingException = null;
_stateChangeResetEvent = new ManualResetEventSlim();
_loadDocumentResetEvent = new ManualResetEventSlim();
_onreadystatechangeEventHandler = new HTMLDocumentEvents2_onreadystatechangeEventHandler(ReadyStateChanged);
// The IHTMLDocument events cannot be handled in STA ApartmentState, so we use a worker thread to load the document
ThreadPool.QueueUserWorkItem(new WaitCallback(LoadDocumentInMtaThread));
// Wait for the worker thread to finish loading the document. In the meantime, we check the Ctrl-C every 500ms
bool wait = true;
while (wait)
if (_executionContext.CurrentPipelineStopping)
// Signal and wait for the worker thread to exit, then break out the loop
// NOTE(review): only the signal is visible in this view; the wait and the break this comment
// mentions are not shown - confirm against the full file.
_stopWorkerThread = true;
// Wait returns true once the worker has signalled completion, which ends the loop
wait = !_loadDocumentResetEvent.Wait(500);
// Ctrl-C is typed
if (_executionContext.CurrentPipelineStopping)
throw new PipelineStoppedException();
// If there is no Ctrl-C, throw if an exception happened during the parsing
// NOTE(review): 'throw <variable>' resets the stack trace of the recorded exception.
if (_parsingException != null)
throw _parsingException;
// Parsing was successful
_htmlParsed = true;
// The static regexes are created on demand; each is assigned only while it is still null
if (_tagRegex == null)
_tagRegex = new Regex(@"<\w+((\s+[^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)+\s*|\s*)/?>",
RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
if (_attribsRegex == null)
_attribsRegex = new Regex(@"(?<=\s+)([^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)",
RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
if (_attribNameValueRegex == null)
_attribNameValueRegex = new Regex(@"([^""'>/=\s\p{Cc}]+)(?:\s*=\s*(?:""(.*?)""|'(.*?)'|([^'"">\s]+)))?",
RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
// Wraps an HTML element in a PSObject. The object carries the element's innerHTML, innerText,
// outerHTML and outerText as note properties, the tagName when addTagName is true, and one
// note property per attribute found by ParseAttributes in the element's outerHTML.
private PSObject CreateHtmlObject(IHTMLElement element, bool addTagName)
{
    PSObject wrapped = new PSObject();
    var noteProperties = wrapped.Properties;

    noteProperties.Add(new PSNoteProperty("innerHTML", element.innerHTML));
    noteProperties.Add(new PSNoteProperty("innerText", element.innerText));
    noteProperties.Add(new PSNoteProperty("outerHTML", element.outerHTML));
    noteProperties.Add(new PSNoteProperty("outerText", element.outerText));

    if (addTagName)
    {
        noteProperties.Add(new PSNoteProperty("tagName", element.tagName));
    }

    // Attributes are added last, after the fixed properties above
    ParseAttributes(element.outerHTML, wrapped);

    return wrapped;
}
// Adds one note property per attribute of an element's opening tag to elementObject.
//   outerHtml     - the element's outerHTML; may be null or empty, in which case nothing is added
//   elementObject - the PSObject that receives the attribute properties
// An attribute whose name is already present on elementObject (a built-in property or an
// earlier attribute with the same name) is skipped, so the first definition wins.
private void ParseAttributes(string outerHtml, PSObject elementObject)
{
    // We might get an empty input for a directive from the HTML file
    if (string.IsNullOrEmpty(outerHtml))
    {
        return;
    }

    // Extract just the opening tag of the HTML element (omitting the closing tag and any contents,
    // including contained HTML elements)
    var tagMatch = _tagRegex.Match(outerHtml);
    if (!tagMatch.Success)
    {
        // No opening tag was recognised, so there are no attributes to extract
        return;
    }

    // Extract all the attribute specifications within the HTML element opening tag
    foreach (Match attribMatch in _attribsRegex.Matches(tagMatch.Value))
    {
        // Extract the name and value for this attribute (allowing for variations like single/double/no
        // quotes, and no value at all)
        var nvMatch = _attribNameValueRegex.Match(attribMatch.Value);
        Debug.Assert(nvMatch.Groups.Count == 5);
        if (!nvMatch.Success)
        {
            continue;
        }

        // Name is always captured by group #1
        string name = nvMatch.Groups[1].Value;
        if (name.Length == 0)
        {
            continue;
        }

        // The value (if any) is captured by group #2, #3, or #4, depending on quoting or lack thereof
        string value = null;
        if (nvMatch.Groups[2].Success)
        {
            value = nvMatch.Groups[2].Value;
        }
        else if (nvMatch.Groups[3].Success)
        {
            value = nvMatch.Groups[3].Value;
        }
        else if (nvMatch.Groups[4].Success)
        {
            value = nvMatch.Groups[4].Value;
        }

        // Properties.Add throws when a member with the same name already exists, which would
        // abort the whole element for a repeated attribute or one named like a built-in property
        if (elementObject.Properties[name] == null)
        {
            elementObject.Properties.Add(new PSNoteProperty(name, value));
        }
    }
}
// Builds the collection returned by the Forms property: one FormObject per form in the
// document, holding the form's method and action plus one field per input element.
// A form or input is identified by its element id, falling back to its name when it has no id.
private FormObjectCollection BuildFormsCollection()
{
    FormObjectCollection forms = new FormObjectCollection();

    // The document must be parsed before _parsedHtml can be enumerated
    EnsureHtmlParser();

    foreach (IHTMLFormElement form in _parsedHtml.forms)
    {
        string formId = GetElementId(form as IHTMLElement);
        if (null == formId)
        {
            formId = form.name;
        }

        FormObject f = new FormObject(formId, form.method, form.action);
        foreach (IHTMLElement element in form)
        {
            IHTMLInputElement input = element as IHTMLInputElement;
            if (null != input)
            {
                string fieldId = GetElementId(element);
                if (null == fieldId)
                {
                    fieldId = input.name;
                }

                f.AddField(fieldId, input.value);
            }
        }

        // Without this the populated form would be discarded and the result would stay empty
        forms.Add(f);
    }

    return forms;
}
// Returns the id of the given element, or null when no element is supplied
private string GetElementId(IHTMLElement element)
{
    if (element == null)
    {
        return null;
    }

    return element.id;
}
/// <summary>
/// Reads the response content from the web response. Text content is decoded from
/// RawContentStream using the response's character set; any other content type leaves
/// Content as an empty string.
/// </summary>
private void InitializeContent()
{
    string contentType = ContentHelper.GetContentType(BaseResponse);
    if (ContentHelper.IsText(contentType))
    {
        // fill the Content buffer
        string characterSet = WebResponseHelper.GetCharacterSet(BaseResponse);
        this.Content = StreamHelper.DecodeStream(RawContentStream, characterSet);
    }
    else
    {
        // Must be the alternative branch: assigning unconditionally would discard the decoded text
        this.Content = string.Empty;
    }
}
#endregion Methods
/// <summary>
/// Disposes the instance of the class.
/// </summary>
public void Dispose()
// NOTE(review): the statements guarded by the two null checks below are not visible in this view.
// Presumably each disposes the corresponding ManualResetEventSlim - confirm against the full file.
// Both events are created by EnsureHtmlParser, so they are null if that method never ran.
if (_loadDocumentResetEvent != null)
if (_stateChangeResetEvent != null)
/// <summary>
/// Finalizer to free the COM objects.
/// </summary>
private void CleanupNativeResources()
if (_parsedHtml != null)