From 5f89a10f5b872406a9ac6cb1fcdea8078ea6a4ca Mon Sep 17 00:00:00 2001 From: Bryan Berns Date: Tue, 7 Apr 2020 17:33:15 -0400 Subject: [PATCH] Added Support For Big Endian `UTF-32` (#11947) --- .../commands/utility/UtilityCommon.cs | 5 ++ .../utils/EncodingUtils.cs | 5 +- .../Parser/RedirectionOperator.Tests.ps1 | 56 ++++++++----------- .../Get-Content.Tests.ps1 | 1 + .../Format-Hex.Tests.ps1 | 7 +++ 5 files changed, 40 insertions(+), 34 deletions(-) diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/UtilityCommon.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/UtilityCommon.cs index 5308f836d..e4ef04b96 100644 --- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/UtilityCommon.cs +++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/UtilityCommon.cs @@ -43,6 +43,11 @@ namespace Microsoft.PowerShell.Commands /// BigEndianUnicode, + /// + /// Big Endian UTF32 encoding. + /// + BigEndianUTF32, + /// /// UTF8 encoding. /// diff --git a/src/System.Management.Automation/utils/EncodingUtils.cs b/src/System.Management.Automation/utils/EncodingUtils.cs index a843bc381..161ac7586 100644 --- a/src/System.Management.Automation/utils/EncodingUtils.cs +++ b/src/System.Management.Automation/utils/EncodingUtils.cs @@ -16,6 +16,7 @@ namespace System.Management.Automation internal const string String = "string"; internal const string Unicode = "unicode"; internal const string BigEndianUnicode = "bigendianunicode"; + internal const string BigEndianUtf32 = "bigendianutf32"; internal const string Ascii = "ascii"; internal const string Utf8 = "utf8"; internal const string Utf8NoBom = "utf8NoBOM"; @@ -25,13 +26,14 @@ namespace System.Management.Automation internal const string Default = "default"; internal const string OEM = "oem"; internal static readonly string[] TabCompletionResults = { - Ascii, BigEndianUnicode, OEM, Unicode, Utf7, Utf8, Utf8Bom, Utf8NoBom, Utf32 + Ascii, BigEndianUnicode, BigEndianUtf32, OEM, Unicode, Utf7, Utf8, Utf8Bom, Utf8NoBom, Utf32 }; internal static Dictionary encodingMap = new Dictionary(StringComparer.OrdinalIgnoreCase) { { Ascii, System.Text.Encoding.ASCII }, { BigEndianUnicode, System.Text.Encoding.BigEndianUnicode }, + { BigEndianUtf32, new UTF32Encoding(bigEndian: true, byteOrderMark: true) }, { Default, ClrFacade.GetDefaultEncoding() }, { OEM, ClrFacade.GetOEMEncoding() }, { Unicode, System.Text.Encoding.Unicode }, @@ -116,6 +118,7 @@ namespace System.Management.Automation public ArgumentEncodingCompletionsAttribute() : base( EncodingConversion.Ascii, EncodingConversion.BigEndianUnicode, + EncodingConversion.BigEndianUtf32, EncodingConversion.OEM, EncodingConversion.Unicode, EncodingConversion.Utf7, diff --git a/test/powershell/Language/Parser/RedirectionOperator.Tests.ps1 b/test/powershell/Language/Parser/RedirectionOperator.Tests.ps1 index ed4e320d7..466f15bd6 100644 --- a/test/powershell/Language/Parser/RedirectionOperator.Tests.ps1 +++ b/test/powershell/Language/Parser/RedirectionOperator.Tests.ps1 @@ -45,42 +45,32 @@ Describe "Redirection operator now supports encoding changes" -Tags "CI" { } } - # $availableEncodings = "unknown","string","unicode","bigendianunicode","utf8","utf7", "utf32","ascii","default","oem" - $availableEncodings = (Get-Command Out-File).Parameters["Encoding"].Attributes.ValidValues - + $availableEncodings = + @([System.Text.Encoding]::ASCII + [System.Text.Encoding]::BigEndianUnicode + [System.Text.UTF32Encoding]::new($true,$true) + [System.Text.Encoding]::Unicode + [System.Text.Encoding]::UTF7 + [System.Text.Encoding]::UTF8 + [System.Text.Encoding]::UTF32) + foreach($encoding in $availableEncodings) { - $skipTest = $false - if ($encoding -eq "default") { - # [System.Text.Encoding]::Default is exposed by 'System.Private.CoreLib.dll' at - # runtime via reflection. However,it isn't exposed in the reference contract of - # 'System.Text.Encoding', and therefore we cannot use 'Encoding.Default' in our - # code. So we need to skip this encoding in the test. - $skipTest = $true - } - # some of the encodings accepted by Out-File aren't real, - # and Out-File has its own translation, so we'll - # not do that logic here, but simply ignore those encodings - # as they eventually are translated to "real" encoding - $enc = [System.Text.Encoding]::$encoding - if ( $enc ) - { - $msg = "Overriding encoding for Out-File is respected for $encoding" - $BOM = $enc.GetPreamble() - $TXT = $enc.GetBytes($asciiString) - $CR = $enc.GetBytes($asciiCR) - $expectedBytes = .{ $BOM; $TXT; $CR } - $PSDefaultParameterValues["Out-File:Encoding"] = "$encoding" - $asciiString > TESTDRIVE:/file.txt - $observedBytes = Get-Content -AsByteStream TESTDRIVE:/file.txt - # THE TEST - It $msg -Skip:$skipTest { - $observedBytes.Count | Should -Be $expectedBytes.Count - for($i = 0;$i -lt $observedBytes.Count; $i++) { - $observedBytes[$i] | Should -Be $expectedBytes[$i] - } + $encodingName = $encoding.EncodingName + $msg = "Overriding encoding for Out-File is respected for $encodingName" + $BOM = $encoding.GetPreamble() + $TXT = $encoding.GetBytes($asciiString) + $CR = $encoding.GetBytes($asciiCR) + $expectedBytes = @( $BOM; $TXT; $CR ) + $PSDefaultParameterValues["Out-File:Encoding"] = $encoding + $asciiString > TESTDRIVE:/file.txt + $observedBytes = Get-Content -AsByteStream TESTDRIVE:/file.txt + # THE TEST + It $msg { + $observedBytes.Count | Should -Be $expectedBytes.Count + for($i = 0;$i -lt $observedBytes.Count; $i++) { + $observedBytes[$i] | Should -Be $expectedBytes[$i] } - } } } diff --git a/test/powershell/Modules/Microsoft.PowerShell.Management/Get-Content.Tests.ps1 b/test/powershell/Modules/Microsoft.PowerShell.Management/Get-Content.Tests.ps1 index 1d5ec99a9..6c50cee5a 100644 --- a/test/powershell/Modules/Microsoft.PowerShell.Management/Get-Content.Tests.ps1 +++ b/test/powershell/Modules/Microsoft.PowerShell.Management/Get-Content.Tests.ps1 @@ -98,6 +98,7 @@ Describe "Get-Content" -Tags "CI" { @{EncodingName = 'OEM'}, @{EncodingName = 'Unicode'}, @{EncodingName = 'BigEndianUnicode'}, + @{EncodingName = 'BigEndianUTF32'}, @{EncodingName = 'UTF8'}, @{EncodingName = 'UTF8BOM'}, @{EncodingName = 'UTF8NoBOM'}, diff --git a/test/powershell/Modules/Microsoft.PowerShell.Utility/Format-Hex.Tests.ps1 b/test/powershell/Modules/Microsoft.PowerShell.Utility/Format-Hex.Tests.ps1 index 6bc7e06a5..0ae47aac8 100644 --- a/test/powershell/Modules/Microsoft.PowerShell.Utility/Format-Hex.Tests.ps1 +++ b/test/powershell/Modules/Microsoft.PowerShell.Utility/Format-Hex.Tests.ps1 @@ -432,6 +432,13 @@ public enum TestSByteEnum : sbyte { Count = 1 ExpectedResult = "0000000000000000 00 68 00 65 00 6C 00 6C 00 6F h e l l o" } + @{ + Name = "Can process BigEndianUTF32 encoding 'fhx -InputObject 'hello' -Encoding BigEndianUTF32'" + Encoding = "BigEndianUTF32" + Count = 2 + ExpectedResult = "0000000000000000 00 00 00 68 00 00 00 65 00 00 00 6C 00 00 00 6C h e l l" + ExpectedSecondResult = "0000000000000010 00 00 00 6F o" + } @{ Name = "Can process Unicode encoding 'fhx -InputObject 'hello' -Encoding Unicode'" Encoding = "Unicode"