Improve performance of Import-CSV up to 10 times (#7413)

Speed up the creation of PSObjects in CSV cmdlets with the following changes:

- Use the overload Add(PSMember, bool prevalidated) for all objects except the first when adding the `NoteProperty` members.
- Add a new constructor to PSObject that preallocates the `_instanceMembers` collection with an initial capacity.
- Improve performance of AddToTypesXmlCache by avoiding an expensive copy of all members just to check for the existence of one of them. This significantly improves performance and reduces allocations.
- Reduce allocations and GC pressure by preallocating and reusing the `StringBuilder` and `List<string>` instances used for line parsing in CSV cmdlets.
- Use `List<string>` instead of `Collection<string>` to get fewer virtual calls and better inlining.
- Reduce allocations by using a preallocated value factory in `TypeTable.GetMembers(ConsolidatedString types)`.
- Replace a LINQ `Any()` call with a check of `List.Count` in binder code.

The main gain is from taking advantage of the fact that all objects created by `Import-Csv` have the same shape (the same properties).
This commit is contained in:
Staffan Gustafsson 2018-08-06 19:58:55 +02:00 committed by Dongbo Wang
parent f0ea0d380f
commit d620c4fd1e
5 changed files with 109 additions and 76 deletions

View file

@ -1149,6 +1149,12 @@ namespace Microsoft.PowerShell.Commands
/// </summary> /// </summary>
private readonly StreamReader _sr; private readonly StreamReader _sr;
// Initial sizes of the value list and the line stringbuilder.
// Set to reasonable initial sizes. They may grow beyond these,
// but this will prevent a few reallocations.
private const int ValueCountGuestimate = 16;
private const int LineLengthGuestimate = 256;
internal ImportCsvHelper(PSCmdlet cmdlet, char delimiter, IList<string> header, string typeName, StreamReader streamReader) internal ImportCsvHelper(PSCmdlet cmdlet, char delimiter, IList<string> header, string typeName, StreamReader streamReader)
{ {
if (cmdlet == null) if (cmdlet == null)
@ -1235,9 +1241,11 @@ namespace Microsoft.PowerShell.Commands
TypeName = ReadTypeInformation(); TypeName = ReadTypeInformation();
} }
var values = new List<string>(ValueCountGuestimate);
var builder = new StringBuilder(LineLengthGuestimate);
while ((Header == null) && (!this.EOF)) while ((Header == null) && (!this.EOF))
{ {
Collection<string> values = ParseNextRecord(); ParseNextRecord(values, builder);
// Trim all trailing blankspaces and delimiters ( single/multiple ). // Trim all trailing blankspaces and delimiters ( single/multiple ).
// If there is only one element in the row and if its a blankspace we dont trim it. // If there is only one element in the row and if its a blankspace we dont trim it.
@ -1278,9 +1286,12 @@ namespace Microsoft.PowerShell.Commands
{ {
_alreadyWarnedUnspecifiedName = alreadyWriteOutWarning; _alreadyWarnedUnspecifiedName = alreadyWriteOutWarning;
ReadHeader(); ReadHeader();
var prevalidated = false;
var values = new List<string>(ValueCountGuestimate);
var builder = new StringBuilder(LineLengthGuestimate);
while (true) while (true)
{ {
Collection<string> values = ParseNextRecord(); ParseNextRecord(values, builder);
if (values.Count == 0) if (values.Count == 0)
break; break;
@ -1290,7 +1301,8 @@ namespace Microsoft.PowerShell.Commands
continue; continue;
} }
PSObject result = BuildMshobject(TypeName, Header, values, _delimiter); PSObject result = BuildMshobject(TypeName, Header, values, _delimiter, prevalidated);
prevalidated = true;
_cmdlet.WriteObject(result); _cmdlet.WriteObject(result);
} }
alreadyWriteOutWarning = _alreadyWarnedUnspecifiedName; alreadyWriteOutWarning = _alreadyWarnedUnspecifiedName;
@ -1365,13 +1377,12 @@ namespace Microsoft.PowerShell.Commands
/// <returns> /// <returns>
/// Parsed collection of strings. /// Parsed collection of strings.
/// </returns> /// </returns>
private Collection<string> private void
ParseNextRecord() ParseNextRecord(List<string> result, StringBuilder current)
{ {
// Collection of strings to return result.Clear();
Collection<string> result = new Collection<string>();
// current string // current string
StringBuilder current = new StringBuilder(); current.Clear();
bool seenBeginQuote = false; bool seenBeginQuote = false;
// int i = 0; // int i = 0;
@ -1519,8 +1530,6 @@ namespace Microsoft.PowerShell.Commands
{ {
result.Add(current.ToString()); result.Add(current.ToString());
} }
return result;
} }
// If we detect a newline we return it as a string "\r", "\n" or "\r\n" // If we detect a newline we return it as a string "\r", "\n" or "\r\n"
@ -1611,10 +1620,10 @@ namespace Microsoft.PowerShell.Commands
private private
PSObject PSObject
BuildMshobject(string type, IList<string> names, Collection<string> values, char delimiter) BuildMshobject(string type, IList<string> names, List<string> values, char delimiter, bool preValidated = false)
{ {
//string[] namesarray = null; //string[] namesarray = null;
PSObject result = new PSObject(); PSObject result = new PSObject(names.Count);
char delimiterlocal = delimiter; char delimiterlocal = delimiter;
int unspecifiedNameIndex = 1; int unspecifiedNameIndex = 1;
for (int i = 0; i <= names.Count - 1; i++) for (int i = 0; i <= names.Count - 1; i++)
@ -1635,7 +1644,8 @@ namespace Microsoft.PowerShell.Commands
{ {
value = values[i]; value = values[i];
} }
result.Properties.Add(new PSNoteProperty(name, value));
result.Properties.Add(new PSNoteProperty(name, value), preValidated);
} }
if (!_alreadyWarnedUnspecifiedName && unspecifiedNameIndex != 1) if (!_alreadyWarnedUnspecifiedName && unspecifiedNameIndex != 1)
@ -1644,7 +1654,7 @@ namespace Microsoft.PowerShell.Commands
_alreadyWarnedUnspecifiedName = true; _alreadyWarnedUnspecifiedName = true;
} }
if (type != null && type.Length > 0) if (!string.IsNullOrEmpty(type))
{ {
result.TypeNames.Clear(); result.TypeNames.Clear();
result.TypeNames.Add(type); result.TypeNames.Add(type);

View file

@ -4019,6 +4019,14 @@ namespace System.Management.Automation
_members = new OrderedDictionary(StringComparer.OrdinalIgnoreCase); _members = new OrderedDictionary(StringComparer.OrdinalIgnoreCase);
} }
/// <summary>
/// Constructs this collection with an initial capacity.
/// </summary>
/// <param name="capacity">The initial capacity passed to the underlying ordered dictionary of members.</param>
internal PSMemberInfoInternalCollection(int capacity)
{
// Members are keyed by name using a case-insensitive (OrdinalIgnoreCase) comparer.
_members = new OrderedDictionary(capacity, StringComparer.OrdinalIgnoreCase);
}
private void Replace(T oldMember, T newMember) private void Replace(T oldMember, T newMember)
{ {
_members[newMember.Name] = newMember; _members[newMember.Name] = newMember;
@ -4567,8 +4575,9 @@ namespace System.Management.Automation
TypeTable typeTable = _mshOwner.GetTypeTable(); TypeTable typeTable = _mshOwner.GetTypeTable();
if (typeTable != null) if (typeTable != null)
{ {
PSMemberInfoInternalCollection<T> typesXmlMembers = typeTable.GetMembers<T>(_mshOwner.InternalTypeNames); var typesXmlMembers = typeTable.GetMembers(_mshOwner.InternalTypeNames);
if (typesXmlMembers[member.Name] != null) var typesXmlMember = typesXmlMembers[member.Name];
if (typesXmlMember is T)
{ {
throw new ExtendedTypeSystemException( throw new ExtendedTypeSystemException(
"AlreadyPresentInTypesXml", "AlreadyPresentInTypesXml",

View file

@ -521,6 +521,16 @@ namespace System.Management.Automation
CommonInitialization(PSCustomObject.SelfInstance); CommonInitialization(PSCustomObject.SelfInstance);
} }
/// <summary>
/// Initializes a new instance of PSObject with a PSCustomObject BaseObject
/// and an initial capacity for its instance members.
/// </summary>
/// <param name="instanceMemberCapacity">The initial capacity for the instance member collection.</param>
public PSObject(int instanceMemberCapacity) : this()
{
// Runs after the parameterless constructor (chained via this()) and assigns
// an instance member collection presized to the requested capacity.
_instanceMembers = new PSMemberInfoInternalCollection<PSMemberInfo>(instanceMemberCapacity);
}
/// <summary> /// <summary>
/// Initializes a new instance of PSObject wrapping obj (accessible through BaseObject). /// Initializes a new instance of PSObject wrapping obj (accessible through BaseObject).
/// </summary> /// </summary>

View file

@ -2592,7 +2592,10 @@ namespace System.Management.Automation.Runspaces
// this is used to throw errors when updating a shared TypeTable. // this is used to throw errors when updating a shared TypeTable.
internal readonly bool isShared; internal readonly bool isShared;
private List<string> _typeFileList; private readonly List<string> _typeFileList;
// The member factory is cached to avoid allocating Func<> delegates on each call
private readonly Func<string, ConsolidatedString, PSMemberInfoInternalCollection<PSMemberInfo>> _memberFactoryFunc;
// This holds all the type information that is in the typetable // This holds all the type information that is in the typetable
// Holds file name if types file was used to update the types // Holds file name if types file was used to update the types
@ -3392,6 +3395,7 @@ namespace System.Management.Automation.Runspaces
{ {
this.isShared = isShared; this.isShared = isShared;
_typeFileList = new List<string>(); _typeFileList = new List<string>();
_memberFactoryFunc = MemberFactory;
} }
/// <summary> /// <summary>
@ -3461,16 +3465,13 @@ namespace System.Management.Automation.Runspaces
/// 1. There were errors loading TypeTable. Look in the Errors property to get /// 1. There were errors loading TypeTable. Look in the Errors property to get
/// detailed error messages. /// detailed error messages.
/// </exception> /// </exception>
internal TypeTable(IEnumerable<string> typeFiles, AuthorizationManager authorizationManager, PSHost host) internal TypeTable(IEnumerable<string> typeFiles, AuthorizationManager authorizationManager, PSHost host) : this(isShared: true)
{ {
if (typeFiles == null) if (typeFiles == null)
{ {
throw PSTraceSource.NewArgumentNullException("typeFiles"); throw PSTraceSource.NewArgumentNullException("typeFiles");
} }
isShared = true;
_typeFileList = new List<string>();
ConcurrentBag<string> errors = new ConcurrentBag<string>(); ConcurrentBag<string> errors = new ConcurrentBag<string>();
foreach (string typefile in typeFiles) foreach (string typefile in typeFiles)
{ {
@ -3510,13 +3511,10 @@ namespace System.Management.Automation.Runspaces
var retValueTable = new HashSet<string>(StringComparer.OrdinalIgnoreCase); var retValueTable = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
foreach (string type in types) foreach (string type in types)
{ {
PSMemberInfoInternalCollection<PSMemberInfo> typeMembers; if (!_extendedMembers.TryGetValue(type, out var typeMembers))
if (!_extendedMembers.TryGetValue(type, out typeMembers))
continue; continue;
PSMemberSet settings = typeMembers[PSStandardMembers] as PSMemberSet; PSMemberSet settings = typeMembers[PSStandardMembers] as PSMemberSet;
if (settings == null) PSPropertySet typeProperties = settings?.Members[PropertySerializationSet] as PSPropertySet;
continue;
PSPropertySet typeProperties = settings.Members[PropertySerializationSet] as PSPropertySet;
if (typeProperties == null) if (typeProperties == null)
continue; continue;
foreach (string reference in typeProperties.ReferencedPropertyNames) foreach (string reference in typeProperties.ReferencedPropertyNames)
@ -3551,64 +3549,70 @@ namespace System.Management.Automation.Runspaces
return PSObject.TransformMemberInfoCollection<PSMemberInfo, T>(GetMembers(types)); return PSObject.TransformMemberInfoCollection<PSMemberInfo, T>(GetMembers(types));
} }
private PSMemberInfoInternalCollection<PSMemberInfo> GetMembers(ConsolidatedString types) internal PSMemberInfoInternalCollection<PSMemberInfo> GetMembers(ConsolidatedString types)
{ {
if ((types == null) || string.IsNullOrEmpty(types.Key)) if ((types == null) || string.IsNullOrEmpty(types.Key))
{ {
return new PSMemberInfoInternalCollection<PSMemberInfo>(); return new PSMemberInfoInternalCollection<PSMemberInfo>();
} }
PSMemberInfoInternalCollection<PSMemberInfo> result = _consolidatedMembers.GetOrAdd(types.Key, k =>
PSMemberInfoInternalCollection<PSMemberInfo> result = _consolidatedMembers.GetOrAdd(types.Key, _memberFactoryFunc, types);
return result;
}
private PSMemberInfoInternalCollection<PSMemberInfo> MemberFactory(string k, ConsolidatedString types)
{
var retValue = new PSMemberInfoInternalCollection<PSMemberInfo>();
for (int i = types.Count - 1; i >= 0; i--)
{ {
var retValue = new PSMemberInfoInternalCollection<PSMemberInfo>(); if (!_extendedMembers.TryGetValue(types[i], out var typeMembers))
for (int i = types.Count - 1; i >= 0; i--)
{ {
PSMemberInfoInternalCollection<PSMemberInfo> typeMembers; continue;
if (!_extendedMembers.TryGetValue(types[i], out typeMembers))
{
continue;
}
foreach (PSMemberInfo typeMember in typeMembers)
{
PSMemberInfo currentMember = retValue[typeMember.Name];
// If the member was not present, we add it
if (currentMember == null)
{
retValue.Add(typeMember.Copy());
continue;
}
// There was a currentMember with the same name as typeMember
PSMemberSet currentMemberAsMemberSet = currentMember as PSMemberSet;
PSMemberSet typeMemberAsMemberSet = typeMember as PSMemberSet;
// if we are not in a memberset inherit members situation we just replace
// the current member with the new more specific member
if (currentMemberAsMemberSet == null || typeMemberAsMemberSet == null ||
!typeMemberAsMemberSet.InheritMembers)
{
retValue.Remove(typeMember.Name);
retValue.Add(typeMember.Copy());
continue;
}
// We are in a MemberSet InheritMembers situation, so we add the members in
// typeMembers to the existing memberset.
foreach (PSMemberInfo typeMemberAsMemberSetMember in typeMemberAsMemberSet.Members)
{
if (currentMemberAsMemberSet.Members[typeMemberAsMemberSetMember.Name] == null)
{
((PSMemberInfoIntegratingCollection<PSMemberInfo>)currentMemberAsMemberSet.Members)
.AddToTypesXmlCache(typeMemberAsMemberSetMember, false);
continue;
}
// there is a name conflict, the new member wins.
Diagnostics.Assert(!typeMemberAsMemberSetMember.IsHidden,
"new member in types.xml cannot be hidden");
currentMemberAsMemberSet.InternalMembers.Replace(typeMemberAsMemberSetMember);
}
}
} }
return retValue; foreach (PSMemberInfo typeMember in typeMembers)
}); {
return result; PSMemberInfo currentMember = retValue[typeMember.Name];
// If the member was not present, we add it
if (currentMember == null)
{
retValue.Add(typeMember.Copy());
continue;
}
// There was a currentMember with the same name as typeMember
PSMemberSet currentMemberAsMemberSet = currentMember as PSMemberSet;
PSMemberSet typeMemberAsMemberSet = typeMember as PSMemberSet;
// if we are not in a memberset inherit members situation we just replace
// the current member with the new more specific member
if (currentMemberAsMemberSet == null || typeMemberAsMemberSet == null ||
!typeMemberAsMemberSet.InheritMembers)
{
retValue.Remove(typeMember.Name);
retValue.Add(typeMember.Copy());
continue;
}
// We are in a MemberSet InheritMembers situation, so we add the members in
// typeMembers to the existing memberset.
foreach (PSMemberInfo typeMemberAsMemberSetMember in typeMemberAsMemberSet.Members)
{
if (currentMemberAsMemberSet.Members[typeMemberAsMemberSetMember.Name] == null)
{
((PSMemberInfoIntegratingCollection<PSMemberInfo>)currentMemberAsMemberSet.Members)
.AddToTypesXmlCache(typeMemberAsMemberSetMember, false);
continue;
}
// there is a name conflict, the new member wins.
Diagnostics.Assert(!typeMemberAsMemberSetMember.IsHidden,
"new member in types.xml cannot be hidden");
currentMemberAsMemberSet.InternalMembers.Replace(typeMemberAsMemberSetMember);
}
}
}
return retValue;
} }
/// <summary> /// <summary>

View file

@ -4848,7 +4848,7 @@ namespace System.Management.Automation.Language
lock (binderList) lock (binderList)
{ {
if (!binderList.Any()) if (binderList.Count == 0)
{ {
// Force one binder to be created if one hasn't been created already. // Force one binder to be created if one hasn't been created already.
PSGetMemberBinder.Get(memberName, (Type)null, @static: false); PSGetMemberBinder.Get(memberName, (Type)null, @static: false);