Improve performance of Import-CSV up to 10 times (#7413)

Speed up the creation of PSObjects in the CSV cmdlets with the following changes:

- Use the `Add(member, preValidated)` overload when adding the `NoteProperty` members, passing `preValidated: true` for all objects except the first.
- Add a new constructor to PSObject that preallocates the `_instanceMembers` collection with an initial capacity.
- Improve the performance of `AddToTypesXmlCache` by avoiding an expensive copy of the members just to check for the existence of one of them. This significantly increases performance and reduces allocations.
- Reduce allocations and GC pressure by preallocating and reusing the `StringBuilder` and `List<string>` instances used for line parsing in the CSV cmdlets.
- Use `List<string>` instead of `Collection<string>` to get fewer virtual calls and better inlining.
- Reduce allocations by using a preallocated value factory in `TypeTable.GetMembers(ConsolidatedString types)`.
- Replace a LINQ `!Any()` check with `List.Count == 0` in binder code.

The main gain is from taking advantage of the fact that all objects created by `Import-Csv` have the same shape (the same properties).
This commit is contained in:
Staffan Gustafsson 2018-08-06 19:58:55 +02:00 committed by Dongbo Wang
parent f0ea0d380f
commit d620c4fd1e
5 changed files with 109 additions and 76 deletions

View file

@ -1149,6 +1149,12 @@ namespace Microsoft.PowerShell.Commands
/// </summary>
private readonly StreamReader _sr;
// Initial sizes of the value list and the line stringbuilder.
// Set to reasonable initial sizes. They may grow beyond these,
// but this will prevent a few reallocations.
private const int ValueCountGuestimate = 16;
private const int LineLengthGuestimate = 256;
internal ImportCsvHelper(PSCmdlet cmdlet, char delimiter, IList<string> header, string typeName, StreamReader streamReader)
{
if (cmdlet == null)
@ -1235,9 +1241,11 @@ namespace Microsoft.PowerShell.Commands
TypeName = ReadTypeInformation();
}
var values = new List<string>(ValueCountGuestimate);
var builder = new StringBuilder(LineLengthGuestimate);
while ((Header == null) && (!this.EOF))
{
Collection<string> values = ParseNextRecord();
ParseNextRecord(values, builder);
// Trim all trailing blankspaces and delimiters ( single/multiple ).
// If there is only one element in the row and if its a blankspace we dont trim it.
@ -1278,9 +1286,12 @@ namespace Microsoft.PowerShell.Commands
{
_alreadyWarnedUnspecifiedName = alreadyWriteOutWarning;
ReadHeader();
var prevalidated = false;
var values = new List<string>(ValueCountGuestimate);
var builder = new StringBuilder(LineLengthGuestimate);
while (true)
{
Collection<string> values = ParseNextRecord();
ParseNextRecord(values, builder);
if (values.Count == 0)
break;
@ -1290,7 +1301,8 @@ namespace Microsoft.PowerShell.Commands
continue;
}
PSObject result = BuildMshobject(TypeName, Header, values, _delimiter);
PSObject result = BuildMshobject(TypeName, Header, values, _delimiter, prevalidated);
prevalidated = true;
_cmdlet.WriteObject(result);
}
alreadyWriteOutWarning = _alreadyWarnedUnspecifiedName;
@ -1365,13 +1377,12 @@ namespace Microsoft.PowerShell.Commands
/// <returns>
/// Parsed collection of strings.
/// </returns>
private Collection<string>
ParseNextRecord()
private void
ParseNextRecord(List<string> result, StringBuilder current)
{
// Collection of strings to return
Collection<string> result = new Collection<string>();
result.Clear();
// current string
StringBuilder current = new StringBuilder();
current.Clear();
bool seenBeginQuote = false;
// int i = 0;
@ -1519,8 +1530,6 @@ namespace Microsoft.PowerShell.Commands
{
result.Add(current.ToString());
}
return result;
}
// If we detect a newline we return it as a string "\r", "\n" or "\r\n"
@ -1611,10 +1620,10 @@ namespace Microsoft.PowerShell.Commands
private
PSObject
BuildMshobject(string type, IList<string> names, Collection<string> values, char delimiter)
BuildMshobject(string type, IList<string> names, List<string> values, char delimiter, bool preValidated = false)
{
//string[] namesarray = null;
PSObject result = new PSObject();
PSObject result = new PSObject(names.Count);
char delimiterlocal = delimiter;
int unspecifiedNameIndex = 1;
for (int i = 0; i <= names.Count - 1; i++)
@ -1635,7 +1644,8 @@ namespace Microsoft.PowerShell.Commands
{
value = values[i];
}
result.Properties.Add(new PSNoteProperty(name, value));
result.Properties.Add(new PSNoteProperty(name, value), preValidated);
}
if (!_alreadyWarnedUnspecifiedName && unspecifiedNameIndex != 1)
@ -1644,7 +1654,7 @@ namespace Microsoft.PowerShell.Commands
_alreadyWarnedUnspecifiedName = true;
}
if (type != null && type.Length > 0)
if (!string.IsNullOrEmpty(type))
{
result.TypeNames.Clear();
result.TypeNames.Add(type);

View file

@ -4019,6 +4019,14 @@ namespace System.Management.Automation
_members = new OrderedDictionary(StringComparer.OrdinalIgnoreCase);
}
/// <summary>
/// Constructs this collection with an initial capacity.
/// </summary>
/// <param name="capacity">The initial capacity passed to the underlying ordered dictionary of members.</param>
internal PSMemberInfoInternalCollection(int capacity)
{
// Preallocate the backing dictionary; keys are compared with the ordinal, case-insensitive comparer.
_members = new OrderedDictionary(capacity, StringComparer.OrdinalIgnoreCase);
}
private void Replace(T oldMember, T newMember)
{
_members[newMember.Name] = newMember;
@ -4567,8 +4575,9 @@ namespace System.Management.Automation
TypeTable typeTable = _mshOwner.GetTypeTable();
if (typeTable != null)
{
PSMemberInfoInternalCollection<T> typesXmlMembers = typeTable.GetMembers<T>(_mshOwner.InternalTypeNames);
if (typesXmlMembers[member.Name] != null)
var typesXmlMembers = typeTable.GetMembers(_mshOwner.InternalTypeNames);
var typesXmlMember = typesXmlMembers[member.Name];
if (typesXmlMember is T)
{
throw new ExtendedTypeSystemException(
"AlreadyPresentInTypesXml",

View file

@ -521,6 +521,16 @@ namespace System.Management.Automation
CommonInitialization(PSCustomObject.SelfInstance);
}
/// <summary>
/// Initializes a new instance of PSObject with a PSCustomObject BaseObject
/// and an initial capacity for the instance members.
/// </summary>
/// <param name="instanceMemberCapacity">The initial capacity for the instance member collection.</param>
public PSObject(int instanceMemberCapacity) : this()
{
// The chained parameterless constructor runs first; the instance member collection
// is then created up front with the requested capacity.
_instanceMembers = new PSMemberInfoInternalCollection<PSMemberInfo>(instanceMemberCapacity);
}
/// <summary>
/// Initializes a new instance of PSObject wrapping obj (accessible through BaseObject).
/// </summary>

View file

@ -2592,7 +2592,10 @@ namespace System.Management.Automation.Runspaces
// this is used to throw errors when updating a shared TypeTable.
internal readonly bool isShared;
private List<string> _typeFileList;
private readonly List<string> _typeFileList;
// The member factory is cached to avoid allocating Func<> delegates on each call
private readonly Func<string, ConsolidatedString, PSMemberInfoInternalCollection<PSMemberInfo>> _memberFactoryFunc;
// This holds all the type information that is in the typetable
// Holds file name if types file was used to update the types
@ -3392,6 +3395,7 @@ namespace System.Management.Automation.Runspaces
{
this.isShared = isShared;
_typeFileList = new List<string>();
_memberFactoryFunc = MemberFactory;
}
/// <summary>
@ -3461,16 +3465,13 @@ namespace System.Management.Automation.Runspaces
/// 1. There were errors loading TypeTable. Look in the Errors property to get
/// detailed error messages.
/// </exception>
internal TypeTable(IEnumerable<string> typeFiles, AuthorizationManager authorizationManager, PSHost host)
internal TypeTable(IEnumerable<string> typeFiles, AuthorizationManager authorizationManager, PSHost host) : this(isShared: true)
{
if (typeFiles == null)
{
throw PSTraceSource.NewArgumentNullException("typeFiles");
}
isShared = true;
_typeFileList = new List<string>();
ConcurrentBag<string> errors = new ConcurrentBag<string>();
foreach (string typefile in typeFiles)
{
@ -3510,13 +3511,10 @@ namespace System.Management.Automation.Runspaces
var retValueTable = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
foreach (string type in types)
{
PSMemberInfoInternalCollection<PSMemberInfo> typeMembers;
if (!_extendedMembers.TryGetValue(type, out typeMembers))
if (!_extendedMembers.TryGetValue(type, out var typeMembers))
continue;
PSMemberSet settings = typeMembers[PSStandardMembers] as PSMemberSet;
if (settings == null)
continue;
PSPropertySet typeProperties = settings.Members[PropertySerializationSet] as PSPropertySet;
PSPropertySet typeProperties = settings?.Members[PropertySerializationSet] as PSPropertySet;
if (typeProperties == null)
continue;
foreach (string reference in typeProperties.ReferencedPropertyNames)
@ -3551,64 +3549,70 @@ namespace System.Management.Automation.Runspaces
return PSObject.TransformMemberInfoCollection<PSMemberInfo, T>(GetMembers(types));
}
private PSMemberInfoInternalCollection<PSMemberInfo> GetMembers(ConsolidatedString types)
internal PSMemberInfoInternalCollection<PSMemberInfo> GetMembers(ConsolidatedString types)
{
if ((types == null) || string.IsNullOrEmpty(types.Key))
{
return new PSMemberInfoInternalCollection<PSMemberInfo>();
}
PSMemberInfoInternalCollection<PSMemberInfo> result = _consolidatedMembers.GetOrAdd(types.Key, k =>
PSMemberInfoInternalCollection<PSMemberInfo> result = _consolidatedMembers.GetOrAdd(types.Key, _memberFactoryFunc, types);
return result;
}
private PSMemberInfoInternalCollection<PSMemberInfo> MemberFactory(string k, ConsolidatedString types)
{
var retValue = new PSMemberInfoInternalCollection<PSMemberInfo>();
for (int i = types.Count - 1; i >= 0; i--)
{
var retValue = new PSMemberInfoInternalCollection<PSMemberInfo>();
for (int i = types.Count - 1; i >= 0; i--)
if (!_extendedMembers.TryGetValue(types[i], out var typeMembers))
{
PSMemberInfoInternalCollection<PSMemberInfo> typeMembers;
if (!_extendedMembers.TryGetValue(types[i], out typeMembers))
{
continue;
}
foreach (PSMemberInfo typeMember in typeMembers)
{
PSMemberInfo currentMember = retValue[typeMember.Name];
// If the member was not present, we add it
if (currentMember == null)
{
retValue.Add(typeMember.Copy());
continue;
}
// There was a currentMember with the same name as typeMember
PSMemberSet currentMemberAsMemberSet = currentMember as PSMemberSet;
PSMemberSet typeMemberAsMemberSet = typeMember as PSMemberSet;
// if we are not in a memberset inherit members situation we just replace
// the current member with the new more specific member
if (currentMemberAsMemberSet == null || typeMemberAsMemberSet == null ||
!typeMemberAsMemberSet.InheritMembers)
{
retValue.Remove(typeMember.Name);
retValue.Add(typeMember.Copy());
continue;
}
// We are in a MemberSet InheritMembers situation, so we add the members in
// typeMembers to the existing memberset.
foreach (PSMemberInfo typeMemberAsMemberSetMember in typeMemberAsMemberSet.Members)
{
if (currentMemberAsMemberSet.Members[typeMemberAsMemberSetMember.Name] == null)
{
((PSMemberInfoIntegratingCollection<PSMemberInfo>)currentMemberAsMemberSet.Members)
.AddToTypesXmlCache(typeMemberAsMemberSetMember, false);
continue;
}
// there is a name conflict, the new member wins.
Diagnostics.Assert(!typeMemberAsMemberSetMember.IsHidden,
"new member in types.xml cannot be hidden");
currentMemberAsMemberSet.InternalMembers.Replace(typeMemberAsMemberSetMember);
}
}
continue;
}
return retValue;
});
return result;
foreach (PSMemberInfo typeMember in typeMembers)
{
PSMemberInfo currentMember = retValue[typeMember.Name];
// If the member was not present, we add it
if (currentMember == null)
{
retValue.Add(typeMember.Copy());
continue;
}
// There was a currentMember with the same name as typeMember
PSMemberSet currentMemberAsMemberSet = currentMember as PSMemberSet;
PSMemberSet typeMemberAsMemberSet = typeMember as PSMemberSet;
// if we are not in a memberset inherit members situation we just replace
// the current member with the new more specific member
if (currentMemberAsMemberSet == null || typeMemberAsMemberSet == null ||
!typeMemberAsMemberSet.InheritMembers)
{
retValue.Remove(typeMember.Name);
retValue.Add(typeMember.Copy());
continue;
}
// We are in a MemberSet InheritMembers situation, so we add the members in
// typeMembers to the existing memberset.
foreach (PSMemberInfo typeMemberAsMemberSetMember in typeMemberAsMemberSet.Members)
{
if (currentMemberAsMemberSet.Members[typeMemberAsMemberSetMember.Name] == null)
{
((PSMemberInfoIntegratingCollection<PSMemberInfo>)currentMemberAsMemberSet.Members)
.AddToTypesXmlCache(typeMemberAsMemberSetMember, false);
continue;
}
// there is a name conflict, the new member wins.
Diagnostics.Assert(!typeMemberAsMemberSetMember.IsHidden,
"new member in types.xml cannot be hidden");
currentMemberAsMemberSet.InternalMembers.Replace(typeMemberAsMemberSetMember);
}
}
}
return retValue;
}
/// <summary>

View file

@ -4848,7 +4848,7 @@ namespace System.Management.Automation.Language
lock (binderList)
{
if (!binderList.Any())
if (binderList.Count == 0)
{
// Force one binder to be created if one hasn't been created already.
PSGetMemberBinder.Get(memberName, (Type)null, @static: false);