nixpkgs/pkgs/top-level/release-attrpaths-superset.nix
Adam Joseph eda44b7415 pkgs/top-level/release-attrpaths-superset.nix: init
This file walks the entire nixpkgs tree and emits a superset of all
release attrnames in only 44 seconds on a 3ghz CPU, using 5 gbytes
of memory.  By comparison, on the same CPU the `nix-env` hack used
by ofborg on every PR submission requires 41 *minutes* and peaks at
60 gbytes, even with checkMeta turned off.  Full details below.

This is:

- 46x faster (or 2.1% of the elapsed time)
- 12.5x less memory (or 8.0% of the peak memory usage)

In order to replace the ofborg check, this list of attrnames must
then be post-filtered for platform-relevance.  However, crucially,
the post-filtering can be done *in parallel* on multiple cores by
splitting the attrname list into chunks.  Generating the list of
attrnames cannot be parallelized because it is a single-threaded
cppnix task.

This PR also adds `recurseForDerivations` where necessary within
nixpkgs in order to make this possible -- it screens out various
non-tryEval-catchable failures and infinite recursions.  Before
undraftifying, I will add an invocation of this command to the CI
tests, to ensure that the work performed here is not immediately
undone.  My next PR will then add an additional CI check confirming
that the emitted attrpaths are in fact a superset of the release
attrpaths calculated by the slow-memory-hog ofborg method.

I have manually confirmed that this is the case at the tip commit of
this PR, but we need CI to make sure this remains true until ofborg
switches to this more-efficient method of calculation; at that point
the superset-check can be dropped.

According to GNU Time,

        Command being timed: "nix-instantiate --eval --strict --json pkgs/top-level/release-attrpaths-superset.nix -A names"
        User time (seconds): 44.88
        System time (seconds): 8.09
        Percent of CPU this job got: 99%
        Elapsed (wall clock) time (h:mm:ss or m:ss): 0:53.20
        Average shared text size (kbytes): 0
        Average unshared data size (kbytes): 0
        Average stack size (kbytes): 0
        Average total size (kbytes): 0
        Maximum resident set size (kbytes): 4823028
        Average resident set size (kbytes): 0
        Major (requiring I/O) page faults: 0
        Minor (reclaiming a frame) page faults: 3611240
        Voluntary context switches: 113
        Involuntary context switches: 949
        Swaps: 0
        File system inputs: 1480
        File system outputs: 5944
        Socket messages sent: 0
        Socket messages received: 0
        Signals delivered: 0
        Page size (bytes): 4096
        Exit status: 0

Compared to release-outpaths.nix:

        Command being timed: "nix-env -qaP --no-name --out-path --arg checkMeta false --argstr path /git/work/pr/release-outpaths -f pkgs/top-level/release-outpaths.nix"
        User time (seconds): 2120.67
        System time (seconds): 337.80
        Percent of CPU this job got: 98%
        Elapsed (wall clock) time (h:mm:ss or m:ss): 41:37.91
        Average shared text size (kbytes): 0
        Average unshared data size (kbytes): 0
        Average stack size (kbytes): 0
        Average total size (kbytes): 0
        Maximum resident set size (kbytes): 60171768
        Average resident set size (kbytes): 0
        Major (requiring I/O) page faults: 2
        Minor (reclaiming a frame) page faults: 230608113
        Voluntary context switches: 8876
        Involuntary context switches: 22275
        Swaps: 0
        File system inputs: 62624
        File system outputs: 72
        Socket messages sent: 0
        Socket messages received: 0
        Signals delivered: 0
        Page size (bytes): 4096
        Exit status: 0
2023-12-15 05:13:50 -08:00

192 lines
6.2 KiB
Nix

# This expression will, as efficiently as possible, dump a
# *superset* of all attrpaths of derivations which might be
# part of a release on *any* platform.
#
# Both this expression and what ofborg uses (release-outpaths.nix)
# are essentially single-threaded (under the current cppnix
# implementation).
#
# This expression runs much, much, much faster and uses much, much
# less memory than the ofborg script by skipping the
# platform-relevance checks. The ofborg outpaths.nix script takes
# half an hour on a 3ghz core and peaks at 60gbytes of memory; this
# expression runs on the same machine in 44 seconds with peak memory
# usage of 5gbytes.
#
# Once you have the list of attrnames you can split it up into
# $NUM_CORES batches and run the platform checks separately for each
# batch, in parallel.
#
# To dump the attrnames:
#
# nix-instantiate --eval --strict --json pkgs/top-level/release-attrpaths-superset.nix -A names
#
# Arguments (all optional):
#
#   lib             library used throughout this file; defaults to
#                   importing `path + "/lib"`
#   trace           when true, every attrpath visited by justAttrNames is
#                   printed via lib.trace as "** <dotted attrpath>"
#   enableWarnings  when true, a lib.warn message is emitted for each
#                   attrpath at which builtins.tryEval fails (except the
#                   path [ "AAAAAASomeThingsFailToEvaluate" ])
#   checkMeta       forwarded unchanged to ./release-outpaths.nix
#   path            forwarded to ./release-outpaths.nix and used to locate
#                   the default `lib`; defaults to ./../..
#
{ lib ? import (path + "/lib")
, trace ? false
, enableWarnings ? true
, checkMeta ? true
, path ? ./../..
}:
let
# No release package attrpath may have any of these attrnames as
# its initial component.
#
# If you can find a way to remove any of these entries without
# causing CI to fail, please do so.
#
excluded-toplevel-attrs =
  # Attrset mapping every excluded top-level attrname to `true`, so that
  # membership can be tested with `excluded-toplevel-attrs.${name} or false`
  # and the names recovered with `builtins.attrNames`.
  lib.genAttrs [
    # spliced packagesets
    "__splicedPackages"
    "pkgsBuildBuild"
    "pkgsBuildHost"
    "pkgsBuildTarget"
    "pkgsHostHost"
    "pkgsHostTarget"
    "pkgsTargetTarget"
    "buildPackages"
    "targetPackages"

    # cross packagesets
    "pkgsLLVM"
    "pkgsMusl"
    "pkgsStatic"
    "pkgsCross"
    "pkgsi686Linux"
  ] (_name: true);
# No release package attrpath may have any of these as a component
# anywhere in it. These are the names of gigantic top-level attrsets
# that have leaked into so many sub-packagesets that it's easier to
# simply exclude them entirely.
#
# If you can find a way to remove any of these entries without
# causing CI to fail, please do so.
#
excluded-attrnames-at-any-depth =
  # Attrset mapping every forbidden attrname to `true`; looked up with
  # `excluded-attrnames-at-any-depth.${name} or false` while recursing.
  lib.genAttrs [
    "lib"
    "override"
    "__functor"
    "__functionArgs"
    "newScope"
    "scope"
    "pkgs"

    "buildHaskellPackages"
    "buildPackages"
    "generateOptparseApplicativeCompletions"

    "callPackage"
    "mkDerivation"
    "overrideDerivation"
    "overrideScope"
    "overrideScope'"

    # Special case: lib/types.nix leaks into a lot of nixos-related
    # derivations, and does not eval deeply.
    "type"
  ] (_name: true);
# __attrsFailEvaluation is a temporary workaround to get top-level
# eval to succeed (under builtins.tryEval) for the entire
# packageset, without deep invasive changes into individual
# packages.
#
# Now that CI has been added, ensuring that top-level eval will
# not be broken by any new commits, you should not add any new
# occurrences of __attrsFailEvaluation, and should remove them
# wherever you are able to (doing so will likely require deep
# adjustments within packages). Once all of the uses of
# __attrsFailEvaluation are removed, it will be deleted from the
# routine below. In the meantime, condition 4 below still applies.
#
# The intended semantics are that an attrpath rooted at pkgs is
# part of the (unfiltered) release jobset iff all of the following
# are true:
#
# 1. The first component of the attrpath is not in
# `excluded-toplevel-attrs`
#
# 2. No attrname in the attrpath belongs to the list of forbidden
# attrnames `excluded-attrnames-at-any-depth`
#
# 3. The attrpath leads to a value for which lib.isDerivation is true
#
# 4. No proper prefix of the attrpath has __attrsFailEvaluation=true
#
# 5. Any proper prefix of the attrpath at which lib.isDerivation
# is true also has __recurseIntoDerivationForReleaseJobs=true.
#
# The last condition is unfortunately necessary because there are
# Hydra release jobnames which have proper prefixes which are
# attrnames of derivations (!). We should probably restructure
# the job tree so that this is not the case.
#
# justAttrNames path value
#
# Recursively collects the attrpaths (lists of attrname strings) found
# beneath `value`, where `path` is the attrpath of `value` itself.
# The whole subtree is deeply forced under builtins.tryEval; if that
# fails, the subtree contributes no attrpaths (and a warning is emitted
# when `enableWarnings` is set).
justAttrNames = path: value:
  let
    # Dotted rendering of the current attrpath, used only in messages.
    dotted = lib.concatStringsSep "." path;

    isDrv = lib.isDerivation value;

    # Recurse into one child attribute unless its name is forbidden.
    descend = name: child:
      if excluded-attrnames-at-any-depth.${name} or false
      then [ ]
      else justAttrNames (path ++ [ name ]) child;

    found =
      # In some places we have *derivations* with jobsets as
      # subattributes, ugh; only those opt in to being recursed into.
      if isDrv && !(value.__recurseIntoDerivationForReleaseJobs or false)
      then [ path ]
      # Even wackier case: we have meta.broken==true jobs with
      # !meta.broken jobs as subattributes with license=unfree, and
      # check-meta.nix won't throw an "unfree" failure because the
      # enclosing derivation is marked broken.  Yeah.  Bonkers.
      # We should just forbid jobsets enclosed by derivations.
      else if isDrv && !value.meta.available
      then [ ]
      else if !(lib.isAttrs value)
      then [ ]
      else if value.__attrsFailEvaluation or false
      then [ ]
      else
        builtins.concatLists
          (builtins.attrValues
            (builtins.mapAttrs descend value));

    # Force the complete result inside tryEval so that catchable
    # evaluation failures anywhere below this point are contained here.
    outcome = builtins.tryEval (builtins.deepSeq found found);

    collected =
      if outcome.success
      then outcome.value
      else if enableWarnings && path != [ "AAAAAASomeThingsFailToEvaluate" ]
      then lib.warn "tryEval failed at: ${dotted}" [ ]
      else [ ];
  in
  if trace
  then lib.trace "** ${dotted}" collected
  else collected;
# The package set as produced by ./release-outpaths.nix when asked for
# attrnames only, before any top-level exclusions are applied.
unfiltered = import ./release-outpaths.nix {
  inherit checkMeta path;
  attrNamesOnly = true;
};
# `unfiltered` with every attribute named in `excluded-toplevel-attrs`
# removed from its top level.
filtered =
  builtins.removeAttrs
    unfiltered
    (builtins.attrNames excluded-toplevel-attrs);
# All collected attrpaths, each represented as a list of attrname strings.
paths =
  let
    # I am not entirely sure why these three packages end up in
    # the Hydra jobset.  But they do, and they don't meet the
    # criteria above, so at the moment they are special-cased.
    special-cased =
      map (pkgset: [ pkgset "stdenv" ]) [
        "pkgsLLVM"
        "pkgsStatic"
        "pkgsMusl"
      ];
  in
  special-cased ++ justAttrNames [ ] filtered;
# The same attrpaths as `paths`, each joined into a single dotted string.
names = map (lib.concatStringsSep ".") paths;
in
# Two views of the same result: `paths` is a list of attrpaths (each a
# list of attrname strings); `names` is that list with every attrpath
# joined with "." separators.
{
inherit paths names;
}