nixpkgs/maintainers/scripts/fetch-kde-qt.sh

#! /usr/bin/env nix-shell
#! nix-shell -i bash -p coreutils findutils gnused nix wget
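#
# update the generated srcs.nix of a Qt or KDE package set:
# crawl the directory listing at the BASE_URL from the package's fetch.sh,
# fetch (or compute) the sha256 checksum of every *.tar.xz tarball,
# and write the result to srcs.nix next to fetch.sh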
set -efuo pipefail
export LC_COLLATE=C # fix sort order
# parse files and folders from https://download.kde.org/ and https://download.qt.io/
# you can override this function in fetch.sh
function PARSE_INDEX() {
  grep -o -E -e '\s+href="[^"]+\.tar\.xz"' -e '\s+href="[-_a-zA-Z0-9]+/"' "$1" | cut -d'"' -f2 | sort | uniq
}
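# for illustration: given an index.html with lines like
#   <a href="qtbase-everywhere-src-5.12.0.tar.xz">qtbase-everywhere-src-5.12.0.tar.xz</a>
#   <a href="submodules/">submodules/</a>
# PARSE_INDEX prints
#   qtbase-everywhere-src-5.12.0.tar.xz
#   submodules/
# (hypothetical file names, only to show the expected output format)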
if [ $# != 1 ]; then
  echo "example use:" >&2
  echo "cd nixpkgs/" >&2
  echo "./maintainers/scripts/fetch-kde-qt.sh pkgs/development/libraries/qt-5/5.12" >&2
  exit 1
fi
if ! echo "$1" | grep -q '^pkgs/'; then
  echo "error: path argument must start with pkgs/" >&2
  exit 1
fi
# need absolute path for the pushd-popd block
if [ -f "$1" ]; then
echo "ok: using fetchfile $1"
fetchfilerel="$1"
fetchfile="$(readlink -f "$fetchfilerel")" # resolve absolute path
basedir="$(dirname "$fetchfile")"
basedirrel="$(dirname "$fetchfilerel")"
elif [ -d "$1" ]; then
echo "ok: using basedir $1"
basedirrel="$1"
basedir="$(readlink -f "$basedirrel")" # resolve absolute path
if ! [ -d "$basedir" ]; then
basedir="$(dirname "$basedir")"
fi
fetchfile="$basedir/fetch.sh"
else
echo 'error: $1 must be file or dir' >&2
exit 1
fi
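# at this point, fetchfile is the absolute path of the fetch script
# and basedir is the absolute path of the package directory containing it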
pkgname=$(basename "$basedir")
SRCS="$basedir/srcs.nix"
srcsrel="$basedirrel/srcs.nix"
source "$fetchfile"
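# fetch.sh must set either WGET_ARGS (old format) or BASE_URL (new format),
# roughly like this (illustrative values, not copied from any actual fetch.sh):
#   WGET_ARGS=( https://download.kde.org/stable/frameworks/5.66/ )
#   BASE_URL="https://download.kde.org/stable/frameworks/5.66/"
# only the first element of WGET_ARGS, the base url, is used here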
if [ -n "$WGET_ARGS" ]; then # old format
BASE_URL="${WGET_ARGS[0]}" # convert to new format
# validate
if ! echo "$BASE_URL" | grep -q -E '^(http|https|ftp)://'; then
printf 'error: from WGET_ARGS, converted invalid BASE_URL: %q\n' "$BASE_URL" >&2
exit 1
fi
printf 'ok: from WGET_ARGS, converted BASE_URL: %q\n' "$BASE_URL"
elif [ -n "$BASE_URL" ]; then # new format
:
else
echo "error: fetch.sh must set either WGET_ARGS or BASE_URL" >&2
exit 1
fi
tmptpl=tmp.fetch-kde-qt.$pkgname.XXXXXXXXXX
tmp=$(mktemp -d "$tmptpl")
pushd "$tmp" >/dev/null
echo "tempdir is $tmp"
wgetargs='--quiet --show-progress'
#wgetargs='' # debug
dirlist="$BASE_URL"
filelist=""
base_url_len=${#BASE_URL}
clean_urls() {
  # collapse repeated slashes to one, but keep the "//" after the url scheme
  sed -E 's,//+,/,g' | sed -E 's,^(http|https|ftp):/,&/,'
}
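# for example, clean_urls turns "https://example.org//stable//foo/"
# into "https://example.org/stable/foo/"
# crawl the directory listing level by level, starting at BASE_URL:
# collect the urls of all *.tar.xz files in filelist and the urls of
# all subdirectories in dirlist until no unvisited directories remain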
while [ -n "$dirlist" ]
do
for dirurl in $dirlist
do
echo "fetching index.html from $dirurl"
relpath=$(echo "./${dirurl:$base_url_len}" | clean_urls)
mkdir -p "$relpath"
indexfile=$(echo "$relpath/index.html" | clean_urls)
wget $wgetargs -O "$indexfile" "$dirurl"
echo "parsing $indexfile"
filedirlist="$(PARSE_INDEX "$indexfile")"
filelist_next="$(echo "$filedirlist" | grep '\.tar\.xz$' | while read file; do echo "$dirurl/$file"; done)"
filelist_next="$(echo "$filelist_next" | clean_urls)"
[ -n "$filelist" ] && filelist+=$'\n'
filelist+="$filelist_next"
dirlist="$(echo "$filedirlist" | grep -v '\.tar\.xz$' | while read dir; do echo "$dirurl/$dir"; done || true)"
dirlist="$(echo "$dirlist" | clean_urls)"
done
done
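# filelist now holds one absolute tarball url per line, for example (illustrative):
#   https://download.kde.org/stable/frameworks/5.66/attica-5.66.0.tar.xz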
filecount=$(echo "$filelist" | wc -l)
if [ -z "$filelist" ]
then
  echo "error: no tar.xz files found in the indexes under $tmp" >&2
  exit 1
fi
echo "parsed $filecount tar.xz files:"; echo "$filelist"
# most time is spent here
echo "fetching $filecount sha256 files ..."
urllist="$(echo "$filelist" | while read file; do echo "$file.sha256"; done)"
# wget -r: keep directory structure
echo "$urllist" | xargs wget $wgetargs -nH -r -c --no-parent && {
actual=$(find . -type f -name '*.sha256' | wc -l)
echo "fetching $filecount sha256 files done: got $actual files"
} || {
# workaround: in rare cases, the server does not provide the sha256 files
# for example when the release is just a few hours old
# and the servers are not yet fully synced
actual=$(find . -type f -name '*.sha256' | wc -l)
echo "fetching $filecount sha256 files failed: got only $actual files"
# TODO fetch only missing tar.xz files
echo "fetching $filecount tar.xz files ..."
urllist="$(echo "$filelist" | while read file; do echo "$BASE_URL/$file"; done)"
echo "$urllist" | xargs wget $wgetargs -nH -r -c --no-parent
echo "generating sha256 files ..."
find . -type f -name '*.tar.xz' | while read src; do
name=$(basename "$src")
sha256=$(sha256sum "$src" | cut -d' ' -f1)
echo "$sha256 $name" >"$src.sha256"
done
}
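# collect one csv line per tarball: name,version,src,filename,sha256
# where sha256 is the base32 form that fetchurl expects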
csv=$(mktemp "$tmptpl.csv")
echo "writing temporary file $csv ..."
find . -type f -name '*.sha256' | while read -r sha256file; do
  src="${sha256file%.*}" # remove the .sha256 extension
  sha256=$(cut -d' ' -f1 "$sha256file") # base16
  sha256=$(nix-hash --type sha256 --to-base32 "$sha256")
  # Sanitize file name
  filename=$(basename "$src" | tr '@' '_')
  nameVersion="${filename%.tar.*}"
  name=$(echo "$nameVersion" | sed -e 's,-[[:digit:]].*,,' | sed -e 's,-opensource-src$,,' | sed -e 's,-everywhere-src$,,')
  version=$(echo "$nameVersion" | sed -e 's,^\([[:alpha:]][[:alnum:]]*-\)\+,,')
  echo "$name,$version,$src,$filename,$sha256" >>"$csv"
done
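# count the entries in the existing srcs.nix so we can report before/after totals below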
files_before=$(grep -c 'src = ' "$SRCS" 2>/dev/null || true) # empty if srcs.nix does not exist yet
echo "writing output file $SRCS ..."
cat >"$SRCS" <<EOF
# DO NOT EDIT! This file is generated automatically.
# Command: $0 $@
{ fetchurl, mirror }:
{
EOF
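# for each package name, pick the highest version found in the csv and emit one
# fetchurl attribute; the generated file is meant to be called like (illustrative)
#   import ./srcs.nix { inherit fetchurl; mirror = "https://download.kde.org"; }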
gawk -F , "{ print \$1 }" $csv | sort | uniq | while read name; do
versions=$(gawk -F , "/^$name,/ { print \$2 }" $csv)
latestVersion=$(echo "$versions" | sort -rV | head -n 1)
src=$(gawk -F , "/^$name,$latestVersion,/ { print \$3 }" $csv)
filename=$(gawk -F , "/^$name,$latestVersion,/ { print \$4 }" $csv)
sha256=$(gawk -F , "/^$name,$latestVersion,/ { print \$5 }" $csv)
url="${src:2}"
cat >>"$SRCS" <<EOF
$name = {
version = "$latestVersion";
src = fetchurl {
url = "\${mirror}/$url";
sha256 = "$sha256";
name = "$filename";
};
};
EOF
done
echo "}" >>"$SRCS"
files_after=$(grep -c 'src = ' "$SRCS")
echo "files before: $files_before"
echo "files after: $files_after"
echo "compare:"
echo "git diff $srcsrel"
rm -f "$csv"
popd >/dev/null
rm -fr "$tmp"