nixpkgs/maintainers/scripts/fetch-kde-qt.sh

196 lines
5.9 KiB
Bash
Executable file

#! /usr/bin/env nix-shell
#! nix-shell -i bash -p coreutils findutils gnused nix wget
set -efuo pipefail
export LC_COLLATE=C # fix sort order
# parse files and folders from https://download.kde.org/ and https://download.qt.io/
# you can override this function in fetch.sh
function PARSE_INDEX() {
cat "$1" | grep -o -E -e '\s+href="[^"]+\.tar\.xz"' -e '\s+href="[-_a-zA-Z0-9]+/"' | cut -d'"' -f2 | sort | uniq
}
if [ $# != 1 ]; then
echo "example use:" >&2
echo "cd nixpkgs/" >&2
echo "./maintainers/scripts/fetch-kde-qt.sh pkgs/development/libraries/qt-5/5.12" >&2
exit 1
fi
if ! echo "$1" | grep -q '^pkgs/'; then
echo "error: path argument must start with pkgs/" >&2
exit 1
fi
# need absolute path for the pushd-popd block
if [ -f "$1" ]; then
echo "ok: using fetchfile $1"
fetchfilerel="$1"
fetchfile="$(readlink -f "$fetchfilerel")" # resolve absolute path
basedir="$(dirname "$fetchfile")"
basedirrel="$(dirname "$fetchfilerel")"
elif [ -d "$1" ]; then
echo "ok: using basedir $1"
basedirrel="$1"
basedir="$(readlink -f "$basedirrel")" # resolve absolute path
if ! [ -d "$basedir" ]; then
basedir="$(dirname "$basedir")"
fi
fetchfile="$basedir/fetch.sh"
else
echo 'error: $1 must be file or dir' >&2
exit 1
fi
pkgname=$(basename "$basedir")
SRCS="$basedir/srcs.nix"
srcsrel="$basedirrel/srcs.nix"
source "$fetchfile"
if [ -n "$WGET_ARGS" ]; then # old format
BASE_URL="${WGET_ARGS[0]}" # convert to new format
# validate
if ! echo "$BASE_URL" | grep -q -E '^(http|https|ftp)://'; then
printf 'error: from WGET_ARGS, converted invalid BASE_URL: %q\n' "$BASE_URL" >&2
exit 1
fi
printf 'ok: from WGET_ARGS, converted BASE_URL: %q\n' "$BASE_URL"
elif [ -n "$BASE_URL" ]; then # new format
:
else
echo "error: fetch.sh must set either WGET_ARGS or BASE_URL" >&2
exit 1
fi
tmptpl=tmp.fetch-kde-qt.$pkgname.XXXXXXXXXX
tmp=$(mktemp -d $tmptpl)
pushd $tmp >/dev/null
echo "tempdir is $tmp"
wgetargs='--quiet --show-progress'
#wgetargs='' # debug
dirlist="$BASE_URL"
filelist=""
base_url_len=${#BASE_URL}
clean_urls() {
# // -> /
sed -E 's,//+,/,g' | sed -E 's,^(http|https|ftp):/,&/,'
}
while [ -n "$dirlist" ]
do
for dirurl in $dirlist
do
echo "fetching index.html from $dirurl"
relpath=$(echo "./${dirurl:$base_url_len}" | clean_urls)
mkdir -p "$relpath"
indexfile=$(echo "$relpath/index.html" | clean_urls)
wget $wgetargs -O "$indexfile" "$dirurl"
echo "parsing $indexfile"
filedirlist="$(PARSE_INDEX "$indexfile")"
filelist_next="$(echo "$filedirlist" | grep '\.tar\.xz$' | while read file; do echo "$dirurl/$file"; done)"
filelist_next="$(echo "$filelist_next" | clean_urls)"
[ -n "$filelist" ] && filelist+=$'\n'
filelist+="$filelist_next"
dirlist="$(echo "$filedirlist" | grep -v '\.tar\.xz$' | while read dir; do echo "$dirurl/$dir"; done || true)"
dirlist="$(echo "$dirlist" | clean_urls)"
done
done
filecount=$(echo "$filelist" | wc -l)
if [ -z "$filelist" ]
then
echo "error: no files parsed from $tmp/index.html"
exit 1
fi
echo "parsed $filecount tar.xz files:"; echo "$filelist"
# most time is spent here
echo "fetching $filecount sha256 files ..."
urllist="$(echo "$filelist" | while read file; do echo "$file.sha256"; done)"
# wget -r: keep directory structure
echo "$urllist" | xargs wget $wgetargs -nH -r -c --no-parent && {
actual=$(find . -type f -name '*.sha256' | wc -l)
echo "fetching $filecount sha256 files done: got $actual files"
} || {
# workaround: in rare cases, the server does not provide the sha256 files
# for example when the release is just a few hours old
# and the servers are not yet fully synced
actual=$(find . -type f -name '*.sha256' | wc -l)
echo "fetching $filecount sha256 files failed: got only $actual files"
# TODO fetch only missing tar.xz files
echo "fetching $filecount tar.xz files ..."
echo "$filelist" | xargs wget $wgetargs -nH -r -c --no-parent
echo "generating sha256 files ..."
find . -type f -name '*.tar.xz' | while read src; do
name=$(basename "$src")
sha256=$(sha256sum "$src" | cut -d' ' -f1)
echo "$sha256 $name" >"$src.sha256"
done
}
csv=$(mktemp $tmptpl.csv)
echo "writing temporary file $csv ..."
find . -type f -name '*.sha256' | while read sha256file; do
src="${sha256file%.*}" # remove extension
sha256=$(cat $sha256file | cut -d' ' -f1) # base16
sha256=$(nix-hash --type sha256 --to-base32 $sha256)
# Sanitize file name
filename=$(basename "$src" | tr '@' '_')
nameVersion="${filename%.tar.*}"
name=$(echo "$nameVersion" | sed -e 's,-[[:digit:]].*,,' | sed -e 's,-opensource-src$,,' | sed -e 's,-everywhere-src$,,')
version=$(echo "$nameVersion" | sed -e 's,^\([[:alpha:]][[:alnum:]]*-\)\+,,')
echo "$name,$version,$src,$filename,$sha256" >>$csv
done
files_before=$(grep -c 'src = ' "$SRCS")
echo "writing output file $SRCS ..."
cat >"$SRCS" <<EOF
# DO NOT EDIT! This file is generated automatically.
# Command: $0 $@
{ fetchurl, mirror }:
{
EOF
gawk -F , "{ print \$1 }" $csv | sort | uniq | while read name; do
versions=$(gawk -F , "/^$name,/ { print \$2 }" $csv)
latestVersion=$(echo "$versions" | sort -rV | head -n 1)
src=$(gawk -F , "/^$name,$latestVersion,/ { print \$3 }" $csv)
filename=$(gawk -F , "/^$name,$latestVersion,/ { print \$4 }" $csv)
sha256=$(gawk -F , "/^$name,$latestVersion,/ { print \$5 }" $csv)
url="${src:2}"
cat >>"$SRCS" <<EOF
$name = {
version = "$latestVersion";
src = fetchurl {
url = "\${mirror}/$url";
sha256 = "$sha256";
name = "$filename";
};
};
EOF
done
echo "}" >>"$SRCS"
files_after=$(grep -c 'src = ' "$SRCS")
echo "files before: $files_before"
echo "files after: $files_after"
echo "compare:"
echo "git diff $srcsrel"
popd >/dev/null
rm -fr $tmp >/dev/null
rm -f $csv >/dev/null