libreoffice: generate-libreoffice-srcs.{sh->py}

This commit is contained in:
Chris Martin 2016-09-18 23:01:16 -04:00
parent 3afe2061a2
commit 754836ab4c
6 changed files with 278 additions and 80 deletions

View file

@ -19,9 +19,11 @@ stdenv.mkDerivation {
builder = ./download-list-builder.sh;
};
buildInputs = [ python3 ];
shellHook = ''
function generate {
./generate-libreoffice-srcs.sh | tee libreoffice-srcs.nix
python3 generate-libreoffice-srcs.py > libreoffice-srcs.nix
}
'';
}

View file

@ -0,0 +1,269 @@
#!/usr/bin/env python3
"""
Converts the LibreOffice `download.lst` file into a Nix expression.
Requires an environment variable named `downloadList` identifying the path
of the input file, and writes the result to stdout.
todo - Ideally we would move as much as possible into derivation dependencies.
"""
import collections, itertools, json, re, sys, os
def main():
    """
    Print every package as a Nix attribute set, wrapped in a Nix list.

    Each entry carries `name` (the tarball file name), `md5` and `brief`;
    `subDir` is emitted only for packages that have a 'subdir' attribute.
    """
    print('[')
    for package in get_packages():
        if package['brief']:
            brief = 'true'
        else:
            brief = 'false'
        print('{')
        print(' name = "{}";'.format(package['tarball']))
        print(' md5 = "{}";'.format(package['md5']))
        print(' brief = {};'.format(brief))
        if 'subdir' in package:
            print(' subDir = "{}";'.format(package['subdir']))
        print('}')
    print(']')
def get_packages():
    """
    Return the complete package data: the entries parsed from
    `download.lst`, each extended with our hand-maintained additions.
    """
    parsed = get_packages_from_download_list()
    extras = get_additions()
    return apply_additions(parsed, extras)
def get_additions():
    """
    Load the additions file from the current working directory.

    The result maps a package name (the all-caps identifier used in
    `download.lst`) to a dict of extra attributes to set on that package.
    """
    with open('./libreoffice-srcs-additions.json') as additions_file:
        text = additions_file.read()
    return json.loads(text)
def apply_additions(xs, additions):
    """
    Lazily yield each package from `xs` merged with the extra attributes
    that `additions` holds under the package's 'name'.

    Attributes already present on the package take precedence over the
    additions, because `dict_merge` keeps the first value it sees.
    """
    for package in xs:
        extras = additions.get(package['name'], {})
        yield dict_merge([package, extras])
def get_packages_from_download_list():
    """
    Parse `download.lst` and yield one dict per package with the keys
    'name', 'tarball', 'md5' and 'brief', ordered by where the package
    first appears in the file.

    Lines that cannot be interpreted are reported on stderr and skipped.
    A package is only produced when a tarball, an md5 and a brief value
    can all be paired up for it; incomplete groups are dropped silently
    because `zip` stops at the shortest list.
    """
    attr_keys = ['tarball', 'md5', 'brief']

    def interpreted_lines():
        # Parsed lines enriched with the 'name' and 'attrs' they stand for.
        for parsed in sub_symbols(parse_lines(get_lines())):
            meaning = interpret(parsed)
            if meaning == 'unrecognized':
                print_skipped_line(parsed)
                continue
            yield dict_merge([parsed, meaning])

    def cluster(group):
        """
        Pair up the n-th tarball, md5 and brief lines of one package, so
        that a package listed in `download.lst` more than once yields one
        tuple per occurrence.
        """
        per_key = [[line for line in group if key in line['attrs']]
                   for key in attr_keys]
        return zip(*per_key)

    def packages():
        by_name = groupby(interpreted_lines(), lambda line: line['name'])
        for (name, group) in by_name:
            for members in cluster(group):
                yield {'name': name,
                       'attrs': dict_merge(m['attrs'] for m in members),
                       'index': min(m['index'] for m in members)}

    # Grouping sorts by name; restore the original file order here.
    for package in sorted(packages(), key=lambda p: p['index']):
        yield dict_merge([{'name': package['name']},
                          package['attrs']])
def dict_merge(xs):
    """
    Merge an iterable of dicts into a new dict. When a key occurs in more
    than one dict, the value from the earliest dict wins.

    >>> dict_merge([{1: 2}, {3: 4}, {3: 5}])
    {1: 2, 3: 4}
    """
    merged = {}
    # Apply the dicts last-to-first so that earlier ones overwrite later ones.
    for mapping in reversed(list(xs)):
        merged.update(mapping)
    return merged
def groupby(xs, f):
    """
    Group the items of `xs` by the key function `f`.

    Yields `(key, items)` pairs in ascending key order; `items` is a list
    that keeps the relative order the items had in `xs`. The result is a
    generator, so wrap it in `list()` to get all groups at once.

    >>> list(groupby([1, 2, 3, 4], lambda x: x % 2))
    [(0, [2, 4]), (1, [1, 3])]
    """
    # itertools.groupby only merges adjacent items, hence the sort first.
    ordered = sorted(xs, key=f)
    for key, members in itertools.groupby(ordered, f):
        yield (key, list(members))
def get_lines():
    """
    Return the lines of the `download.lst` file, without line endings.

    The path of the file is taken from the `downloadList` environment
    variable.

    Raises:
        RuntimeError: if `downloadList` is unset or empty.
    """
    download_list = os.getenv('downloadList')
    if not download_list:
        # Without this check open(None) fails with an opaque TypeError.
        raise RuntimeError(
            'The downloadList environment variable must be set to the '
            'path of download.lst')
    with open(download_list) as f:
        return f.read().splitlines()
def print_skipped_line(x):
    """
    Report on stderr that a line was skipped.

    Input: Dict with keys 'index' (position of the line) and 'original'
           (the text of the line)
    """
    message = 'Skipped line {}: {}'.format(x['index'], x['original'])
    print(message, file=sys.stderr)
def parse_lines(lines):
    """
    Input: List of strings (the lines from `download.lst`)
    Output: Iterator of dicts with keys 'key', 'value', 'index' and
            'original'

    'index' is the zero-based position of the line. Blank and comment
    lines are dropped quietly; lines that fail to parse are reported on
    stderr and dropped.
    """
    for position, text in enumerate(lines):
        record = {'index': position, 'original': text}
        outcome = parse_line(text)
        if outcome == 'nothing':
            continue
        if outcome == 'unrecognized':
            print_skipped_line(record)
            continue
        yield dict_merge([record, outcome])
def parse_line(line):
    """
    Parse one line of `download.lst`.

    Input: A string
    Output: One of 1. A dict with keys 'key', 'value' (for a line of the
                      form `export KEY := VALUE`; the value is stripped
                      of surrounding whitespace)
                   2. 'nothing' (if the line is blank or only a comment)
                   3. 'unrecognized' (if parsing failed)

    >>> parse_line('export FOO := bar')
    {'key': 'FOO', 'value': 'bar'}
    """
    # Raw strings: '\s' in a normal string literal is an invalid escape.
    if re.match(r'\s*(#.*)?$', line):
        return 'nothing'
    match = re.match(r'\s*export\s+([^:\s]+)\s*:=\s*(.*)$', line)
    if match is None:
        return 'unrecognized'
    return {
        'key': match.group(1),
        'value': match.group(2).strip(),
    }
def sub_symbols(xs):
    """
    Do substitution of variables across all lines.

    Every `$(name)` reference in a line's 'value' is replaced by the
    'value' of the line whose 'key' is `name`. If a key occurs on several
    lines, the last one is used. Substitution is a single pass over the
    unsubstituted values.

    Example: the lines {'key': 'a', 'value': 'x'} and
    {'key': 'c', 'value': '$(a)yz'} come out as
    {'key': 'a', 'value': 'x'} and {'key': 'c', 'value': 'xyz'}.
    """
    entries = list(xs)
    by_key = {entry['key']: entry for entry in entries}

    def lookup(name):
        found = by_key.get(name)
        if found is None:
            return None
        return found['value']

    for entry in entries:
        expanded = sub_str(entry['value'], lookup)
        # The substituted value comes first so that it overrides the original.
        yield dict_merge([{'value': expanded}, entry])
def sub_str(string, func):
    """
    Do substitution of variables in a single line.

    `func` maps a variable name to its replacement text. When it returns
    None, the `$(name)` reference is replaced by the bare name.

    >>> sub_str("x = $(x)", lambda k: {'x': 'a'}[k])
    'x = a'
    """
    def expand(reference):
        name = reference.group(1)
        replacement = func(name)
        if replacement is None:
            return name
        return replacement

    return re.sub(r'\$\(([^\$\(\)]+)\)', expand, string)
def interpret(x):
    """
    Work out what a parsed line means by trying each interpreter in turn
    and taking the first result that is not None.

    Input: Dict with keys 'key' and 'value'
    Output: One of 1. Dict with keys 'name' and 'attrs'
                   2. 'unrecognized' (if interpretation failed)
    """
    # Order matters: md5-prefixed tarballs must be tried before plain ones.
    interpreters = (interpret_md5,
                    interpret_tarball_with_md5,
                    interpret_tarball)
    attempts = (attempt(x) for attempt in interpreters)
    return next((found for found in attempts if found is not None),
                'unrecognized')
def interpret_md5(x):
    """
    Interpret a `<NAME>_MD5SUM` line; returns None for any other key.

    >>> interpret_md5({'key': 'ODFGEN_MD5SUM',
    ...                'value': '32572ea48d9021bbd6fa317ddb697abc'})
    {'name': 'ODFGEN', 'attrs': {'md5': '32572ea48d9021bbd6fa317ddb697abc'}}
    """
    key_match = re.match('^(.*)_MD5SUM$', x['key'])
    if key_match is None:
        return None
    attrs = {'md5': x['value']}
    return {'name': key_match.group(1), 'attrs': attrs}
def interpret_tarball(x):
    """
    Interpret a `<NAME>_TARBALL` line whose value is taken as the tarball
    file name as-is; returns None for any other key.

    >>> interpret_tarball({'key': 'FREEHAND_TARBALL',
    ...                    'value': 'libfreehand-0.1.1.tar.bz2'})
    {'name': 'FREEHAND', 'attrs': {'tarball': 'libfreehand-0.1.1.tar.bz2', 'brief': True}}
    """
    key_match = re.match('^(.*)_TARBALL$', x['key'])
    if key_match is None:
        return None
    attrs = {'tarball': x['value'], 'brief': True}
    return {'name': key_match.group(1), 'attrs': attrs}
def interpret_tarball_with_md5(x):
    """
    Interpret a `<NAME>_TARBALL` line whose value is a 32-digit hex md5,
    a dash, and then the tarball file name; returns None otherwise.

    >>> interpret_tarball_with_md5({
    ...     'key': 'CLUCENE_TARBALL',
    ...     'value': '48d647fbd8ef8889e5a7f422c1bfda94-clucene-core-2.3.3.4.tar.gz'})
    {'name': 'CLUCENE', 'attrs': {'tarball': 'clucene-core-2.3.3.4.tar.gz', 'md5': '48d647fbd8ef8889e5a7f422c1bfda94', 'brief': False}}
    """
    key_match = re.match('^(.*)_TARBALL$', x['key'])
    value_match = re.match('(?P<md5>[0-9a-fA-F]{32})-(?P<tarball>.+)$',
                           x['value'])
    if not (key_match and value_match):
        return None
    attrs = {'tarball': value_match.group('tarball'),
             'md5': value_match.group('md5'),
             'brief': False}
    return {'name': key_match.group(1), 'attrs': attrs}
# Only generate output when run as a script, so that importing this module
# (e.g. to run the doctests) has no side effects.
if __name__ == '__main__':
    main()

View file

@ -1,77 +0,0 @@
#!/run/current-system/sw/bin/bash
# Reads libreoffice-srcs-additions.sh followed by the file named by
# $downloadList, and prints a Nix list of { name; md5; brief; } attribute
# sets on stdout. Diagnostics (evaluated additions, skipped lines) go to
# stderr.

# Ideally we would move as much as possible into derivation dependencies

# Open the Nix list.
cat <<EOF
[
EOF

# Print one attribute set from the current $name, $md5 and $brief, then
# forget any saved line.
write_entry(){
echo '{'
echo " name = \"${name}\";"
echo " md5 = \"${md5}\";"
echo " brief = ${brief};"
# Look up the variable additions_<prefix>, where <prefix> is $name cut at
# its first '-', '_' or '.', and print its contents (extra attribute text).
eval "echo -n \"\$additions_${name%%[-_.]*}\""
# Terminate that text with a newline, but only if there was any.
eval "test -n \"\$additions_${name%%[-_.]*}\" && echo"
echo '}'
saved_line=
}

# Holds a *_TARBALL value that had no name part after its first 33
# characters, for use by a following *_MD5SUM line.
saved_line=
# The additions file is read first, so the variables it sets through its
# EVAL lines exist before any entry is written.
cat "$(dirname "$0")/libreoffice-srcs-additions.sh" "$downloadList" |
while read line; do
case "$line" in
# "EVAL <code>": log <code> to stderr and execute it in this shell.
# NOTE(review): both input files are concatenated, so an EVAL line in
# $downloadList would be executed too - confirm that input is trusted.
EVAL\ *)
echo "${line#* }" >&2;
eval "${line#* }";
saved_line=
;;
\#*)
echo Skipping comment: "$line" >&2;
;;
# "<X>_MD5SUM := <md5>": the tarball name comes from the saved line or,
# failing that, from the next input line.
*_MD5SUM\ :=*)
if test -n "$saved_line"; then
tbline="$saved_line"
else
read tbline;
fi;
# Keep only the last word of the line, without any leading ':='.
line=${line##* };
line=${line##*:=};
# True when tbline contains VERSION_MICRO: that line carries a version
# number, and the tarball line is the one after it.
if [ "${tbline#*VERSION_MICRO}" != "$tbline" ]; then
verline=${tbline##* };
read tbline;
tbline=${tbline##* };
tbline=${tbline##*:=};
md5=$line
name=$tbline;
# Substitute the version for the $(..._VERSION_MICRO) reference in the name.
name="${name/\$([A-Z]*_VERSION_MICRO)/$verline}"
else
tbline=${tbline##* };
tbline=${tbline##*:=};
md5=$line
name=$tbline;
fi
brief=true;
write_entry;
;;
# "<X>_TARBALL := <md5>-<name>": the md5 is embedded in the value.
*_TARBALL\ :=*)
line=${line##* };
line=${line##*:=};
line="${line#,}"
# First 32 characters are the md5; the name starts at offset 33, which
# skips the single separator character in between.
md5=${line:0:32};
name=${line:33};
name="${name%)}"
brief=false;
if test -n "$name"; then
write_entry;
else
# Nothing after the first 33 characters: keep the value for a later
# *_MD5SUM line.
saved_line="$line";
fi
;;
*)
echo Skipping: "$line" >&2;
;;
esac
done
# Close the Nix list.
echo ']'

View file

@ -0,0 +1,3 @@
{
"LIBGLTF": {"subdir": "libgltf/"}
}

View file

@ -1 +0,0 @@
EVAL additions_libgltf=' subDir = "libgltf/";'

View file

@ -19,9 +19,11 @@ stdenv.mkDerivation {
builder = ./download-list-builder.sh;
};
buildInputs = [ python3 ];
shellHook = ''
function generate {
./generate-libreoffice-srcs.sh | tee libreoffice-srcs-still.nix
python3 generate-libreoffice-srcs.py > libreoffice-srcs-still.nix
}
'';
}