#!/usr/bin/env bash

# Purpose: plain text tar format
# Limitations: - only suitable for text files, directories, and symlinks
#              - stores only filename, content, and mode
#              - not designed for untrusted input
#
# Note: must work with bash version 3.2 (macOS)

# Copyright 2017 Roger Luethi
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Abort on any failing command and on expansion of an unset variable.
set -eu

# Force the C locale so that results do not depend on the caller's
# environment (for instance, glob matches are sorted the same way everywhere).
LC_ALL=C
export LC_ALL

# Selected sub-command; stays empty until an option chooses one.
CMD=""
path=""
# Original command line, recorded in the header of newly created archives.
ARG_STRING="$*"

#------------------------------------------------------------------------------
# Not all sed implementations can work on null bytes. In order to make ttar
# work out of the box on macOS, use Python as a stream editor.

# Set to 1 by test_environment when sed cannot process null bytes.
USE_PYTHON=0

# Python replacement for the sed script used when creating an archive.
# For every input line it
#   - puts a backslash in front of each literal EOF,
#   - puts a backslash in front of each literal NULLBYTE,
#   - replaces each null byte with the text NULLBYTE.
# The backslash in the replacement templates is doubled on purpose: a single
# backslash followed by a letter (\E, \N) is an unknown escape, which newer
# Python versions reject with an error instead of passing it through.
PYTHON_CREATE_FILTER=$(cat << 'PCF'
#!/usr/bin/env python

import re
import sys

for line in sys.stdin:
    line = re.sub(r'EOF', r'\\EOF', line)
    line = re.sub(r'NULLBYTE', r'\\NULLBYTE', line)
    line = re.sub('\x00', r'NULLBYTE', line)
    sys.stdout.write(line)
PCF
)

# Python replacement for the sed script used when extracting an archive
# (run by extract when USE_PYTHON is 1). For every input line it applies,
# in this order:
#   - NULLBYTE not preceded by a backslash becomes a null byte,
#   - \NULLBYTE becomes NULLBYTE,
#   - EOF preceded by a character other than a backslash is removed,
#   - \EOF becomes EOF.
# The heredoc delimiter is quoted, so the shell passes the Python source
# through without any expansion.
PYTHON_EXTRACT_FILTER=$(cat << 'PEF'
#!/usr/bin/env python

import re
import sys

for line in sys.stdin:
    line = re.sub(r'(?<!\\)NULLBYTE', '\x00', line)
    line = re.sub(r'\\NULLBYTE', 'NULLBYTE', line)
    line = re.sub(r'([^\\])EOF', r'\1', line)
    line = re.sub(r'\\EOF', 'EOF', line)
    sys.stdout.write(line)
PEF
)

# Detect whether sed can emit null bytes and fall back to Python otherwise.
#
# A sed that understands \x0 turns "a<newline>" into a null byte plus newline
# (2 bytes); any other byte count means the escape was not honoured.
#
# Globals:  USE_PYTHON (set to 1 when the Python fallback is selected)
# Outputs:  diagnostics on stderr, so that they never mix with archive
#           listings or other regular output on stdout
# Exits:    2 if sed is unsuitable and no python executable is available
function test_environment {
    if [[ "$(echo "a" | sed 's/a/\x0/' | wc -c)" -ne 2 ]]; then
        echo >&2 "WARNING sed unable to handle null bytes, using Python (slow)."
        # command -v is a shell builtin; unlike which, it is always available.
        if ! command -v python >/dev/null 2>&1; then
            echo >&2 "ERROR Python not found. Aborting."
            exit 2
        fi
        USE_PYTHON=1
    fi
}

#------------------------------------------------------------------------------

# Print the usage summary on stdout and terminate the script.
#
# Arguments: $1 - exit status (optional, defaults to 0 so that a call without
#                 arguments does not trip over nounset)
function usage {
    # Keep the program name out of the global namespace.
    local bname
    bname=$(basename "$0")
    cat << USAGE
Usage:   $bname [-C <DIR>] -c -f <ARCHIVE> <FILE...> (create archive)
         $bname            -t -f <ARCHIVE>           (list archive contents)
         $bname [-C <DIR>] -x -f <ARCHIVE>           (extract archive)

Options:
         -C <DIR>           (change directory)
         -v                 (verbose)
         --recursive-unlink (recursively delete existing directory if path
                             collides with file or directory to extract)

Example: Change to sysfs directory, create ttar file from fixtures directory
         $bname -C sysfs -c -f sysfs/fixtures.ttar fixtures/
USAGE
    exit "${1:-0}"
}

# Verbose echo: print the arguments on file descriptor 7, but only when
# VERBOSE is "yes". Always succeeds when verbose mode is off.
vecho() {
    # Guard clause instead of a trailing "&&" so the function still returns 0
    # in quiet mode.
    [[ "${VERBOSE:-}" == "yes" ]] || return 0
    echo "$@" >&7
}

# Record the sub-command selected on the command line.
#
# Globals:   CMD (read and written)
# Arguments: $1 - command name ("create", "list" or "extract")
# Exits:     via usage with status 2 if a command was already selected
function set_cmd {
    if [ -n "$CMD" ]; then
        # Diagnostics belong on stderr, like all other ERROR messages.
        echo >&2 "ERROR: more than one command given"
        echo
        usage 2
    fi
    CMD=$1
}

# Start from a clean slate: none of the option variables may be inherited
# from the caller's environment.
unset VERBOSE
unset RECURSIVE_UNLINK
unset ARCHIVE
unset CDIR

# The leading ':' selects silent error reporting, so invalid options and
# missing arguments are reported below. The '-:' entry makes getopts hand over
# long options ("--name") as option '-' with OPTARG "name".
while getopts :cf:-:htxvC: opt; do
    case $opt in
        c)
            set_cmd "create"
            ;;
        f)
            ARCHIVE=$OPTARG
            ;;
        h)
            usage 0
            ;;
        t)
            set_cmd "list"
            ;;
        x)
            set_cmd "extract"
            ;;
        v)
            VERBOSE=yes
            # vecho writes to fd 7; keep it pointing at the original stdout
            # even after stdout is redirected into the archive.
            exec 7>&1
            ;;
        C)
            CDIR=$OPTARG
            ;;
        -)
            case $OPTARG in
                recursive-unlink)
                    RECURSIVE_UNLINK="yes"
                    ;;
                *)
                    echo >&2 "ERROR: invalid option --$OPTARG"
                    echo
                    usage 1
                    ;;
            esac
            ;;
        :)
            # Silent mode reports a missing option argument as ':' with the
            # offending option letter in OPTARG.
            echo >&2 "ERROR: option -$OPTARG requires an argument"
            echo
            usage 1
            ;;
        *)
            echo >&2 "ERROR: invalid option -$OPTARG"
            echo
            usage 1
            ;;
    esac
done

# Remove processed options from arguments
shift $(( OPTIND - 1 ));

if [ "${CMD:-}" == "" ]; then
    echo >&2 "ERROR: no command given"
    echo
    usage 1
elif [ "${ARCHIVE:-}" == "" ]; then
    echo >&2 "ERROR: no archive name given"
    echo
    usage 1
fi

# Print the contents of a ttar archive, one entry per line.
#
# Regular files are printed as "path", directories as "path/" and symlinks as
# "path -> target".
#
# Arguments: $1 - archive file
#            $2 - must be absent or empty
# Exits:     via usage with status 1 on surplus arguments or a missing archive
function list {
    local path=""
    local size=0
    local line
    local ttar_file=$1
    if [ -n "${2:-}" ]; then
        echo >&2 "ERROR: too many arguments."
        echo
        usage 1
    fi
    if [ ! -e "$ttar_file" ]; then
        echo >&2 "ERROR: file not found ($ttar_file)"
        echo
        usage 1
    fi
    # IFS= keeps trailing whitespace, which may be part of a file name.
    while IFS= read -r line; do
        if [ "$size" -gt 0 ]; then
            # Skip file content; it must not be mistaken for keywords.
            size=$(( size - 1 ))
            continue
        fi
        if [[ $line =~ ^Path:\ (.*)$ ]]; then
            path=${BASH_REMATCH[1]}
        elif [[ $line =~ ^Lines:\ (.*)$ ]]; then
            size=${BASH_REMATCH[1]}
            echo "$path"
        elif [[ $line =~ ^Directory:\ (.*)$ ]]; then
            path=${BASH_REMATCH[1]}
            echo "$path/"
        elif [[ $line =~ ^SymlinkTo:\ (.*)$ ]]; then
            echo "$path -> ${BASH_REMATCH[1]}"
        fi
    done < "$ttar_file"
}

# Extract a ttar archive into the current directory.
#
# Globals:   USE_PYTHON, PYTHON_EXTRACT_FILTER, RECURSIVE_UNLINK (all read)
# Arguments: $1 - archive file
#            $2 - must be absent or empty
# Exits:     via usage with status 1 on surplus arguments or a missing archive;
#            with status 1 on an unknown keyword in the archive
function extract {
    local path=""
    local size=0
    local line_no=0
    local line
    local mode
    local eof_without_newline
    local ttar_file=$1
    if [ -n "${2:-}" ]; then
        echo >&2 "ERROR: too many arguments."
        echo
        usage 1
    fi
    if [ ! -e "$ttar_file" ]; then
        echo >&2 "ERROR: file not found ($ttar_file)"
        echo
        usage 1
    fi
    while IFS= read -r line; do
        line_no=$(( line_no + 1 ))
        if [ "$size" -gt 0 ]; then
            # Still inside the content of the current file.
            if [[ "$line" =~ [^\\]EOF ]]; then
                # An EOF not preceded by a backslash indicates that the line
                # does not end with a newline
                eof_without_newline=1
            else
                eof_without_newline=0
            fi
            # Replace NULLBYTE with null byte if at beginning of line
            # Replace NULLBYTE with null byte unless preceded by backslash
            # Remove one backslash in front of NULLBYTE (if any)
            # Remove EOF unless preceded by backslash
            # Remove one backslash in front of EOF
            #
            # printf is used instead of "echo -n" because echo would swallow
            # content lines that look like its own options (-n, -e, -E).
            if [ "$USE_PYTHON" -eq 1 ]; then
                printf '%s' "$line" | python -c "$PYTHON_EXTRACT_FILTER" >> "$path"
            else
                # The repeated pattern makes up for sed's lack of negative
                # lookbehind assertions (for consecutive null bytes).
                printf '%s' "$line" | \
                    sed -e 's/^NULLBYTE/\x0/g;
                            s/\([^\\]\)NULLBYTE/\1\x0/g;
                            s/\([^\\]\)NULLBYTE/\1\x0/g;
                            s/\\NULLBYTE/NULLBYTE/g;
                            s/\([^\\]\)EOF/\1/g;
                            s/\\EOF/EOF/g;
                    ' >> "$path"
            fi
            if [[ "$eof_without_newline" -eq 0 ]]; then
                echo >> "$path"
            fi
            size=$(( size - 1 ))
            continue
        fi
        if [[ $line =~ ^Path:\ (.*)$ ]]; then
            path=${BASH_REMATCH[1]}
            # Clear whatever currently occupies the path.
            if [ -L "$path" ]; then
                rm "$path"
            elif [ -d "$path" ]; then
                if [ "${RECURSIVE_UNLINK:-}" == "yes" ]; then
                    rm -r "$path"
                else
                    # Safe because symlinks to directories are dealt with above
                    rmdir "$path"
                fi
            elif [ -e "$path" ]; then
                rm "$path"
            fi
        elif [[ $line =~ ^Lines:\ (.*)$ ]]; then
            size=${BASH_REMATCH[1]}
            # Create file even if it is zero-length.
            touch "$path"
            vecho "    $path"
        elif [[ $line =~ ^Mode:\ (.*)$ ]]; then
            mode=${BASH_REMATCH[1]}
            chmod "$mode" "$path"
            vecho "$mode"
        elif [[ $line =~ ^Directory:\ (.*)$ ]]; then
            path=${BASH_REMATCH[1]}
            mkdir -p "$path"
            vecho "    $path/"
        elif [[ $line =~ ^SymlinkTo:\ (.*)$ ]]; then
            ln -s "${BASH_REMATCH[1]}" "$path"
            vecho "    $path -> ${BASH_REMATCH[1]}"
        elif [[ $line =~ ^# ]]; then
            # Ignore comments between files
            continue
        else
            echo >&2 "ERROR: Unknown keyword on line $line_no: $line"
            exit 1
        fi
    done < "$ttar_file"
}

# Emit the separator comment that closes every archive entry.
div() {
    printf '%s %s\n' \
        "# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -" \
        "- - - - - -"
}

# Print the permission bits of a file in octal notation.
#
# Globals:   STAT_OPTION, STAT_FORMAT (written on the first call, then reused)
# Arguments: $1 - file to inspect
get_mode() {
    local target=$1
    if [[ -z "${STAT_OPTION:-}" ]]; then
        # Start with the BSD flavour: octal output, user/group/other
        # (omit file type, sticky bit) ...
        STAT_OPTION='-f'
        STAT_FORMAT='%OLp'
        # ... and switch to the GNU flavour if this stat understands it.
        if stat -c '%a' "$target" >/dev/null 2>&1; then
            STAT_OPTION='-c'
            STAT_FORMAT='%a'
        fi
    fi
    stat "${STAT_OPTION}" "${STAT_FORMAT}" "$target"
}

# Write archive entries for the given paths to stdout, descending into
# directories recursively.
#
# Symlinks are checked first, so a symlink to a directory is stored as a
# symlink and not followed.
#
# Globals:   USE_PYTHON, PYTHON_CREATE_FILTER (read)
# Arguments: paths to archive
# Exits:     2 if a path is neither a symlink, a directory nor a regular file
function _create {
    # Unmatched globs must expand to nothing (empty directories).
    shopt -s nullglob
    # All working variables are local: this function calls itself.
    local file
    local entry
    local lines
    local mode
    local symlink_to
    local eof_without_newline
    while (( "$#" )); do
        file=$1
        if [ -L "$file" ]; then
            echo "Path: $file"
            symlink_to=$(readlink "$file")
            echo "SymlinkTo: $symlink_to"
            vecho "    $file -> $symlink_to"
            div
        elif [ -d "$file" ]; then
            # Strip trailing slash (if there is one)
            file=${file%/}
            echo "Directory: $file"
            mode=$(get_mode "$file")
            echo "Mode: $mode"
            vecho "$mode $file/"
            div
            # Find all files and dirs, including hidden/dot files. The third
            # pattern picks up names starting with two dots (such as "..foo")
            # without ever matching "." or "..".
            for entry in "$file/"{*,.[^.]*,..?*}; do
                _create "$entry"
            done
        elif [ -f "$file" ]; then
            echo "Path: $file"
            lines=$(wc -l "$file"|awk '{print $1}')
            eof_without_newline=0
            # A non-empty file whose last byte is not a newline has one more
            # line than wc counts.
            if [[ "$(wc -c "$file"|awk '{print $1}')" -gt 0 ]] && \
                    [[ "$(tail -c 1 "$file" | wc -l)" -eq 0 ]]; then
                eof_without_newline=1
                lines=$((lines+1))
            fi
            echo "Lines: $lines"
            # Add backslash in front of EOF
            # Add backslash in front of NULLBYTE
            # Replace null byte with NULLBYTE
            if [ "$USE_PYTHON" -eq 1 ]; then
                < "$file" python -c "$PYTHON_CREATE_FILTER"
            else
                < "$file" \
                    sed 's/EOF/\\EOF/g;
                            s/NULLBYTE/\\NULLBYTE/g;
                            s/\x0/NULLBYTE/g;
                    '
            fi
            if [[ "$eof_without_newline" -eq 1 ]]; then
                # Finish line with EOF to indicate that the original line did
                # not end with a linefeed
                echo "EOF"
            fi
            mode=$(get_mode "$file")
            echo "Mode: $mode"
            vecho "$mode $file"
            div
        else
            echo >&2 "ERROR: file not found ($file in $(pwd))"
            exit 2
        fi
        shift
    done
}

# Create a new archive from the given paths.
#
# Globals:   ttar_file (written), ARG_STRING (read)
# Arguments: $1      - archive file; an existing one is replaced
#            $2...   - paths to archive (at least one, non-empty)
# Outputs:   redirects stdout of the running shell into the archive
# Exits:     via usage with status 1 if no path was given
create() {
    ttar_file=$1
    shift
    if [[ -z "${1:-}" ]]; then
        echo "ERROR: missing arguments." >&2
        echo
        usage 1
    fi
    # Start from scratch rather than truncating whatever is there.
    [[ ! -e "$ttar_file" ]] || rm "$ttar_file"
    # Everything written to stdout from here on lands in the archive.
    exec > "$ttar_file"
    printf '%s\n' "# Archive created by ttar $ARG_STRING"
    _create "$@"
}

# Decide between sed and Python before any archive data is processed.
test_environment

if [[ -n "${CDIR:-}" ]]; then
    case $ARCHIVE in
        /*)
            # Absolute path: unaffected by the directory change
            ;;
        *)
            # Relative path: preserve the archive's location before changing
            # directory
            ARCHIVE="$(pwd)/$ARCHIVE"
            ;;
    esac
    cd "$CDIR"
fi

# CMD holds the name of the function implementing the selected command.
"$CMD" "$ARCHIVE" "$@"