#!/bin/bash

# Wrapper script which tries to sensibly unpack the archive file(s) it's passed.
# 
# The following archive formats are supported
#     .7z .a .ace .arj .cab .cpio .deb .lha .pmd .rar .rpm .tar{,.gz,.Z,.bz2,.xz}
#     .tnef .zip .zoo  and MIME/uuencoded mail attachments.
# (provided the necessary utilities are available).
#
# file(1) is used to diagnose the archive type so no file extension need be
# present. Also archives that are actually in one of the above formats but with
# a different extension, such as
#     .taz .tgz .jar .xpi
# will work.
# 
# Features:
#     - Should unpack into a single subdirectory or file within the
#       current directory.
#     - If the archive would naturally dump into a single subdirectory it isn't
#       nested into a new one. (Although if you say no to clobbering the output
#       might be nested).
#     - World-writable bits are stripped off.
#     - When the archive would naturally unpack to an existing location you are
#       asked if you want to clobber it.
#       
#       SAY "NO" WHEN ASKED TO CLOBBER UNLESS YOU KNOW *EXACTLY* WHAT WILL HAPPEN
#
#       When you say no the script will tell you the name of a randomly-named
#       directory in which you can find your archive contents.
#
#       Clobbering will trash files and possibly whole directories if the
#       archive contains files with the same name. Commonly named directories
#       should be merged. BUG: restrictive permissions on existing files and
#       directories can break this script.
#
# In Debian (or Ubuntu) packages exist for all of the utilities here. This is a
# complete list, although you probably don't need or want to install them all:
#    arj binutils bzip2 cabextract cpio dpkg gzip lha macutils mpack p7zip-full
#    ppmd procmail rpm sharutils tar tnef unace unrar unshield unzip xz-utils zoo
# 
# BUGS/Limitations:
#     - Robustness: your platform might not match my assumptions. It works for
#       me on Ubuntu GNU/Linux with the unarchiving utilities I have installed.
#     - You cannot unpack standard input, just files
#     - It's up to you to check the clobbering behaviour will be what you want.
#     - In an adversarial environment there could be some security issues.
#       In particular look around the mv and find operations for race conditions.
#       Basically I haven't invested the time necessary to make this safe
#       outside filesystems you control. This goes for many shell scripts.
#     - Permissions. As above the script may fail to clobber files depending on
#       permissions. Also the different archive utilities called by this script
#       do different things with permissions by default. I have not attempted to
#       make them consistent.
#
# I hope you can find a use for this script. But on the off-chance you had
# delusions to the contrary: there is ABSOLUTELY NO WARRANTY; USE AT YOUR OWN RISK.
# 
# Iain Murray 2005, 2007, 2010

# After using my original script for a couple of years I finally fixed some
# issues in 2007:
#   - It's simpler to alter. Archive contents are no longer inspected before
#     unpacking; I just unpack somewhere and see what comes out. This makes
#     adding new file formats considerably easier and dealing with the utilities
#     more robust.
#   - The clobbering behaviour is now much better. Hopefully it should now be
#     consistent and most corner cases covered. 
#   - World-writable permissions are not allowed. I discovered a large number of
#     world-writable files in my filespace due to unpacking from archives.
#     Mainly .zips I think as by default tar uses the user's umask rather than
#     the archive's bits. I could of course enforce a umask, but don't feel the
#     need at the moment.
#
# 2010, added tar.xz support

# Abort on the first unhandled command failure.
set -e

# Name this script was invoked as, with any leading directories removed. It
# is the prefix of the status messages printed below. Parameter expansion is
# used instead of an unquoted `basename $0`, which was word-split whenever the
# script's path contained whitespace (wrong name, or an abort under set -e).
ME=${0##*/}
# Command used for every tar-based format handled by unpack.
TAR=tar

# Identify the type of a single file with file(1) and unpack it into the
# current directory using the matching utility.
#
# Arguments: $1 - path to the archive. It must be valid from the current
#                 directory (the main loop passes an absolute path).
# Globals:   TAR (read) - tar command; ME (read) - message prefix.
# Outputs:   whatever the chosen utility prints; a "Sorry" message on stdout
#            when the type is not recognised.
# Returns:   status of the last command run; 0 for an unrecognised type.
#
# The tests are tried in order and the first match wins, so the order matters:
# gzip/compress/bzip2/xz streams are all assumed to wrap a tar archive, and
# the loose "cab" test is tried only after the more specific ones.
function unpack {
    # Keep the description local and take it from `file -b`, so the file name
    # (which may itself contain ": " or words such as "gzip compressed")
    # never ends up in the text being matched. A failing or missing file(1)
    # leaves the description empty, which ends in the "Sorry" branch.
    local info
    info=$(file -b -- "$1") || true
    # The description is always fed to grep quoted, so it is neither
    # word-split nor glob-expanded against the current directory.
    if grep -q 'gzip compressed' <<< "$info" ; then
        "$TAR" zxf "$1"
    elif grep -q "compress'd" <<< "$info" ; then
        uncompress -c "$1" | "$TAR" x
    elif grep -q "bzip2 compressed" <<< "$info" ; then
        bzcat "$1" | "$TAR" x
    elif grep -q "xz compressed" <<< "$info" ; then
        xzcat "$1" | "$TAR" x
    elif grep -q -i "tar archive" <<< "$info" ; then
        "$TAR" xf "$1"
    elif grep -q "Zip archive" <<< "$info" ; then
        unzip "$1"
    elif grep -q "RAR archive" <<< "$info" ; then
        unrar x -ap. "$1"
    elif grep -q "7-zip" <<< "$info" ; then
        7z x "$1"
    elif grep -q "RPM " <<< "$info" ; then
        rpm2cpio "$1" | cpio -i --no-absolute-filenames -d --quiet
    elif grep -q "cpio archive" <<< "$info" ; then
        cpio -i --no-absolute-filenames -d --quiet < "$1"
    elif grep -q "Debian .* package" <<< "$info" ; then
        dpkg-deb -x "$1" .
    elif grep -q -i " ar archive" <<< "$info" ; then
        ar x "$1"
    elif grep -q -i "ACE archive" <<< "$info" ; then
        unace e "$1"
    elif grep -q -i "ARJ archive" <<< "$info" ; then
        arj e "$1"
    elif grep -q -i "ZOO archive" <<< "$info" ; then
        zoo x "$1"
    elif grep -q -i "\(tnef\|Transport Neutral Encapsulation Format\)" <<< "$info" ; then
        tnef "$1"
    elif grep -q -i "InstallShield CAB" <<< "$info" ; then
        unshield x "$1"
    elif grep -q -i "LHa" <<< "$info" ; then
        lha x "$1"
    elif grep -q -i "\(mail\|news\)" <<< "$info" ; then
        formail -s munpack < "$1"
    elif grep -q -i "uuencode" <<< "$info" ; then
        uudecode "$1"
    elif grep -q -i "cab" <<< "$info" ; then
        cabextract "$1"
    elif grep -q -i "PPMD archive" <<< "$info" ; then
        # Insists on file being in same directory, so link it in temporarily.
        ln -s -- "$1" .
        ppmd d "$1"
        # Quoted so an archive name containing whitespace removes exactly the
        # link created above.
        rm -- "$(basename -- "$1")"
    else
        printf '%s: Sorry I do not know about files of type "%s"\n' "$ME" "$info"
    fi
}

# Print an absolute form of the path given in $1. A path that already starts
# with '/' is printed unchanged; anything else is prefixed with the current
# directory. Nothing is normalised, so "." and ".." components are kept.
# (Some systems ship their own fullname command; defining one here is harmless.)
function fullname {
    case "$1" in
        /*) echo "$1" ;;
        *)  echo "$PWD/$1" ;;
    esac
}

function mergedirs {
    # Merge the contents of directory $1 into directory $2, then remove $1.
    # Anything in $2 that collides with an entry from $1 is deleted and
    # replaced; entries that exist only in $2 are left alone.
    # Arguments: $1 - source directory (consumed), $2 - destination directory.
    # Returns:   1, before anything is modified, if $1 cannot be entered;
    #            1 if the original directory cannot be re-entered; else 0.
    # Relies on non-POSIX behaviour: '{}' embedded inside a larger find -exec
    # argument, and mv -T.
    # WARNING: this is tricky
    #
    local target orig_pwd
    target=$(fullname "$2")
    orig_pwd=$PWD
    # Every find below operates on "." and the last one deletes all it finds.
    # This function is reached from inside an `if`, where set -e does not
    # apply, so a failed cd (e.g. a directory unpacked without search
    # permission) must stop here rather than run those finds in the caller's
    # directory.
    cd -- "$1" || return 1
    # Remove files and directories in the way of files
    find . \! -type d -exec rm -Rf "$target"/'{}' \;
    # Remove files in the way of directories. Don't remove directories (no -R
    # option to rm) or contents not in the archive will be lost.
    find . -type d \! -name '.' -exec rm -f "$target"/'{}' \; 2> /dev/null
    # Move directories (will only succeed when directories don't exist)
    # After this whole directory structure should exist.
    find . -type d \! -name '.' -exec mv -T '{}' "$target"/'{}' \; 2> /dev/null
    # Move any remaining files (in top level and those within unmoved directories)
    find . \! -type d -exec mv '{}' "$target"/'{}' \; 2> /dev/null
    # Delete anything that hasn't moved (hopefully just empty directories, but
    # I'm being a bit cavalier here) so that rmdir will work
    find . -maxdepth 1 \! -name '.' -exec rm -Rf '{}' \;
    # $1 may be relative, so the rmdir is only meaningful back where we started.
    cd -- "$orig_pwd" || return 1
    rmdir -- "$1"
    return 0
}

function agree_to_clobber {
    # Read one line from standard input (prompting with "Clobber y/n [n]? ")
    # and report the answer as the exit status: 0 when the reply starts with
    # 'y' or 'Y', 1 otherwise. An empty reply or end of input counts as "no".
    local response=''
    # -r keeps backslashes literal. A failed read (end of input) is tolerated:
    # whatever was read, possibly nothing, is still tested below.
    read -r -p "Clobber y/n [n]? " response || true
    # Test the reply inside [[ ]] so it is never word-split, glob-expanded or
    # parsed as an echo option. Previously a reply of "*" expanded to the file
    # names in the current directory, and "-e y" lost its "-e"; either could
    # be taken as "yes".
    [[ $response == [Yy]* ]]
}

function mvqueryclobber {
    # Arguments: 1=source_archive_file 2=unarchived_output 3=target_location
    # Attempt to move $2 to $3, but ask before breaking anything and just
    # return 1 if doing the move seems dangerous or confusing.
    # Globals:   ME (read) - message prefix.
    # Returns:   0 if $2 ended up at $3 (status of mv / mergedirs when the
    #            user agreed to clobber); 1 if the user declined, the move
    #            failed, or source and target are of different kinds.
    if [[ ! -e "$3" ]] ; then
        # Target doesn't currently exist so try simple move
        mv -- "$2" "$3" || return 1
        return 0
    fi

    # Archive name for the prompts. Quoted, so a path containing whitespace
    # is passed to basename as one argument instead of being split into a
    # name, a suffix and stray operands.
    local archive_name
    archive_name=$(basename -- "$1") || archive_name=$1

    if [[ -f "$2" && -f "$3" ]] ; then
        # Source and target are both ordinary files
        printf '%s: Would you like to clobber existing file\n' "$ME"
        printf '%s\n' "$3"
        printf 'with the file just unpacked from %s ?\n' "$archive_name"
        if agree_to_clobber ; then
            mv -f -- "$2" "$3"
            return
        fi
        return 1
    fi

    if [[ -d "$2" && -d "$3" ]] ; then
        # Source and target are both directories
        printf '%s: Would you like to clobber existing data in\n' "$ME"
        printf '%s\n' "$3"
        printf 'with the data just unpacked from %s ?\n' "$archive_name"
        printf '%s\n' 'WARNING: this can be really bad idea (only say yes if very sure)'
        if agree_to_clobber ; then
            mergedirs "$2" "$3"
            return
        fi
        return 1
    fi

    # Source and target are mix of file-types, or are weird files. Don't
    # want to risk the move as bad things might happen.
    return 1
}

# Main loop: unpack each file named on the command line, one at a time, into
# a scratch directory and then move the result to a sensibly named place in
# the current directory. A missing input file aborts the whole run.
for input_file in "$@" ; do
    if [ ! -e "$input_file" ] ; then
        printf '%s: %s does not exist.\n' "$ME" "$input_file" >&2
        exit 1
    fi
    input_file_fullname=$(fullname "$input_file")
    # Archive name with any leading directories removed
    input_basename=${input_file##*/}

    # Safely extract into randomly-named out_dir, not allowing any
    # world-writable bits that may be set by archive program
    out_dir=${input_basename}_$RANDOM
    mkdir -- "$out_dir"
    # Remember the mode mkdir gave the directory so it can be restored once
    # unpacking is done; "other" is locked out while the archiver runs.
    user_perms=$(stat --format="%a" -- "$out_dir")
    chmod o-rwx -- "$out_dir"
    cd -- "$out_dir"
    unpack "$input_file_fullname"
    chmod -R o-w .
    chmod "$user_perms" .

    # Once extracted need to move into nicely named place. But this risks
    # clobbering and depends on how many files came out of the archive. This
    # (hopefully) deals with all the corner cases:
    #
    # Collect every entry that came out, hidden ones included. A bare `*`
    # skips dotfiles, so an archive holding only hidden entries looked empty
    # and extra hidden entries made the later rmdir fail. An unmatched
    # pattern stays literal, hence the existence test (which also means a
    # file really named "*" is counted rather than mistaken for "no match").
    output_files=()
    for entry in * .[!.]* ..?* ; do
        if [ -e "$entry" ] || [ -L "$entry" ] ; then
            output_files+=("$entry")
        fi
    done
    num_files=${#output_files[@]}
    cd ..
    if [ "$num_files" -eq 0 ] ; then
        printf '%s: Extracted no files from %s\n' "$ME" "$input_file"
        rmdir -- "$out_dir"
    elif [ "$num_files" -eq 1 ] ; then
        # A single file or directory came out: try to place it directly in
        # the current directory and drop the now-empty scratch directory.
        only_output=${output_files[0]}
        if mvqueryclobber "$input_file" "$out_dir/$only_output" "$only_output" ; then
            printf '%s: Extracted %s to %s\n' "$ME" "$input_file" "$only_output"
            rmdir -- "$out_dir"
        else
            printf '%s: Extracted %s to %s\n' "$ME" "$input_file" "$out_dir/$only_output"
        fi
    else
        # Expanded into multiple files, need to make up a nice directory name to put them in
        # by knocking off something that looks like a file extension or adding
        # _contents to the archive name.
        nicer_out_dir=$(printf '%s\n' "${input_basename}_contents" \
            | sed -e 's/\(.\)\(\.tar\|\)\.[a-z0-9]\{1,4\}_contents$/\1/i')
        if mvqueryclobber "$input_file" "$out_dir" "$nicer_out_dir" ; then
            printf '%s: Extracted %s to %s\n' "$ME" "$input_file" "$nicer_out_dir"
        else
            printf '%s: Extracted %s to %s\n' "$ME" "$input_file" "$out_dir"
        fi
    fi
done

