#!/usr/bin/env python3
# Python 2 and 3 compatible.

"""
Pretty print the size of a file at a URL.

It's pretty forgiving about URL formats and follows redirections.

Example usage:
% urlsize www.cs.toronto.edu/~murray/pub/07thesis/murray_thesis_2007.pdf
Reporting size of file at: http://homepages.inf.ed.ac.uk/imurray2/pub/07thesis/murray_thesis_2007.pdf
3,650,691 bytes (3.48 MiB)
"""

# Iain Murray, June 2011. Tweaks for python3 June 2012. For python >=3.7 March 2023.

from __future__ import division
import sys, locale, re, os.path

# Pick the URL-opening module for the running interpreter. Python 2 ships it
# as urllib2; Python 3 moved the same API to urllib.request, which is bound to
# the old name so the rest of the file can use `urllib2` unconditionally.
using_py2 = True
try:
    import urllib2
except ImportError:
    # Only a missing module means "this is Python 3"; any other error raised
    # while importing should surface rather than be silently masked.
    import urllib.request as urllib2
    using_py2 = False

def usage():
    """
    Print a short usage message, naming the program as it was invoked.
    """
    program = sys.argv[0]
    message_lines = [
        '',
        '    Usage: ' + program + ' http://host/location\n',
        '    pretty prints the size of the remote file.',
        '',
    ]
    # One print per entry, so each line gets its own trailing newline.
    for line in message_lines:
        print(line)

def get_url_num_bytes(url, verbose=True):
    """
    Get number of bytes of file at a URL. None if not reported.

    url: a full URL, a bare "host/path" (http:// is assumed), or the path of
        an existing local file (turned into a file: URL).
    verbose: if true, print the final URL when it differs from the one
        requested (for example after a redirection).

    Returns the Content-Length header as an int, or None when the request
    fails or the header is missing or not a valid integer. A failed request
    also prints 'Failed to connect to url.'.
    """
    # urllib2 could be a bit more intelligent in guessing what I mean:
    if not re.match(r'^[a-zA-Z]*:', url):
        if os.path.exists(url):
            try:
                from urllib.request import pathname2url
            except ImportError:
                from urllib import pathname2url  # Python 2
            # Percent-encode the path: a raw '#' would be parsed as a URL
            # fragment and a raw '%XX' would be decoded, so the wrong file
            # name would be opened.
            url = 'file:' + pathname2url(os.path.abspath(url))
        else:
            url = 'http://' + url

    class HeadRequest(urllib2.Request):
        # Ask only for the headers; the body is not needed to learn the size.
        def get_method(self):
            return "HEAD"

    num_bytes = None
    final_url = url
    try:
        response = urllib2.urlopen(HeadRequest(
                url, None, {'User-Agent': 'Mozilla/5.0'}))
        try:
            headers = response.info()
            # Python 2 message objects offer getheader(); Python 3 ones only
            # offer get(). Use whichever is present.
            get_header = getattr(headers, 'getheader', None) or headers.get
            num_bytes = get_header('content-length')
            final_url = response.geturl()
        finally:
            # Release the connection or file handle promptly.
            response.close()
    except Exception:
        # Best-effort tool: any network/URL error is reported as a failure.
        # KeyboardInterrupt and SystemExit are deliberately not caught.
        print('Failed to connect to url.')
        return None

    if verbose and final_url != url:
        # urllib does redirections for me. Report if it's been clever:
        print('Reporting size of file at: ' + final_url)

    if num_bytes is None:
        return None
    try:
        return int(num_bytes)
    except (TypeError, ValueError):
        # A malformed Content-Length is treated the same as a missing one.
        return None

def pretty_print(num_bytes):
    """
    Output number of bytes according to locale and with IEC binary prefixes

    num_bytes: a non-negative integer size, or None when the size is unknown.

    Prints one line: the byte count with locale digit grouping, followed
    (for sizes of at least 1 KiB) by the size in the largest binary unit
    that fits, to three significant figures. Prints 'File size unavailable.'
    when num_bytes is None.
    """
    if num_bytes is None:
        print('File size unavailable.')
        return
    try:
        locale.setlocale(locale.LC_ALL, '')
    except locale.Error:
        # The environment names a locale this system lacks; carry on with
        # whatever locale is already active rather than crash.
        pass
    output = locale.format_string("%d", num_bytes, grouping=True) + ' bytes'

    prefixes = ('Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi', 'Yi')
    # Try the largest unit first so the first one that fits is the best one.
    for power in range(len(prefixes), 0, -1):
        unit = 1024 ** power
        # >= so that exactly one unit (e.g. 1024 bytes) reads as "1 KiB".
        if num_bytes >= unit:
            scaled = num_bytes / unit
            text = '%.3g' % scaled
            if 'e' in text:
                # %.3g switches to scientific notation from 1000 upwards
                # (e.g. '1.02e+03'); show a plain whole number instead.
                text = '%.0f' % scaled
            output += ' (%s %sB)' % (text, prefixes[power - 1])
            break
    print(output)


if __name__ == "__main__":
    # Expect exactly one argument that is non-empty and does not look like an
    # option. Anything else (including -h/--help) gets the usage message.
    args = sys.argv[1:]
    if len(args) != 1 or not args[0] or args[0].startswith('-'):
        usage()
        sys.exit(1)
    else:
        url = args[0]
        num_bytes = get_url_num_bytes(url)
        pretty_print(num_bytes)

