#!/usr/bin/env python3
# Python 2 and 3 compatible.
"""
Pretty print the size of a file at a URL.

It's pretty forgiving about URL formats and follows redirections.

Example usage:
    % urlsize www.cs.toronto.edu/~murray/pub/07thesis/murray_thesis_2007.pdf
    Reporting size of file at: http://homepages.inf.ed.ac.uk/imurray2/pub/07thesis/murray_thesis_2007.pdf
    3,650,691 bytes (3.48 MiB)
"""

# Iain Murray, June 2011. Tweaks for python3 June 2012. For python >=3.7 March 2023.

from __future__ import division

import locale
import os.path
import re
import sys

using_py2 = True
try:
    import urllib2
except ImportError:
    # Python 3: the same functionality lives in urllib.request.
    import urllib.request as urllib2
    using_py2 = False


# IEC binary prefixes, smallest first; each one is 1024 times the previous.
_IEC_UNITS = ('KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB')


class _HeadRequest(urllib2.Request):
    """A request that asks for headers only (HTTP HEAD), not the body."""

    def get_method(self):
        return "HEAD"


def usage():
    """Print a short usage message to standard output."""
    print('')
    print('   Usage: ' + sys.argv[0] + ' http://host/location\n')
    print('   pretty prints the size of the remote file.')
    print('')


def get_url_num_bytes(url, verbose=True):
    """
    Get number of bytes of file at a URL. None if not reported.

    url     -- location to query. If it has no "scheme:" prefix it is taken
               to be a local file when such a path exists, and otherwise an
               http:// address.
    verbose -- if true, print the final URL when it differs from the one
               requested (e.g. after a redirection).

    Returns the Content-Length as an int, or None when the request fails
    (a message is printed in that case) or when the header is missing or
    is not a valid integer.
    """
    # urllib2 could be a bit more intelligent in guessing what I mean:
    if not re.match(r'^[a-zA-Z]*:', url):
        if os.path.exists(url):
            url = 'file:' + url
        else:
            url = 'http://' + url

    try:
        # Building the request stays inside the try: it can itself raise
        # (e.g. ValueError for an unusable URL).
        response = urllib2.urlopen(
            _HeadRequest(url, None, {'User-Agent': 'Mozilla/5.0'}))
        try:
            headers = response.info()
            if using_py2:
                raw_length = headers.getheader('content-length')
            else:
                # For python 3
                raw_length = headers.get('content-length')
            final_url = response.geturl()
        finally:
            # Only the headers are needed; release the connection/file.
            response.close()
    except Exception:
        # Deliberately broad so that any failure is reported rather than
        # raised, but (unlike a bare except) Ctrl-C still interrupts.
        print('Failed to connect to url.')
        return None

    # urllib does redirections for me. Report if it's been clever:
    if verbose and final_url != url:
        print('Reporting size of file at: ' + final_url)

    if raw_length is None:
        return None
    try:
        return int(raw_length)
    except ValueError:
        # A malformed Content-Length header is treated as "not reported".
        return None


def _format_size(num_bytes):
    """
    Return num_bytes as text: the locale-grouped byte count, followed by
    the value in the largest IEC unit that is at least 1 (if any).
    """
    output = locale.format_string("%d", num_bytes, grouping=True) + ' bytes'

    # Find the largest unit that num_bytes reaches. Integer arithmetic keeps
    # the boundaries exact, so e.g. exactly 1024 bytes is reported as 1 KiB.
    scale = 1
    unit = None
    for name in _IEC_UNITS:
        if num_bytes < scale * 1024:
            break
        scale *= 1024
        unit = name

    if unit is not None:
        value = num_bytes / scale
        if value >= 999.5:
            # '%.3g' would switch to exponent notation here ("1e+03 KiB").
            number = '%.0f' % value
        else:
            number = '%.3g' % value
        output += ' (%s %s)' % (number, unit)
    return output


def pretty_print(num_bytes):
    """
    Output number of bytes according to locale and with IEC binary prefixes

    num_bytes -- size as an int, or None if the size is not known.
    """
    if num_bytes is None:
        print('File size unavailable.')
        return

    try:
        locale.setlocale(locale.LC_ALL, '')
    except locale.Error:
        # The environment names a locale that is not installed; carry on
        # with whatever locale is currently active.
        pass

    print(_format_size(num_bytes))


def main(argv=None):
    """
    Command-line entry point. Returns the process exit status: 1 after
    printing the usage message for bad arguments, otherwise 0.
    """
    if argv is None:
        argv = sys.argv
    # Exactly one non-empty argument that does not look like an option.
    if len(argv) != 2 or not argv[1] or argv[1].startswith('-'):
        usage()
        return 1
    pretty_print(get_url_num_bytes(argv[1]))
    return 0


if __name__ == "__main__":
    sys.exit(main())