inasafe/inasafe
extras/system_tools.py

"""Implementation of tools to do with system administration made
as platform independent as possible.
"""

import sys
import os
import string
import urllib
import urllib2
import getpass
import tarfile
import warnings

try:
    import hashlib
except ImportError:
    import md5 as hashlib

def get_user_name():
    """Get user name provide by operating system
    """

    if sys.platform == 'win32':
        #user = os.getenv('USERPROFILE')
        user = os.getenv('USERNAME')
    else:
        user = os.getenv('LOGNAME')

    return user

def get_host_name():
    """Get host name provide by operating system
    """

    if sys.platform == 'win32':
        host = os.getenv('COMPUTERNAME')
    else:
        host = os.uname()[1]

    return host


def safe_crc(string):
    """64 bit safe crc computation.

    See http://docs.python.org/library/zlib.html#zlib.crc32:

        To generate the same numeric value across all Python versions
        and platforms use crc32(data) & 0xffffffff.
    """

    from zlib import crc32

    return crc32(string) & 0xffffffff


def compute_checksum(filename, max_length=2**20):
    """Compute the CRC32 checksum for specified file

    Optional parameter max_length limits the number of bytes read,
    which bounds the time spent on large files.
    Default = 2**20 (1 MB)
    """

    fid = open(filename, 'rb') # Use binary for portability
    crcval = safe_crc(fid.read(max_length))
    fid.close()

    return crcval
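
# Usage sketch (not part of the original module; 'data.tif' is a
# hypothetical file name):
#
#     crc = compute_checksum('data.tif')
#     # Only the first MB is read by default; pass max_length to change that:
#     crc_full = compute_checksum('data.tif', max_length=2**24)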

def get_pathname_from_package(package):
    """Get pathname of given package (provided as string)

    This is useful for reading files residing in the same directory as
    a particular module. Typically, this is required in unit tests depending
    on external files.

    The given module must start from a directory on the pythonpath
    and be importable using the import statement.

    Example
    path = get_pathname_from_package('anuga.utilities')

    """

    exec('import %s as x' %package)

    path = x.__path__[0]

    return path

    # Alternative approach that has been used at times
    #try:
    #    # When unit test is run from current dir
    #    p1 = read_polygon('mainland_only.csv')
    #except:
    #    # When unit test is run from ANUGA root dir
    #    from os.path import join, split
    #    dir, tail = split(__file__)
    #    path = join(dir, 'mainland_only.csv')
    #    p1 = read_polygon(path)


##
# @brief Split a string into 'clean' fields.
# @param str The string to process.
# @param delimiter The delimiter string to split 'line' with.
# @return A list of 'cleaned' field strings.
# @note Any fields that were initially zero length will be removed.
# @note If a field contains '\n' it isn't zero length.
def clean_line(str, delimiter):
    """Split string on given delimiter, remove whitespace from each field."""

    return [x.strip() for x in str.strip().split(delimiter) if x != '']
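
# Example (illustrative only): splitting a comma separated line and
# dropping empty fields.
#
#     clean_line(' a, b ,, c \n', ',')   # -> ['a', 'b', 'c']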


################################################################################
# The following two functions are used to get around a problem with numpy and
# NetCDF files.  Previously, using Numeric, we could take a list of strings and
# convert to a Numeric array resulting in this:
#     Numeric.array(['abc', 'xy']) -> [['a', 'b', 'c'],
#                                      ['x', 'y', ' ']]
#
# However, under numpy we get:
#     numpy.array(['abc', 'xy']) -> ['abc',
#                                    'xy']
#
# And writing *strings* to a NetCDF file is problematic.
#
# The solution is to use these two routines to convert a 1-D list of strings
# to the 2-D list of chars form and back.  The 2-D form can be written to a
# NetCDF file as before.
#
# The other option, of inverting a list of tag strings into a dictionary with
# keys being the unique tag strings and the key value a list of indices of where
# the tag string was in the original list was rejected because:
#    1. It's a lot of work
#    2. We'd have to rewrite the I/O code a bit (extra variables instead of one)
#    3. The code below is fast enough in an I/O scenario
################################################################################

##
# @brief Convert 1-D list of strings to 2-D list of chars.
# @param l 1-dimensional list of strings.
# @return A 2-D list of 'characters' (1 char strings).
# @note No checking that we supply a 1-D list.
def string_to_char(l):
    '''Convert 1-D list of strings to 2-D list of chars.'''

    if not l:
        return []

    if l == ['']:
        l = [' ']

    maxlen = reduce(max, map(len, l))
    ll = [x.ljust(maxlen) for x in l]
    result = []
    for s in ll:
        result.append([x for x in s])
    return result


##
# @brief Convert 2-D list of chars to 1-D list of strings.
# @param ll 2-dimensional list of 'characters' (1 char strings).
# @return A 1-dimensional list of strings.
# @note Each string has had right-end spaces removed.
def char_to_string(ll):
    '''Convert 2-D list of chars to 1-D list of strings.'''

    return map(string.rstrip, [''.join(x) for x in ll])
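
# Round-trip sketch for the conversion described in the comment block above
# (illustrative only, not part of the original module):
#
#     string_to_char(['abc', 'xy'])
#     # -> [['a', 'b', 'c'],
#     #     ['x', 'y', ' ']]     (shorter strings are space padded)
#
#     char_to_string([['a', 'b', 'c'], ['x', 'y', ' ']])
#     # -> ['abc', 'xy']         (padding is stripped on the way back)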

################################################################################

##
# @brief Get list of variable names in a python expression string.
# @param source A string containing a python expression.
# @return A list of variable name strings.
# @note Throws SyntaxError exception if not a valid expression.
def get_vars_in_expression(source):
    '''Get list of variable names in a python expression.'''

    import compiler
    from compiler.ast import Node

    ##
    # @brief Internal recursive function.
    # @param node An AST parse Node.
    # @param var_list Input list of variables.
    # @return An updated list of variables.
    def get_vars_body(node, var_list=[]):
        if isinstance(node, Node):
            if node.__class__.__name__ == 'Name':
                for child in node.getChildren():
                    if child not in var_list:
                        var_list.append(child)
            for child in node.getChildren():
                if isinstance(child, Node):
                    var_list = get_vars_body(child, var_list)

        return var_list

    return get_vars_body(compiler.parse(source))
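
# Example (illustrative only): extracting the variable names used in an
# expression string.
#
#     get_vars_in_expression('height + 0.5 * stage')
#     # -> ['height', 'stage']
#
#     get_vars_in_expression('x + )')   # raises SyntaxError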


##
# @brief Get a file from the web.
# @param file_url URL of the file to fetch.
# @param file_name Path to file to create in the filesystem.
# @param auth Auth tuple (httpproxy, proxyuser, proxypass).
# @param blocksize Read file in this block size.
# @return (True, auth) if successful, else (False, auth).
# @note If 'auth' not supplied, will prompt user.
# @note Will try using environment variable HTTP_PROXY for proxy server.
# @note Will try using environment variable PROXY_USERNAME for proxy username.
# @note Will try using environment variable PROXY_PASSWORD for proxy password.
def get_web_file(file_url, file_name, auth=None, blocksize=1024*1024):
    '''Get a file from the web (HTTP).

    file_url:  The URL of the file to get
    file_name: Local path to save loaded file in
    auth:      A tuple (httpproxy, proxyuser, proxypass)
    blocksize: Block size of file reads

    Will try simple load through urllib first.  Drop down to urllib2
    if there is a proxy and it requires authentication.

    Environment variable HTTP_PROXY can be used to supply proxy information.
    PROXY_USERNAME is used to supply the authentication username.
    PROXY_PASSWORD supplies the password, if you dare!
    '''

    # Simple fetch, if fails, check for proxy error
    try:
        urllib.urlretrieve(file_url, file_name)
        return (True, auth)     # no proxy, no auth required
    except IOError, e:
        if e[1] == 407:     # proxy error
            pass
        elif e[1][0] == 113:  # no route to host
            print 'No route to host for %s' % file_url
            return (False, auth)    # return False
        else:
            print 'Unknown connection error to %s' % file_url
            return (False, auth)

    # We get here if there was a proxy error, get file through the proxy
    # unpack auth info
    try:
        (httpproxy, proxyuser, proxypass) = auth
    except:
        (httpproxy, proxyuser, proxypass) = (None, None, None)

    # fill in any gaps from the environment
    if httpproxy is None:
        httpproxy = os.getenv('HTTP_PROXY')
    if proxyuser is None:
        proxyuser = os.getenv('PROXY_USERNAME')
    if proxypass is None:
        proxypass = os.getenv('PROXY_PASSWORD')

    # Get auth info from user if still not supplied
    if httpproxy is None or proxyuser is None or proxypass is None:
        print '-'*72
        print ('You need to supply proxy authentication information.')
        if httpproxy is None:
            httpproxy = raw_input('                    proxy server: ')
        else:
            print '         HTTP proxy was supplied: %s' % httpproxy
        if proxyuser is None:
            proxyuser = raw_input('                  proxy username: ')
        else:
            print 'HTTP proxy username was supplied: %s' % proxyuser
        if proxypass is None:
            proxypass = getpass.getpass('                  proxy password: ')
        else:
            print 'HTTP proxy password was supplied: %s' % ('*'*len(proxypass))
        print '-'*72

    # the proxy URL cannot start with 'http://', we add that later
    httpproxy = httpproxy.lower()
    if httpproxy.startswith('http://'):
        httpproxy = httpproxy.replace('http://', '', 1)

    # open remote file
    proxy = urllib2.ProxyHandler({'http': 'http://' + proxyuser
                                              + ':' + proxypass
                                              + '@' + httpproxy})
    authinfo = urllib2.HTTPBasicAuthHandler()
    opener = urllib2.build_opener(proxy, authinfo, urllib2.HTTPHandler)
    urllib2.install_opener(opener)
    try:
        webget = urllib2.urlopen(file_url)
    except urllib2.HTTPError, e:
        print 'Error received from proxy:\n%s' % str(e)
        print 'Possibly the user/password is wrong.'
        return (False, (httpproxy, proxyuser, proxypass))

    # transfer file to local filesystem
    fd = open(file_name, 'wb')
    while True:
        data = webget.read(blocksize)
        if len(data) == 0:
            break
        fd.write(data)
    fd.close()
    webget.close()

    # return successful auth info
    return (True, (httpproxy, proxyuser, proxypass))
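
# Usage sketch (illustrative only; the URL and local paths are hypothetical):
#
#     ok, auth = get_web_file('http://example.com/data/dem.tgz', '/tmp/dem.tgz')
#     if ok:
#         # 'auth' can be cached and passed back in to avoid re-prompting
#         # for proxy details on subsequent downloads.
#         ok, auth = get_web_file('http://example.com/data/more.tgz',
#                                 '/tmp/more.tgz', auth=auth)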


##
# @brief Tar a file (or directory) into a tarfile.
# @param files A list of files (or directories) to tar.
# @param tarname The created tar file name.
# @note 'files' may be a string (single file) or a list of strings.
# @note We use gzip compression.
def tar_file(files, tarname):
    '''Compress a file or directory into a tar file.'''

    if isinstance(files, basestring):
        files = [files]

    o = tarfile.open(tarname, 'w:gz')
    for file in files:
        o.add(file)
    o.close()


##
# @brief Untar a file into an optional target directory.
# @param tarname Name of the file to untar.
# @param target_dir Directory to untar into.
def untar_file(tarname, target_dir='.'):
    '''Uncompress a tar file.'''

    o = tarfile.open(tarname, 'r:gz')
    members = o.getmembers()
    for member in members:
        o.extract(member, target_dir)
    o.close()
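
# Usage sketch (illustrative only; file names are hypothetical):
#
#     tar_file(['results/', 'run.log'], 'run_outputs.tgz')   # gzip compressed
#     untar_file('run_outputs.tgz', target_dir='/tmp/restore')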


##
# @brief Return a hex digest (MD5) of a given file.
# @param filename Path to the file of interest.
# @param blocksize Size of data blocks to read.
# @return A hex digest string (32 hexadecimal characters).
# @note Falls back to the old 'md5' module if hashlib is not available.
def get_file_hexdigest(filename, blocksize=1024*1024*10):
    '''Get a hex digest of a file.'''

    if hashlib.__name__ == 'hashlib':
        m = hashlib.md5()       # new - 'hashlib' module
    else:
        m = hashlib.new()       # old - 'md5' module - remove once py2.4 gone
    fd = open(filename, 'rb')  # Binary mode for a consistent digest across platforms

    while True:
        data = fd.read(blocksize)
        if len(data) == 0:
            break
        m.update(data)

    fd.close()
    return m.hexdigest()


##
# @brief Create a file containing a hexdigest string of a data file.
# @param data_file Path to the file to get the hexdigest from.
# @param digest_file Path to hexdigest file to create.
# @note Uses MD5 digest.
def make_digest_file(data_file, digest_file):
    '''Create a file containing the hex digest string of a data file.'''

    hexdigest = get_file_hexdigest(data_file)
    fd = open(digest_file, 'w')
    fd.write(hexdigest)
    fd.close()
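
# Usage sketch (illustrative only; file names are hypothetical): write a
# digest alongside a data file, then later compare against a fresh digest
# to detect changes.
#
#     make_digest_file('dem.asc', 'dem.asc.md5')
#     stored = open('dem.asc.md5').read()
#     if stored != get_file_hexdigest('dem.asc'):
#         print 'dem.asc has changed since the digest was written'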


##
# @brief Function to return the number of lines in a file.
# @param in_file Path to file to get length of.
# @return Number of lines in file.
# @note Doesn't count '\n' characters.
# @note Zero byte file, returns 0.
# @note No \n in file at all, but >0 chars, returns 1.
def file_length(in_file):
    '''Return the number of lines in a file.'''

    fid = open(in_file)
    data = fid.readlines()
    fid.close()
    return len(data)