#
# (c)2006 LIRIS - University Claude Bernard Lyon 1
# http://liris.cnrs.fr/
#
# Author: Pierre-Antoine CHAMPIN
# http://champin.net/
#
# This software is distributed under the terms of the GNU LGPL v2.1.
# See LICENSE.txt for more details.
#

"""
An implementation of [1].

[1] Representing data as resources in RDF and OWL. Pierre-Antoine Champin.
    LIRIS research report #?. 2006.
"""

# Version string of this module.
VERSION = "0.9"
# Default URI prefix: used as the default value of the C{prefix} parameter of
# uri_for and value_for.
PREFIX = "http://liris.cnrs.fr/2006/08/data-uri#"

from decimal  import Decimal, InvalidOperation
from urllib   import quote, unquote
from warnings import warn

def uri_for(value, prefix=PREFIX):
    """
    Builds the data-uri representing C{value}.

    A tuple is converted as a whole by L{_uripart_for_tuple}; any other value
    is converted as a single simple value by L{_uripart_for}. The resulting
    part is appended to C{prefix}.

    Raises a TypeError if C{value}'s type cannot be converted.

    Raises a ValueError if C{value} is a string and contains the null
    character.

    @param value: accepts types int, long, float, Decimal, str, unicode or
                  tuple.
    @param prefix: if given, use an alternative URI prefix; else use L{PREFIX}
    @return: the data-uri as a str
    """
    if isinstance(value, tuple):
        return prefix + _uripart_for_tuple(value)

    part = _uripart_for(value)
    if part is None:
        # _uripart_for signals an unsupported type by returning None
        raise TypeError("Cannot build data-uri for %s instances"
                        % value.__class__)
    return prefix + part

def value_for(uri, prefix=PREFIX):
    """
    Attempts to convert C{uri} into the relevant value.

    Raises a ValueError if C{uri} is not a valid data-uri (in particular if
    it does not start with C{prefix}).

    @param uri: the data-uri as a str
    @param prefix: if given, use an alternative URI prefix; else use L{PREFIX}

    @return: a Decimal for number-uris, a unicode for string-uris or a tuple of
             Decimals/unicode/NoneType for tuple-uris.
    """
    if not uri.startswith(prefix):
        raise ValueError("Argument is not a data-uri <%s>" % uri)

    # Strip the prefix that was actually matched: a caller-supplied prefix
    # need not have the same length as the default PREFIX.
    part = uri[len(prefix):]
    if part.startswith("tpl:"):
        return _value_for_tuple(part[4:])
    return _value_for_simple(part)


def _uripart_for(value):
    """
    Builds the C{simple_val} [1] for a single (non-tuple) value.

    Numbers (int, long, float, Decimal) are first turned into a Decimal and
    handed to L{_uripart_for_number}; floats go through their C{repr}.
    Byte strings are decoded with the default codec and then, like unicode
    strings, handed to L{_uripart_for_unicode}.

    Raises a ValueError if C{value} is a string and contains the null
    character.

    @param value: accepts types int, long, float, Decimal, str, unicode.
    @return: the C{simple_val} [1] or None if value has no correct type
    """
    # numeric types: normalise to a Decimal first
    number = None
    if isinstance(value, Decimal):
        number = value
    elif isinstance(value, (int, long)):
        number = Decimal(value)
    elif isinstance(value, float):
        number = Decimal(repr(value))
    if number is not None:
        return _uripart_for_number(number)

    # textual types: normalise to unicode first
    if isinstance(value, str):
        value = value.decode()
    if isinstance(value, unicode):
        return _uripart_for_unicode(value)

    # unsupported type: let the caller decide how to report it
    return None

def _uripart_for_tuple (tpl):
    """
    Attempts to convert C{tpl} into a C{tuple_val} [1].

    Raises a TypeError if an element of the tuple could not be converted.

    @param tpl: a tuple
    @return: the C{tuple_val}
    """
    r = "tpl:"
    sep = ""
    for i,val in enumerate (tpl):
        if val is None:
            val = ""
        else:
            val = _uripart_for (val)
            if val is None:
                raise TypeError, "Cannot build data-uri for %s instances " \
                                 "(element %s of tuple)" \
                                 % (val.__class__, i)
        r = "%s%s%s" % (r, sep, val)
        sep = "%00"
    return r
    
def _uripart_for_number (num):
    """
    Converts a Decimal into a C{number_val} [1].

    @param num: a Decimal
    @return: the corresponding C{number_val}
    """
    sign, digits, exp = num.as_tuple()

    # remove trailing zeroes
    # NB: do not use num.normalize, for it is sensitive to the default
    # precision (getcontext().prec) and might truncate some significant digits
    trailing_zeros = 0
    if digits[0] != 0:
        while digits[-1-trailing_zeros] == 0:
            trailing_zeros += 1
            exp += 1

    r = "num:"
    if sign:
        r += "-"
    r += "".join (str (i) for i in digits[:len(digits)-trailing_zeros])
    if exp:
        r += "e%s" % exp
    return r

def _uripart_for_unicode (txt):
    """
    Attempts to convert a unicode into the corresponding C{text_val} [1].

    Raises a ValueError if C{txt} contains the null character.

    @param txt: the data-uri as a str
    @return: the corresponding C{text_val}
    """
    if '\0' in txt:
        raise ValueError, "Cannot build data-uri for text containing a null " \
                          "character %r" % txt
    return "txt:%s" % quote(txt.encode("UTF_8"))

def _value_for_tuple (string):
    """
    Attempts to convert the value part of C{tuple_val} (i.e. the part after
    the C{tpl:} prefix) [1] into a tuple.

    Raises a ValueError if a component of C{part} is not a valid C{simple_val}. 

    @param string: the value part of the C{number_val} as a str
    @return: a Decimal
    """
    lst = string.split("%00")
    for index,item in enumerate (lst):
        if len (item) == 0:
            lst[index] = None
        else:
            lst[index] = _value_for_simple (item)
    return tuple (lst)

def _value_for_simple (simple_val):
    """
    Attempts to convert C{simple_val} (according to the BNF) into the relevant
    value.

    Raises a ValueError if C{uri} is not a valid data-uri.

    @param simple_val: the C{simple_val} as a str
    @return: a Decimal for number_val, a unicode for text_val, or a tuple of
             Decimals/unicode/NoneType for tuple_val.
    """
    if simple_val.startswith ("num:"):
        return _value_for_number (simple_val[4:])
    elif simple_val.startswith ("txt:"):
        return _value_for_text (simple_val[4:])
    else:
        raise ValueError, "Unrecognized simple_val value <%s>" % simple_val

def _value_for_number (part):
    """
    Attempts to convert the value part of C{number_val} (i.e. the part after
    the C{num:} prefix) [1] into a Decimal.

    Raises a ValueError if C{part} is not a valid decimal value.

    Issues a L{DataUriWarning} if part is a valid decimal value but not in
    canonical form.

    @param part: the value part of the C{number_val} as a str
    @return: a Decimal
    """
    try:
        d = Decimal (part)
    except InvalidOperation:
        raise ValueError, "Invalid literal value for number %s" % part
    check = _uripart_for_number (d)[4:]
    if check != part:
        warn (DataUriWarning ("Non-canonical number in data-uri %s" % part))
    return d

def _value_for_text (part):
    """
    Attempts to convert the value part of a C{text_val} (i.e. the part after
    the C{txt:} prefix) [1] into a unicode. It is hence assumed that C{part}
    is the result of URL-encoding the UTF-8 encoding of a unicode text.

    Raises a ValueError if C{part} is not correctly encoded.

    Issues a L{DataUriWarning} if C{part} contains the text '%00' (null
    character, which should not appear in text).

    @param part: the value part of the C{text_val} as a str
    @return: a unicode
    """
    # An escaped null character is only reported, not rejected: decoding
    # still proceeds below.
    if "%00" in part:
        warn (DataUriWarning ("Text data-uri contains null character %s" \
                              % part))
    try:
        # undo the URL-quoting, then interpret the resulting bytes as UTF-8
        return unquote (part).decode ("UTF_8")
    except UnicodeDecodeError, e:
        # NOTE: Python 2 syntax. UnicodeDecodeError already derives from
        # ValueError in the standard exception hierarchy.
        raise ValueError, e


class DataUriWarning(Warning):
    """
    Warning category used by this module when parsing a data-uri whose
    content is accepted but suspicious: a number that is not in canonical
    form, or a text containing the escaped null character C{%00}.
    """


# Length of the default URI prefix L{PREFIX}.
_ln = len (PREFIX)
