# formathelper.py | Explore in Territory

#
# Copyright (c) 2008-2012 Stefan Krah. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#


# Generate PEP-3101 format strings.


import os, sys, locale, random
import platform, subprocess
from test.support.import_helper import import_fresh_module
from shutil import which

# Two separately imported copies of the decimal module (see
# test.support.import_helper.import_fresh_module):
#   C -- imported with '_decimal' listed in `fresh`; presumably the copy
#        backed by the C accelerator -- TODO confirm.
#   P -- imported with '_decimal' listed in `blocked`; presumably the
#        pure-Python decimal.py.  This is the copy that printit() and
#        check_fillchar() format with.
C = import_fresh_module('decimal', fresh=['_decimal'])
P = import_fresh_module('decimal', blocked=['_decimal'])


# Language strings used as the list of locale names when running on
# Windows.  Entries are grouped by language, one group per line.
windows_lang_strings = [
    "chinese", "chinese-simplified", "chinese-traditional",
    "czech",
    "danish",
    "dutch", "belgian",
    "english", "australian", "canadian",
    "english-nz", "english-uk", "english-us",
    "finnish",
    "french", "french-belgian", "french-canadian", "french-swiss",
    "german", "german-austrian", "german-swiss",
    "greek",
    "hungarian",
    "icelandic",
    "italian", "italian-swiss",
    "japanese",
    "korean",
    "norwegian", "norwegian-bokmal", "norwegian-nynorsk",
    "polish",
    "portuguese", "portuguese-brazil",
    "russian",
    "slovak",
    "spanish", "spanish-mexican", "spanish-modern",
    "swedish",
    "turkish",
]

# Encoding overrides keyed by locale name.  get_preferred_encoding()
# consults this table first and only falls back to
# locale.getpreferredencoding() for locales that are not listed here.
preferred_encoding = {
    'cs_CZ': 'ISO8859-2',
    'cs_CZ.iso88592': 'ISO8859-2',
    'czech': 'ISO8859-2',
    'eesti': 'ISO8859-1',
    'estonian': 'ISO8859-1',
    'et_EE': 'ISO8859-15',
    'et_EE.ISO-8859-15': 'ISO8859-15',
    'et_EE.iso885915': 'ISO8859-15',
    'et_EE.iso88591': 'ISO8859-1',
    'fi_FI.iso88591': 'ISO8859-1',
    'fi_FI': 'ISO8859-15',
    'fi_FI@euro': 'ISO8859-15',
    'fi_FI.iso885915@euro': 'ISO8859-15',
    'finnish': 'ISO8859-1',
    'lv_LV': 'ISO8859-13',
    'lv_LV.iso885913': 'ISO8859-13',
    'nb_NO': 'ISO8859-1',
    'nb_NO.iso88591': 'ISO8859-1',
    'bokmal': 'ISO8859-1',
    'nn_NO': 'ISO8859-1',
    'nn_NO.iso88591': 'ISO8859-1',
    'no_NO': 'ISO8859-1',
    'norwegian': 'ISO8859-1',
    'nynorsk': 'ISO8859-1',
    'ru_RU': 'ISO8859-5',
    'ru_RU.iso88595': 'ISO8859-5',
    'russian': 'ISO8859-5',
    'ru_RU.KOI8-R': 'KOI8-R',
    'ru_RU.koi8r': 'KOI8-R',
    'ru_RU.CP1251': 'CP1251',
    'ru_RU.cp1251': 'CP1251',
    'sk_SK': 'ISO8859-2',
    'sk_SK.iso88592': 'ISO8859-2',
    'slovak': 'ISO8859-2',
    'sv_FI': 'ISO8859-1',
    'sv_FI.iso88591': 'ISO8859-1',
    'sv_FI@euro': 'ISO8859-15',
    'sv_FI.iso885915@euro': 'ISO8859-15',
    'uk_UA': 'KOI8-U',
    'uk_UA.koi8u': 'KOI8-U',
}

# Digit strings of every length from 0 (the empty string) up to 22.
# Each entry is a prefix of "1234567890" repeated, i.e. "", "1", "12",
# ..., "1234567890123456789012".
integers = [("1234567890" * 3)[:ndigits] for ndigits in range(23)]

# Small numeric literals, listed with their signed variants.
numbers = [
    # Integer, fractional and exponent spellings of zero.
    "0", "-0", "+0",
    "0.0", "-0.0", "+0.0",
    "0e0", "-0e0", "+0e0",
    # Values written without a leading integer digit.
    ".0", "-.0",
    ".1", "-.1",
    # Non-zero values in plain and exponent notation.
    "1.1", "-1.1",
    "1e1", "-1e1",
]

# Get the list of available locales.
#
# The result is bound to the module-level name `locale_list`:
#   * Windows: the entries of windows_lang_strings.
#   * Otherwise: the names in /var/lib/locales/supported.d/local when that
#     file exists, else the output of `locale -a` when a `locale` executable
#     is on PATH, else just ['C'].
if platform.system() == 'Windows':
    locale_list = windows_lang_strings
else:
    locale_list = ['C']
    if os.path.isfile("/var/lib/locales/supported.d/local"):
        # On Ubuntu, `locale -a` gives the wrong case for some locales,
        # so we get the correct names directly:
        with open("/var/lib/locales/supported.d/local") as f:
            # Skip comment lines, and also blank lines: a blank line has
            # no first field, so indexing split()[0] would raise IndexError.
            locale_list = [loc.split()[0] for loc in f
                           if loc.strip() and not loc.startswith('#')]
    elif which('locale'):
        # The exit status is deliberately not checked; whatever was written
        # to stdout is used as-is.
        locale_list = subprocess.run(["locale", "-a"],
                                     stdout=subprocess.PIPE).stdout
        try:
            locale_list = locale_list.decode()
        except UnicodeDecodeError:
            # Some distributions insist on using latin-1 characters
            # in their locale names.
            locale_list = locale_list.decode('latin-1')
        # Split on '\n' only: str.splitlines() would also break on characters
        # such as '\x85', which can occur after the latin-1 fallback.
        locale_list = locale_list.split('\n')
# Drop every empty entry (e.g. the one produced by the trailing newline of
# `locale -a`).  This builds a new list, so later pruning of locale_list
# cannot modify windows_lang_strings.
locale_list = [name for name in locale_list if name]

# Debian: prune locale_list using /etc/locale.alias.  For every non-comment
# line made of exactly two whitespace-separated fields, the first field is
# removed from locale_list if it is present.
if os.path.isfile("/etc/locale.alias"):
    with open("/etc/locale.alias") as f:
        # readline() is called explicitly (rather than iterating over f) so
        # that a decoding error can be skipped and reading can continue.
        while True:
            try:
                line = f.readline()
            except UnicodeDecodeError:
                continue
            if not line:
                # readline() returns the empty string only at end of file.
                break
            if line.startswith('#'):
                continue
            x = line.split()
            if len(x) == 2 and x[0] in locale_list:
                locale_list.remove(x[0])

# FreeBSD: remove a fixed set of locales from locale_list.
if platform.system() == 'FreeBSD':
    # http://www.freebsd.org/cgi/query-pr.cgi?pr=142173
    # en_GB.US-ASCII has 163 as the currency symbol.
    for loc in ['it_CH.ISO8859-1', 'it_CH.ISO8859-15', 'it_CH.UTF-8',
                'it_IT.ISO8859-1', 'it_IT.ISO8859-15', 'it_IT.UTF-8',
                'sl_SI.ISO8859-2', 'sl_SI.UTF-8',
                'en_GB.US-ASCII']:
        # Locales that are not in the list are simply skipped.
        if loc in locale_list:
            locale_list.remove(loc)

# Helpers for printing a testcase in the format of the IBM tests (for runtest.c):
def get_preferred_encoding():
    # Return the encoding to use for the current LC_CTYPE locale.
    #
    # The locale name is queried (not changed) with setlocale().  If the
    # preferred_encoding table has an entry for that name, the entry wins;
    # otherwise the answer comes from locale.getpreferredencoding(), which
    # is only called in that fallback case.
    current = locale.setlocale(locale.LC_CTYPE)
    try:
        return preferred_encoding[current]
    except KeyError:
        return locale.getpreferredencoding()

def printit(testno, s, fmt, encoding=None):
    # Write one "xfmt<testno>  format ..." testcase line to stdout.
    #
    #   testno   -- number used in the testcase name.
    #   s        -- decimal literal; converted with P.Decimal(s).
    #   fmt      -- format specification applied to the decimal.
    #   encoding -- encoding used to render fmt and the result; when it is
    #               None or empty, get_preferred_encoding() supplies it.
    #
    # Any exception raised while formatting or encoding is reported on
    # stderr instead of being propagated.
    encoding = encoding or get_preferred_encoding()
    try:
        result = format(P.Decimal(s), fmt)
        # str() of a bytes object gives its repr; slicing off the first two
        # characters and the last one leaves only the escaped payload.
        fmt = str(fmt.encode(encoding))[2:-1]
        result = str(result.encode(encoding))[2:-1]
        # A result containing a single quote is wrapped in double quotes.
        template = ("xfmt%d  format  %s  '%s'  ->  \"%s\"\n"
                    if "'" in result else
                    "xfmt%d  format  %s  '%s'  ->  '%s'\n")
        sys.stdout.write(template % (testno, s, fmt, result))
    except Exception as err:
        sys.stderr.write("%s  %s  %s\n" % (err, s, fmt))


# Check if an integer can be converted to a valid fill character.
#
# Returns chr(i) when all of the following succeed, otherwise None:
#   * chr(i) itself (fails for values outside the valid code point range),
#   * a UTF-8 encode/decode round trip (fails for lone surrogates),
#   * formatting P.Decimal(0) with the character as the fill of '<19g'.
def check_fillchar(i):
    try:
        c = chr(i)
        c.encode('utf-8').decode()
        format(P.Decimal(0), c + '<19g')
        return c
    except Exception:
        # Only ordinary errors mean "not a usable fill character".  A bare
        # `except:` would also swallow KeyboardInterrupt and SystemExit,
        # making the callers' long loops impossible to interrupt.
        return None

# Generate all unicode characters that are accepted as
# fill characters by decimal.py.
#
# Every integer in range(0, 0x110002) is passed through check_fillchar();
# the characters it returns are yielded lazily, in increasing order, and
# rejected values (None) are dropped.
def all_fillchars():
    yield from filter(None, map(check_fillchar, range(0, 0x110002)))

# Return random fill character.
#
# Integers are drawn from range(0, 0x110002) until check_fillchar()
# accepts one; the accepted character is returned.
def rand_fillchar():
    while True:
        candidate = check_fillchar(random.randrange(0, 0x110002))
        if candidate:
            return candidate

# Generate random format strings
# [[fill]align][sign][#][0][width][.precision][type]
#
#   fill     -- fill character placed in front of a randomly chosen
#               alignment character whenever the fill+align field is picked.
#   typespec -- candidate type characters; 'n' is excluded from the
#               candidates whenever the thousands separator is also picked.
#
# A random subset of the seven fields is chosen and emitted in field order.
def rand_format(fill, typespec='EeGgFfn%'):
    how_many = random.randrange(8)
    chosen = sorted(random.sample(range(7), how_many))
    aligned = False
    parts = []
    for field in chosen:
        if field == 0:  # fill+align
            parts.append(fill)
            parts.append(random.choice('<>=^'))
            aligned = True
        elif field == 1:  # sign
            parts.append(random.choice('+- '))
        elif field == 2:  # zeropad
            # Zero padding is only emitted when no fill+align was emitted.
            if not aligned:
                parts.append('0')
        elif field == 3:  # width
            parts.append(str(random.randrange(1, 100)))
        elif field == 4:  # thousands separator
            parts.append(',')
        elif field == 5:  # prec
            parts.append('.' + str(random.randrange(100)))
        elif field == 6:  # type
            candidates = typespec.replace('n', '') if 4 in chosen else typespec
            parts.append(random.choice(candidates))
    return ''.join(parts)

# Partially brute force all possible format strings containing a thousands
# separator. Fall back to random where the runtime would become excessive.
# [[fill]align][sign][#][0][width][,][.precision][type]
#
# Alignment, fill, sign, zero padding, width and precision are enumerated;
# the type character is drawn at random for each generated string instead
# of being enumerated as well.
def all_format_sep():
    widths = [''] + [str(w) for w in range(1, 15)] + ['101']
    precisions = [''] + ['.' + str(p) for p in range(15)]
    type_codes = ('', 'E', 'e', 'G', 'g', 'F', 'f', '%')
    for align in ('', '<', '>', '=', '^'):
        # Without an alignment there is no fill; with an alignment there is
        # no zero padding.  The suppressed field still iterates twice, so
        # each such combination is produced twice.
        fills = ('', 'x') if align else ('', '')
        zeropads = ('', '') if align else ('', '0')
        for fill in fills:
            for sign in ('', '+', '-', ' '):
                for zeropad in zeropads:
                    for width in widths:
                        for prec in precisions:
                            kind = random.choice(type_codes)
                            yield (fill + align + sign + zeropad
                                   + width + ',' + prec + kind)

# Partially brute force all possible format strings with an 'n' specifier.
# [[fill]align][sign][#][0][width][,][.precision][type]
#
# Alignment, fill, sign, zero padding, width and precision are enumerated;
# every generated string ends in 'n' and none contains a thousands separator.
def all_format_loc():
    widths = [''] + [str(w) for w in range(1, 20)] + ['101']
    precisions = [''] + ['.' + str(p) for p in range(1, 20)]
    for align in ('', '<', '>', '=', '^'):
        # Without an alignment there is no fill; with an alignment there is
        # no zero padding.  The suppressed field still iterates twice, so
        # each such combination is produced twice.
        fills = ('', 'x') if align else ('', '')
        zeropads = ('', '') if align else ('', '0')
        for fill in fills:
            for sign in ('', '+', '-', ' '):
                for zeropad in zeropads:
                    for width in widths:
                        for prec in precisions:
                            yield (fill + align + sign + zeropad
                                   + width + prec + 'n')

# Generate random format strings with a unicode fill character
# [[fill]align][sign][#][0][width][,][.precision][type]
#
# The result always starts with str(fill) followed by a random alignment
# character; a random subset of the remaining five fields follows in field
# order.  'n' is not offered as a type when the thousands separator is used.
def randfill(fill):
    how_many = random.randrange(6)
    chosen = sorted(random.sample(range(5), how_many))
    parts = [str(fill), random.choice('<>=^')]
    for field in chosen:
        if field == 0:  # sign
            parts.append(random.choice('+- '))
        elif field == 1:  # width
            parts.append(str(random.randrange(1, 100)))
        elif field == 2:  # thousands separator
            parts.append(',')
        elif field == 3:  # prec
            parts.append('.' + str(random.randrange(100)))
        elif field == 4:  # type
            candidates = 'EeGgFf%' if 2 in chosen else 'EeGgFfn%'
            parts.append(random.choice(candidates))
    return ''.join(parts)

# Generate random format strings with random locale setting
# [[fill]align][sign][#][0][width][,][.precision][type]
#
# Side effect: LC_ALL is switched to a randomly chosen entry of locale_list.
# If setlocale() rejects that entry, the current locale is left unchanged.
# The returned format string always ends in 'n'.
def rand_locale():
    try:
        locale.setlocale(locale.LC_ALL, random.choice(locale_list))
    except locale.Error:
        pass
    how_many = random.randrange(6)
    chosen = sorted(random.sample(range(5), how_many))
    aligned = False
    parts = []
    for field in chosen:
        if field == 0:  # fill+align
            parts.append(chr(random.randrange(32, 128)))
            parts.append(random.choice('<>=^'))
            aligned = True
        elif field == 1:  # sign
            parts.append(random.choice('+- '))
        elif field == 2:  # zeropad
            # Zero padding is only emitted when no fill+align was emitted.
            if not aligned:
                parts.append('0')
        elif field == 3:  # width
            parts.append(str(random.randrange(1, 100)))
        elif field == 4:  # prec
            parts.append('.' + str(random.randrange(100)))
    parts.append('n')
    return ''.join(parts)
# cpython/Modules/_decimal/tests/formathelper.py