# cpython/Tools/c-analyzer/cpython/_analyzer.py

import os.path

from c_common.clsutil import classonly
from c_parser.info import (
    KIND,
    Declaration,
    TypeDeclaration,
    Member,
    FIXED_TYPE,
)
from c_parser.match import (
    is_pots,
    is_funcptr,
)
from c_analyzer.match import (
    is_system_type,
    is_process_global,
    is_fixed_type,
    is_immutable,
)
import c_analyzer as _c_analyzer
import c_analyzer.info as _info
import c_analyzer.datafiles as _datafiles
from . import _parser, REPO_ROOT


# The directory containing this module; the .tsv data files named below
# are looked up in it.
_DATA_DIR = os.path.dirname(__file__)
# Read by read_known().
KNOWN_FILE = os.path.join(_DATA_DIR, 'known.tsv')
# Both of these are read by read_ignored().
IGNORED_FILE = os.path.join(_DATA_DIR, 'ignored.tsv')
NEED_FIX_FILE = os.path.join(_DATA_DIR, 'globals-to-fix.tsv')
# Maps a type name to a bool.
# NOTE(review): nothing visible in this module reads this table.  Judging
# by the commented-out snippet further down, the bool presumably means
# "supported" -- confirm before relying on it.
KNOWN_IN_DOT_C = {
    'struct _odictobject': False,
    'PyTupleObject': False,
    'struct _typeobject': False,
    'struct _arena': True,  # ???
    'struct _frame': False,
    'struct _ts': True,  # ???
    'struct PyCodeObject': False,
    'struct _is': True,  # ???
    'PyWideStringList': True,  # ???
    # recursive
    'struct _dictkeysobject': False,
}
# These are loaded from the respective .tsv files upon first use.
# _KNOWN is the cache filled in (and copied out) by read_known().
_KNOWN = {
    # {(file, ID) | ID => info | bool}
    #'PyWideStringList': True,
}
# NOTE(review): the disabled snippet below refers to "_KNOWN_IN_DOT_C",
# while the table defined above is spelled KNOWN_IN_DOT_C.
#_KNOWN = {(Struct(None, typeid.partition(' ')[-1], None)
#           if typeid.startswith('struct ')
#           else TypeDef(None, typeid, None)
#           ): ([], {'unsupported': None if supported else True})
#          for typeid, supported in _KNOWN_IN_DOT_C.items()}
# _IGNORED is the cache filled in (and copied out) by read_ignored(),
# using both IGNORED_FILE and NEED_FIX_FILE.
_IGNORED = {
    # {ID => reason}
}

# Type strings that _has_other_supported_type() accepts outright.
# Each entry is compared against str(decl.vartype) with all whitespace
# removed and a leading "struct" word dropped, so array types are
# spelled like 'PyMethodDef[]'.
# XXX We should be handling these through known.tsv.
_OTHER_SUPPORTED_TYPES = {
    # Holds tuple of strings, which we statically initialize:
    '_PyArg_Parser',
    # Uses of these should be const, but we don't worry about it.
    'PyModuleDef',
    'PyModuleDef_Slot[]',
    'PyType_Spec',
    'PyType_Slot[]',
    'PyMethodDef',
    'PyMethodDef[]',
    'PyMemberDef[]',
    'PyGetSetDef[]',
    'PyNumberMethods',
    'PySequenceMethods',
    'PyMappingMethods',
    'PyAsyncMethods',
    'PyBufferProcs',
    'PyStructSequence_Field[]',
    'PyStructSequence_Desc',
}

# (relpath, name) pairs consulted by _is_kwlist().
# A relpath of '*' matches a variable of that name in any file; any other
# relpath must match the end of the declaration's (absolute) filename.
# XXX We should normalize all cases to a single name,
# e.g. "kwlist" (currently the most common).
_KWLIST_VARIANTS = [
    ('*', 'kwlist'),
    ('*', 'keywords'),
    ('*', 'kwargs'),
    ('Modules/_csv.c', 'dialect_kws'),
    ('Modules/_datetimemodule.c', 'date_kws'),
    ('Modules/_datetimemodule.c', 'datetime_kws'),
    ('Modules/_datetimemodule.c', 'time_kws'),
    ('Modules/_datetimemodule.c', 'timezone_kws'),
    ('Modules/_lzmamodule.c', 'optnames'),
    ('Modules/_lzmamodule.c', 'arg_names'),
    ('Modules/cjkcodecs/multibytecodec.c', 'incnewkwarglist'),
    ('Modules/cjkcodecs/multibytecodec.c', 'streamkwarglist'),
    ('Modules/socketmodule.c', 'kwnames'),
]

# The declaration kinds this module analyzes: every type kind plus variables.
KINDS = frozenset([*KIND.TYPES, KIND.VARIABLE])


def read_known():
    """Return a copy of the known-types table, loading it when needed.

    While the module-level cache (_KNOWN) is empty, KNOWN_FILE is read,
    analyzed with analyze_resolved(), and the resulting types are stored
    in the cache.  The caller always gets a shallow copy of the cache,
    never the cache itself.
    """
    if _KNOWN:
        return dict(_KNOWN)

    # Nothing is cached yet, so load the data file.
    extracols = None  # XXX
    #extracols = ['unsupported']
    loaded = _datafiles.read_known(KNOWN_FILE, extracols, REPO_ROOT)
    # For now we ignore loaded.values() (i.e. "extra").
    types, _ = _datafiles.analyze_known(
        loaded,
        analyze_resolved=analyze_resolved,
    )
    _KNOWN.update(types)
    return dict(_KNOWN)


def write_known():
    """Write the known-types data file (not implemented).

    Raises:
        NotImplementedError: always.
    """
    # XXX A real implementation would presumably go through
    # _datafiles.write_known() and target KNOWN_FILE (relative to
    # REPO_ROOT, with an 'unsupported' extra column) -- confirm the
    # signature before wiring it up.
    raise NotImplementedError


def read_ignored():
    """Return a copy of the ignored-variables table, loading it when needed.

    While the module-level cache (_IGNORED) is empty, it is filled from
    IGNORED_FILE and then NEED_FIX_FILE (later entries replace earlier
    ones with the same key).  The caller gets a shallow copy.
    """
    if not _IGNORED:
        for tsvfile in (IGNORED_FILE, NEED_FIX_FILE):
            entries = _datafiles.read_ignored(tsvfile, relroot=REPO_ROOT)
            _IGNORED.update(entries)
    return _IGNORED.copy()


def write_ignored():
    """Write the ignored-variables data file (not implemented).

    Raises:
        NotImplementedError: always.
    """
    # XXX A real implementation would presumably pass the variables to
    # _datafiles.write_ignored() with IGNORED_FILE and relroot=REPO_ROOT
    # -- confirm the signature before wiring it up.
    raise NotImplementedError


def analyze(filenames, *,
            skip_objects=False,
            **kwargs
            ):
    """Analyze the declarations found in *filenames*.

    The declarations from iter_decls() are analyzed against the table
    from read_known(), and the results are wrapped via
    Analysis.from_results().

    skip_objects is not supported yet; passing a true value raises
    NotImplementedError.  Any additional keyword arguments are accepted
    but currently unused.
    """
    if skip_objects:
        # XXX Set up a filter.
        raise NotImplementedError

    known_types = read_known()
    results = _c_analyzer.analyze_decls(
        iter_decls(filenames),
        known_types,
        analyze_resolved=analyze_resolved,
    )
    return Analysis.from_results(results)


def iter_decls(filenames, **kwargs):
    """Yield each declaration from *filenames* that has data.

    Only the kinds in KINDS are requested, and files are parsed with
    _parser.parse_files.  Extra keyword arguments are forwarded to
    _c_analyzer.iter_decls().
    """
    parsed = _c_analyzer.iter_decls(
        filenames,
        # We ignore functions (and statements).
        kinds=KINDS,
        parse_files=_parser.parse_files,
        **kwargs
    )
    # Ignore forward declarations (anything whose data is falsy).
    yield from (decl for decl in parsed if decl.data)


def analyze_resolved(resolved, decl, types, knowntypes, extra=None):
    """Return (typedeps, extra) for *decl*, or None if its kind is skipped.

    When *resolved* is _info.UNKNOWN it is expanded into a list: one
    entry per member for a struct/union, otherwise a single entry.  The
    result of _check_unsupported() is stored under extra['unsupported'];
    a dict passed in as *extra* is updated in place.

    Raises NotImplementedError if *extra* already has an 'unsupported'
    entry.
    """
    if decl.kind not in KINDS:
        # Skip it!
        return None

    typedeps = resolved
    if typedeps is _info.UNKNOWN:
        is_compound = decl.kind in (KIND.STRUCT, KIND.UNION)
        count = len(decl.members) if is_compound else 1
        typedeps = [_info.UNKNOWN] * count
    #assert isinstance(typedeps, (list, TypeDeclaration)), typedeps

    if extra is None:
        extra = {}
    elif 'unsupported' in extra:
        raise NotImplementedError((decl, extra))

    extra['unsupported'] = _check_unsupported(
        decl, typedeps, types, knowntypes)
    return typedeps, extra


def _check_unsupported(decl, typedeps, types, knowntypes):
    """Dispatch the "unsupported" check according to the kind of *decl*.

    Structs and unions go to _check_members(); enums must have no type
    dependencies and yield None; everything else goes to
    _check_typedep().

    Raises NotImplementedError when *typedeps* is None, or when an enum
    has (truthy) typedeps.
    """
    if typedeps is None:
        raise NotImplementedError(decl)

    kind = decl.kind
    if kind in (KIND.STRUCT, KIND.UNION):
        return _check_members(decl, typedeps, types, knowntypes)
    if kind is not KIND.ENUM:
        return _check_typedep(decl, typedeps, types, knowntypes)
    # An enum is not expected to depend on any other type.
    if typedeps:
        raise NotImplementedError((decl, typedeps))
    return None


def _check_members(decl, typedeps, types, knowntypes):
    """Check each member of a struct/union against its type dependency.

    Returns the list of per-member results if any of them (other than
    FIXED_TYPE) is truthy, FIXED_TYPE if at least one member produced
    FIXED_TYPE, and None otherwise.

    Raises NotImplementedError if *typedeps* is a single TypeDeclaration,
    if *decl* has no members, or if the number of members and typedeps
    differ.
    """
    if isinstance(typedeps, TypeDeclaration):
        raise NotImplementedError((decl, typedeps))

    #members = decl.members or ()  # A forward decl has no members.
    members = decl.members
    if not members:
        # A forward decl has no members, but that shouldn't surface here..
        raise NotImplementedError(decl)
    if len(members) != len(typedeps):
        raise NotImplementedError((decl, typedeps))

    results = [
        _check_typedep(member, typedecl, types, knowntypes)
        for member, typedecl in zip(members, typedeps)
    ]
    # FIXED_TYPE alone does not count as a failure.
    if any(res for res in results if res is not FIXED_TYPE):
        return results
    if FIXED_TYPE in results:
        return FIXED_TYPE
    return None


def _check_typedep(decl, typedecl, types, knowntypes):
    """Return the reason *decl* is unsupported, given its type declaration.

    A one-element container passed as *typedecl* is unwrapped first.
    The result is None when nothing is wrong, a string describing the
    problem, or (for members and typedefs) whatever _check_vartype()
    returns.  For variables, a FIXED_TYPE result is reported as
    'mutable'.

    Raises NotImplementedError for a *typedecl* or *decl* of a shape
    that is not handled.
    """
    if (not isinstance(typedecl, TypeDeclaration)
            and hasattr(type(typedecl), '__len__')
            and len(typedecl) == 1):
        (typedecl,) = typedecl

    if typedecl is None:
        # XXX Fail?
        return 'typespec (missing)'
    if typedecl is _info.UNKNOWN:
        # XXX Is this right?
        return (None if _has_other_supported_type(decl)
                else 'typespec (unknown)')
    if not isinstance(typedecl, TypeDeclaration):
        raise NotImplementedError((decl, typedecl))

    if isinstance(decl, Member):
        return _check_vartype(decl, typedecl, types, knowntypes)
    if not isinstance(decl, Declaration):
        raise NotImplementedError(decl)
    if decl.kind is KIND.TYPEDEF:
        return _check_vartype(decl, typedecl, types, knowntypes)
    if decl.kind is not KIND.VARIABLE:
        raise NotImplementedError(decl)

    # Variables: only process-global ones can be a problem, and a few
    # well-known patterns are accepted as they are.
    if (not is_process_global(decl)
            or _is_kwlist(decl)
            or _has_other_supported_type(decl)):
        return None
    result = _check_vartype(decl, typedecl, types, knowntypes)
    if result is FIXED_TYPE:
        return 'mutable'
    return result


def _is_kwlist(decl):
    """Return True if *decl* is a recognized "char *[]" keyword list.

    The name (and, unless the entry's relpath is '*', the end of the
    filename) must match an entry in _KWLIST_VARIANTS, and the type,
    with whitespace removed, must be exactly 'char*[]'.
    """
    # keywords for PyArg_ParseTupleAndKeywords()
    # "static char *name[]" -> "static const char * const name[]"
    # XXX These should be made const.
    def matches(relpath, name):
        if decl.name != name:
            return False
        if relpath == '*':
            return True
        assert os.path.isabs(decl.file.filename)
        suffix = os.path.sep + os.path.normpath(relpath)
        return decl.file.filename.endswith(suffix)

    if not any(matches(relpath, name) for relpath, name in _KWLIST_VARIANTS):
        return False
    compact = ''.join(str(decl.vartype).split())
    return compact == 'char*[]'

def _is_local_static_mutex(decl):
    """Return True if *decl* is a local "static PyMutex" variable.

    "Local" means the declaration has a non-None parent.  Every
    attribute is looked up defensively, so a declaration-like object
    that lacks "vartype", "parent", "storage" (or whose vartype lacks
    "typespec") produces False rather than an AttributeError.
    """
    vartype = getattr(decl, 'vartype', None)
    if vartype is None:
        return False

    if getattr(decl, 'parent', None) is None:
        # We only want to allow local variables
        return False

    if getattr(vartype, 'typespec', None) != 'PyMutex':
        return False
    return getattr(decl, 'storage', None) == 'static'

def _has_other_supported_type(decl):
    """Return True if *decl* matches one of the specially accepted cases.

    Those are: a "_kwtuple" in a ".c.h" file (which is asserted to be a
    clinic file), a local static mutex, or a type listed in
    _OTHER_SUPPORTED_TYPES (compared with whitespace removed and a
    leading "struct" dropped).
    """
    in_dot_c_h = hasattr(decl, 'file') and decl.file.filename.endswith('.c.h')
    if in_dot_c_h:
        assert 'clinic' in decl.file.filename, (decl,)
        if decl.name == '_kwtuple':
            return True

    # GH-127081: Local static mutexes are used to
    # wrap libc functions that aren't thread safe
    if _is_local_static_mutex(decl):
        return True

    words = str(decl.vartype).split()
    normalized = ''.join(words[1:] if words[0] == 'struct' else words)
    return normalized in _OTHER_SUPPORTED_TYPES


def _check_vartype(decl, typedecl, types, knowntypes):
    """Return the failure reason for the type of *decl*.

    A truthy result from _check_typespec() is returned as-is.
    Otherwise the result is None for an immutable vartype, FIXED_TYPE
    for a fixed type, and 'mutable' for anything else.
    """
    failure = _check_typespec(decl, typedecl, types, knowntypes)
    if failure:
        return failure

    vartype = decl.vartype
    if is_immutable(vartype):
        return None
    return FIXED_TYPE if is_fixed_type(vartype) else 'mutable'


def _check_typespec(decl, typedecl, types, knowntypes):
    """Return 'typespec' if the type of *decl* is unsupported, else None.

    *typedecl* is looked up in *types* and then in *knowntypes*.  When an
    entry is found, its recorded 'unsupported' value decides the outcome
    (FIXED_TYPE does not count as unsupported).  When nothing is found,
    plain scalar types (is_pots), system types and function pointers are
    accepted.
    """
    typespec = decl.vartype.typespec

    entry = None
    if typedecl is not None:
        entry = types.get(typedecl)
        if entry is None:
            entry = knowntypes.get(typedecl)

    if entry is not None:
        _, extra = entry
        # XXX Under what circumstances is extra None?
        info = {} if extra is None else extra
        unsupported = info.get('unsupported')
        if unsupported is not FIXED_TYPE and unsupported:
            return 'typespec'
        return None

    # Fall back to default known types.
    if (is_pots(typespec)
            or is_system_type(typespec)
            or is_funcptr(decl.vartype)):
        return None
    return 'typespec'


class Analyzed(_info.Analyzed):
    """An analyzed declaration together with its "supported" verdict.

    __init__() sets two attributes:
      unsupported -- None, FIXED_TYPE, or a tuple of failure reasons
      supported   -- True if unsupported is None or FIXED_TYPE,
                     otherwise "not unsupported"
    """

    @classonly
    def is_target(cls, raw):
        """Return True if the base class accepts *raw* and its kind is in KINDS."""
        return bool(super().is_target(raw)) and raw.kind in KINDS

    #@classonly
    #def _parse_raw_result(cls, result, extra):
    #    typedecl, extra = super()._parse_raw_result(result, extra)
    #    if typedecl is None:
    #        return None, extra
    #    raise NotImplementedError

    def __init__(self, item, typedecl=None, *, unsupported=None, **extra):
        """Normalize *unsupported* and derive the "supported" flag.

        A falsy *unsupported* becomes None, a str or TypeDeclaration is
        wrapped in a 1-tuple, FIXED_TYPE is kept as-is, and any other
        value is converted with tuple().

        Raises NotImplementedError when a variable is marked FIXED_TYPE.
        """
        if 'unsupported' in extra:
            raise NotImplementedError((item, typedecl, unsupported, extra))

        if not unsupported:
            reasons = None
        elif isinstance(unsupported, (str, TypeDeclaration)):
            reasons = (unsupported,)
        elif unsupported is FIXED_TYPE:
            reasons = unsupported
        else:
            reasons = tuple(unsupported)
        self.unsupported = reasons
        extra['unsupported'] = self.unsupported  # ...for __repr__(), etc.

        if self.unsupported is FIXED_TYPE and item.kind is KIND.VARIABLE:
            raise NotImplementedError(item, typedecl, reasons)
        if self.unsupported is None or self.unsupported is FIXED_TYPE:
            #self.supported = None
            self.supported = True
        else:
            self.supported = not self.unsupported
        super().__init__(item, typedecl, **extra)

    def render(self, fmt='line', *, itemonly=False):
        """Yield the rendered text for this item in the given format.

        'raw' yields repr(self).  'line' and 'brief' yield the single
        line from the base class, prefixed with '+' (supported), '-'
        (supported is False) or nothing, and a tab.  'full' yields the
        lines from the base class, followed by a "supported" line when
        the item is supported.  Any other format raises
        NotImplementedError.
        """
        if fmt == 'raw':
            yield repr(self)
            return

        lines = super().render(fmt, itemonly=itemonly)
        # XXX ???
        #if itemonly:
        #    yield from lines
        supported = self.supported
        if fmt in ('line', 'brief'):
            (line,) = lines
            if supported:
                marker = '+'
            elif supported is False:
                marker = '-'
            else:
                marker = ''
            yield '\t'.join([marker, line])
        elif fmt == 'full':
            yield from lines
            if supported:
                yield f'\tsupported:\t{supported}'
        else:
            # This includes 'summary', which is not implemented either.
            raise NotImplementedError(fmt)


class Analysis(_info.Analysis):
    """A collection of analysis results whose items are Analyzed objects."""

    _item_class = Analyzed

    @classonly
    def build_item(cls, info, result=None):
        """Build an item for *info* via the base class.

        Raises NotImplementedError unless *info* is a Declaration whose
        kind is in KINDS.
        """
        if isinstance(info, Declaration) and info.kind in KINDS:
            return super().build_item(info, result)
        raise NotImplementedError((info, result))


def check_globals(analysis):
    """Yield (item, message) for each unsupported, non-ignored variable.

    Items that are not variables, that are supported, or whose ID is in
    the table from read_ignored() are skipped.  The message includes the
    failure reason (or '???' when there is none), the storage class and
    the variable's type.
    """
    # yield (data, failure)
    ignored = read_ignored()
    for item in analysis:
        if (item.kind != KIND.VARIABLE
                or item.supported
                or item.id in ignored):
            continue

        reason = item.unsupported
        if not reason:
            reason = '???'
        elif not isinstance(reason, str) and len(reason) == 1:
            # Show a lone reason by itself rather than as a container.
            (reason,) = reason
        label = f'({reason})'
        message = f'not supported {label:20}\t{item.storage or ""} {item.vartype}'
        yield item, message