# Source code for forml.io.dsl._struct.kind

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

"""ETL type classes.
"""
import abc
import collections
import datetime
import decimal
import inspect
import numbers
import operator
import typing

import pandas

from .. import _exception

if typing.TYPE_CHECKING:
    from forml.io import dsl


class Meta(abc.ABCMeta):
    """Metaclass shared by all kind classes."""

    @property
    def __subkinds__(cls) -> typing.Iterable[type['dsl.Any']]:
        """Collect every concrete (non-abstract) descendant of this kind class.

        Returns:
            Set of all direct and indirect subclasses that are not abstract.
        """
        concrete = set()
        pending = list(cls.__subclasses__())
        # Walk the whole subclass tree with an explicit stack; abstract classes are still
        # descended into, they are just not reported themselves.
        while pending:
            kind = pending.pop()
            pending.extend(kind.__subclasses__())
            if not inspect.isabstract(kind):
                concrete.add(kind)
        return concrete


class Singleton(Meta):
    """Metaclass for singleton types.

    Each class created through this metaclass has its ``__new__`` replaced by a closure that lazily
    creates one instance of that class and returns the very same object on every later call.
    """

    def __new__(mcs, name: str, bases: tuple[type], namespace: dict[str, typing.Any]):
        """Create the class with a singleton-enforcing ``__new__`` injected into its namespace.

        Args:
            name: Name of the class being created.
            bases: Base classes of the class being created.
            namespace: Class body namespace; its ``__new__`` entry gets overwritten.

        Returns:
            The newly created class.
        """
        instance = None

        def new(cls: type['dsl.Any']) -> 'dsl.Any':
            """Singleton type."""
            nonlocal instance
            # Compare by identity rather than truthiness: an instance that evaluates as false
            # (a class defining ``__bool__`` or ``__len__``) must not be re-created on each call.
            if instance is None:
                instance = object.__new__(cls)
            return instance

        namespace['__new__'] = new
        return super().__new__(mcs, name, bases, namespace)


# Type variable standing for the native Python type behind a kind (see the ``Any.__type__`` annotation).
Native = typing.TypeVar('Native')


class Any(metaclass=Meta):
    """Base class of all types."""

    @property
    @abc.abstractmethod
    def __type__(self) -> type[Native]:
        """Native python supertype representing this kind.

        Returns:
            Native type.
        """

    @property
    @abc.abstractmethod
    def __rank__(self) -> int:
        """Rank (relative size) of the given kind.

        Useful, for example, to distinguish the largest sub-kind of the given kind.

        Returns:
            Cardinality value.
        """

    @abc.abstractmethod
    def __new__(cls, *args, **kwargs):
        """Abstract constructor."""
        raise NotImplementedError()

    def __eq__(self, other):
        # Kinds compare by class only; this base class carries no further state to compare.
        return other.__class__ == self.__class__

    def __hash__(self):
        # Kept consistent with __eq__: hash is derived from the class alone.
        return hash(self.__class__)

    def __repr__(self):
        return self.__class__.__name__

    @classmethod
    def match(cls, kind: 'dsl.Any') -> bool:
        """Check the given kind is of our type.

        Args:
            kind: Kind to be verified.

        Returns:
            True if instance of our type.
        """
        return isinstance(kind, cls)

    @classmethod
    def ensure(cls, kind: 'dsl.Any') -> 'dsl.Any':
        """Ensure the given kind is of our type.

        Args:
            kind: Kind to be verified.

        Returns:
            Original kind if instance of our type.

        Raises:
            dsl.GrammarError: If the kind is not an instance of our type.
        """
        if not cls.match(kind):
            raise _exception.GrammarError(f'{kind} is not a {cls.__name__}')
        return kind

    @classmethod
    def cast(cls, value: 'dsl.Native') -> 'dsl.Native':
        """Cast the value to this kind.

        Args:
            value: Value to cast.

        Returns:
            Value as this kind.

        Raises:
            dsl.CastError: If casting that value is not possible.
        """
        try:
            return cls._cast(value)
        # ArithmeticError is included because native conversions also fail with its subclasses:
        # decimal.Decimal('x') raises decimal.InvalidOperation and int(float('inf')) raises
        # OverflowError - neither derives from ValueError or TypeError.
        except (ValueError, TypeError, ArithmeticError) as err:
            raise _exception.CastError(f'Unable to cast {value!r} as {cls.__name__}') from err

    @classmethod
    def _cast(cls, value: 'dsl.Native') -> 'dsl.Native':
        """Kind-specific conversion hook invoked by :meth:`cast`.

        The default implementation calls the native ``__type__`` with the value.

        Args:
            value: Value to cast.

        Returns:
            Value as this kind.
        """
        return cls.__type__(value)
class Primitive(Any, metaclass=Singleton):  # pylint: disable=abstract-method
    """Base class of the primitive (singleton) data types."""

    def __new__(cls, *args, **kwargs):
        """Placeholder constructor - the metaclass swaps it for the singleton one."""
        raise AssertionError('Expected to be replaced by metaclass')

    @classmethod
    @typing.final
    def cast(cls, value: 'dsl.Native') -> 'dsl.Native':
        """Cast the value to this kind, passing through values already of the native type.

        Args:
            value: Value to cast.

        Returns:
            The value itself if it already is an instance of ``__type__``, otherwise the result
            of the inherited cast.
        """
        already_native = isinstance(value, cls.__type__)  # pylint: disable=isinstance-second-argument-not-valid-type
        return value if already_native else super().cast(value)


class Numeric(Primitive, metaclass=abc.ABCMeta):  # pylint: disable=abstract-method
    """Base class of the numeric data types."""

    __type__ = numbers.Number

    @classmethod
    def _cast(cls, value: 'dsl.Native') -> 'dsl.Native':
        """Convert the value using ``pandas.to_numeric``.

        Args:
            value: Value to cast.

        Returns:
            Numeric representation of the value.
        """
        converted = pandas.to_numeric(value)
        return converted
class Boolean(Primitive):
    """Kind representing boolean values."""

    __rank__ = 0
    __type__ = bool
class Integer(Numeric):
    """Kind representing integer values."""

    __rank__ = 1
    __type__ = numbers.Integral

    @classmethod
    def _cast(cls, value: 'dsl.Native') -> 'dsl.Native':
        """Convert the value using the builtin ``int``.

        Args:
            value: Value to cast.

        Returns:
            Integer representation of the value.
        """
        as_int = int(value)
        return as_int
class Float(Numeric):
    """Kind representing floating point values."""

    __rank__ = 2
    __type__ = numbers.Real

    @classmethod
    def _cast(cls, value: 'dsl.Native') -> 'dsl.Native':
        """Convert the value using the builtin ``float``.

        Args:
            value: Value to cast.

        Returns:
            Float representation of the value.
        """
        as_float = float(value)
        return as_float
class Decimal(Numeric):
    """Kind representing decimal values."""

    __rank__ = 1
    __type__ = decimal.Decimal

    @classmethod
    def _cast(cls, value: 'dsl.Native') -> 'dsl.Native':
        """Convert the value using ``decimal.Decimal``.

        Args:
            value: Value to cast.

        Returns:
            Decimal representation of the value.
        """
        as_decimal = decimal.Decimal(value)
        return as_decimal
class String(Primitive):
    """Kind representing string values."""

    __rank__ = 1
    __type__ = str
class Date(Primitive):
    """Kind representing date values."""

    __rank__ = 2
    __type__ = datetime.date

    @classmethod
    def _cast(cls, value: 'dsl.Native') -> 'dsl.Native':
        """Parse the value with ``pandas.to_datetime`` and keep only its date part.

        Args:
            value: Value to cast.

        Returns:
            Result of calling ``.date()`` on the parsed value.
        """
        parsed = pandas.to_datetime(value)
        return parsed.date()
class Timestamp(Date):
    """Kind representing timestamp values."""

    __rank__ = 1
    __type__ = datetime.datetime

    @classmethod
    def _cast(cls, value: 'dsl.Native') -> 'dsl.Native':
        """Parse the value with ``pandas.to_datetime``.

        Args:
            value: Value to cast.

        Returns:
            The parsed value as returned by pandas.
        """
        parsed = pandas.to_datetime(value)
        return parsed
class Compound(Any, tuple, metaclass=abc.ABCMeta):
    """Base class of the complex (tuple-backed) data types."""

    @abc.abstractmethod
    def __new__(cls, *args, **kwargs):
        """Abstract constructor."""
        raise NotImplementedError()

    @property
    def __rank__(self) -> int:
        """Rank of a compound kind is the number of items it holds."""
        return len(self)

    def __eq__(self, other):
        # Same kind class first, only then compare the tuple content.
        if not Any.__eq__(self, other):
            return False
        return tuple.__eq__(self, other)

    def __hash__(self):
        # Combine the class-based hash with the content-based tuple hash.
        class_hash = Any.__hash__(self)
        content_hash = tuple.__hash__(self)
        return class_hash ^ content_hash

    @classmethod
    def _cast(cls, value: 'dsl.Native') -> 'dsl.Native':
        """Casting of compound values is not supported.

        Args:
            value: Value to cast (unused).

        Raises:
            dsl.UnsupportedError: Always.
        """
        raise _exception.UnsupportedError('Compound value casting not implemented.')
class Array(Compound):
    """Kind representing arrays.

    Args:
        element: Kind of the array elements.
    """

    __type__ = typing.Sequence

    # The element kind is stored as the only item of the underlying tuple.
    element: 'dsl.Any' = property(operator.itemgetter(0))

    def __new__(cls, element: 'dsl.Any'):
        content = (element,)
        return tuple.__new__(cls, content)
class Map(Compound):
    """Kind representing maps.

    Args:
        key: Kind of the map keys.
        value: Kind of the map values.
    """

    __type__ = typing.Mapping

    # Key and value kinds are stored as the first and second item of the underlying tuple.
    key: 'dsl.Any' = property(operator.itemgetter(0))
    value: 'dsl.Any' = property(operator.itemgetter(1))

    def __new__(cls, key: 'dsl.Any', value: 'dsl.Any'):
        content = (key, value)
        return tuple.__new__(cls, content)
class Struct(Compound):
    """Kind representing structures.

    Args:
        element: Mapping of attribute name strings and their kinds.
    """

    class Element(collections.namedtuple('Element', ['name', 'kind'])):
        """Single named attribute of a struct together with its kind."""

        def __eq__(self, other):
            # Only elements of the very same class are compared by content.
            if other.__class__ != self.__class__:
                return False
            return super().__eq__(other)

        def __hash__(self):
            content_hash = super().__hash__()
            return hash(self.__class__) ^ content_hash

    __type__ = object

    def __new__(cls, **element: 'dsl.Any'):
        members = (cls.Element(name, kind) for name, kind in element.items())
        return tuple.__new__(cls, members)
def reflect(value: typing.Any) -> 'dsl.Any':
    """Infer the ETL kind of the provided value.

    Primitive kinds are tried first in ascending rank order. Non-empty sequences become an
    ``Array`` of the kind of their first item. Non-empty mappings with same-typed keys become a
    ``Map`` when the values are same-typed too, or a ``Struct`` when the keys are strings.

    Args:
        value: Value to be inspected for type.

    Returns:
        ETL type.

    Raises:
        ValueError: If no kind can be determined for the value.
    """

    def same(seq: typing.Iterable[typing.Any]) -> bool:
        """Tell whether all items of a non-empty sequence are instances of the first item's type.

        Args:
            seq: Sequence of elements to check.

        Returns:
            True if all same.
        """
        items = iter(seq)
        head_type = type(next(items))
        return all(isinstance(item, head_type) for item in items)

    by_rank = sorted(Primitive.__subkinds__, key=operator.attrgetter('__rank__'))
    for candidate in by_rank:
        if isinstance(value, candidate.__type__):
            return candidate()

    if value:
        if isinstance(value, typing.Sequence):
            # The first item alone determines the element kind.
            return Array(reflect(value[0]))
        if isinstance(value, typing.Mapping):
            keys = tuple(value.keys())
            vals = tuple(value.values())
            if same(keys):
                key_kind = reflect(keys[0])
                if same(vals):
                    return Map(key_kind, reflect(vals[0]))
                if key_kind == String():
                    fields = {name: reflect(item) for name, item in value.items()}
                    return Struct(**fields)

    raise ValueError(f'Value {value} is of unknown ETL type')