# Source code for forml.evaluation._api

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

"""
Evaluation mode functionality.
"""
import abc
import typing

if typing.TYPE_CHECKING:
    from forml import evaluation, flow

class Outcome(typing.NamedTuple):
    """*True* and *predicted* outcome pair of output ports.

    These two ports provide all the data required to calculate the evaluation metric.
    """

    true: 'flow.Publishable'
    """True outcomes port."""

    pred: 'flow.Publishable'
    """Predicted outcomes port."""
class Metric(abc.ABC):
    """Evaluation metric interface."""

    @abc.abstractmethod
    def score(self, *outcomes: 'evaluation.Outcome') -> 'flow.Node':
        """Compose the metric evaluation task on top of the given *outcomes* DAG ports.

        Return the tail node of the new DAG that's expected to have a single output apply-port
        delivering the calculated metric.

        Args:
            outcomes: Individual outcomes partitions to be scored.

        Note:
            The input is (potentially) a *sequence* of outcome partitions as the metrics might need
            to be calculated from separate chunks (e.g. individual cross-validation folds).

        Returns:
            Single node with single output apply-port providing the metric output.
        """

    def __repr__(self):
        # The concrete subclass name is suffixed with the literal ``Metric``.
        return f'{self.__class__.__name__}Metric'
class Method(abc.ABC):
    """Interface for extending the pipeline DAG with the logic for producing *true* and *predicted*
    outcome columns from historical data.

    Attention:
        The *method* is only producing the *true*/*prediction* outcome pairs - not an evaluation
        result. The outcomes are expected to be passed to some :class:`evaluation.Metric
        <forml.evaluation.Metric>` implementation for the actual scoring.

    Implementations of this interface can deliver different evaluation techniques using strategies
    like *holdout* or *cross-validation* etc.
    """

    @abc.abstractmethod
    def produce(
        self, pipeline: 'flow.Composable', features: 'flow.Publishable', labels: 'flow.Publishable'
    ) -> typing.Iterable['evaluation.Outcome']:
        """Compose the DAG producing the true/predicted outcomes according to the given method.

        Args:
            pipeline: Evaluation subject - the solution pipeline to be backtested.
            features: Source port producing the historical features.
            labels: Source port producing the historical outcomes matching the features.

        Returns:
            A sequence of true/predicted outcome port pairs.
        """

    def __repr__(self):
        # The concrete subclass name is suffixed with the literal ``Method``.
        return f'{self.__class__.__name__}Method'