Dela via


BinaryClassificationCatalog.CalibratorsCatalog.Isotonic Method

Definition

Adds probability column by training pair adjacent violators calibrator.

public Microsoft.ML.Calibrators.IsotonicCalibratorEstimator Isotonic (string labelColumnName = "Label", string scoreColumnName = "Score", string exampleWeightColumnName = default);
member this.Isotonic : string * string * string -> Microsoft.ML.Calibrators.IsotonicCalibratorEstimator
Public Function Isotonic (Optional labelColumnName As String = "Label", Optional scoreColumnName As String = "Score", Optional exampleWeightColumnName As String = Nothing) As IsotonicCalibratorEstimator

Parameters

labelColumnName
String

The name of the label column.

scoreColumnName
String

The name of the score column.

exampleWeightColumnName
String

The name of the example weight column (optional).

Returns

Examples

using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML;

namespace Samples.Dynamic.Trainers.BinaryClassification.Calibrators
{
    public static class Isotonic
    {
        public static void Example()
        {
            // Create a new context for ML.NET operations. It can be used for
            // exception tracking and logging, as a catalog of available operations
            // and as the source of randomness. Setting the seed to a fixed number
            // in this example to make outputs deterministic.
            var mlContext = new MLContext(seed: 0);

            // Download and featurize the dataset.
            var data = Microsoft.ML.SamplesUtils.DatasetUtils
                .LoadFeaturizedAdultDataset(mlContext);

            // Leave out 10% of data for testing.
            var trainTestData = mlContext.Data
                .TrainTestSplit(data, testFraction: 0.3);

            // Create data training pipeline for non calibrated trainer and train
            // Naive calibrator on top of it.
            var pipeline = mlContext.BinaryClassification.Trainers
                .AveragedPerceptron();

            // Fit the pipeline, and get a transformer that knows how to score new
            // data.
            var transformer = pipeline.Fit(trainTestData.TrainSet);
            // Fit this pipeline to the training data.
            // Let's score the new data. The score will give us a numerical
            // estimation of the chance that the particular sample bears positive
            // sentiment. This estimate is relative to the numbers obtained.
            var scoredData = transformer.Transform(trainTestData.TestSet);
            var outScores = mlContext.Data
                .CreateEnumerable<ScoreValue>(scoredData, reuseRowObject: false);

            PrintScore(outScores, 5);
            // Preview of scoredDataPreview.RowView
            // Score  -0.09044361
            // Score  -9.105377
            // Score  -11.049
            // Score  -3.061928
            // Score  -6.375817

            // Let's train a calibrator estimator on this scored dataset. The
            // trained calibrator estimator produces a transformer that can
            // transform the scored data by adding a new column names "Probability".
            var calibratorEstimator = mlContext.BinaryClassification.Calibrators
                .Isotonic();

            var calibratorTransformer = calibratorEstimator.Fit(scoredData);

            // Transform the scored data with a calibrator transformer by adding a
            // new column names "Probability". This column is a calibrated version
            // of the "Score" column, meaning its values are a valid probability
            // value in the [0, 1] interval representing the chance that the
            // respective sample bears positive sentiment.
            var finalData = calibratorTransformer.Transform(scoredData);
            var outScoresAndProbabilities = mlContext.Data
                .CreateEnumerable<ScoreAndProbabilityValue>(finalData,
                reuseRowObject: false);

            PrintScoreAndProbability(outScoresAndProbabilities, 5);
            // Score -0.09044361  Probability 0.4473684
            // Score -9.105377    Probability 0.02122641
            // Score -11.049      Probability 0.005328597
            // Score -3.061928    Probability 0.2041801
            // Score -6.375817    Probability 0.05836574
        }

        private static void PrintScore(IEnumerable<ScoreValue> values, int numRows)
        {
            foreach (var value in values.Take(numRows))
                Console.WriteLine("{0, -10} {1, -10}", "Score", value.Score);
        }

        private static void PrintScoreAndProbability(
            IEnumerable<ScoreAndProbabilityValue> values, int numRows)

        {
            foreach (var value in values.Take(numRows))
                Console.WriteLine("{0, -10} {1, -10} {2, -10} {3, -10}", "Score",
                    value.Score, "Probability", value.Probability);

        }

        private class ScoreValue
        {
            public float Score { get; set; }
        }

        private class ScoreAndProbabilityValue
        {
            public float Score { get; set; }
            public float Probability { get; set; }
        }
    }
}

Remarks

The calibrator finds a stepwise constant function (using the Pool Adjacent Violators Algorithm aka PAV) that minimizes the squared error. Also know as Isotonic regression

Applies to