AutoMLExperiment Class

Definition

The class for AutoML experiment.

public class AutoMLExperiment
type AutoMLExperiment = class
Public Class AutoMLExperiment
Inheritance
AutoMLExperiment

Examples

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Microsoft.ML.Data;

namespace Microsoft.ML.AutoML.Samples
{
    public static class AutoMLExperiment
    {
        public static async Task RunAsync()
        {
            var seed = 0;

            // Create a new context for ML.NET operations. It can be used for
            // exception tracking and logging, as a catalog of available operations
            // and as the source of randomness. Setting the seed to a fixed number
            // in this example to make outputs deterministic.
            var context = new MLContext(seed);

            // Create a list of training data points and convert it to IDataView.
            var data = GenerateRandomBinaryClassificationDataPoints(100, seed);
            var dataView = context.Data.LoadFromEnumerable(data);

            var trainTestSplit = context.Data.TrainTestSplit(dataView);

            // Define the sweepable pipeline using predefined binary trainers and search space.
            var pipeline = context.Auto().BinaryClassification(labelColumnName: "Label", featureColumnName: "Features");

            // Create an AutoML experiment
            var experiment = context.Auto().CreateExperiment();

            // Redirect AutoML log to console
            context.Log += (object o, LoggingEventArgs e) =>
            {
                if (e.Source == nameof(AutoMLExperiment) && e.Kind > Runtime.ChannelMessageKind.Trace)
                {
                    Console.WriteLine(e.RawMessage);
                }
            };

            // Configure the experiment to optimize the "Accuracy" metric on the given dataset.
            // This experiment will run hyper-parameter optimization on the given pipeline.
            experiment.SetPipeline(pipeline)
                      .SetDataset(trainTestSplit.TrainSet, fold: 5) // use 5-fold cross validation to evaluate each trial
                      .SetBinaryClassificationMetric(BinaryClassificationMetric.Accuracy, "Label")
                      .SetMaxModelToExplore(100); // explore 100 trials

            // start automl experiment
            var result = await experiment.RunAsync();

            // Expected output samples during training:
            //      Update Running Trial - Id: 0
            //      Update Completed Trial - Id: 0 - Metric: 0.5536912515402218 - Pipeline: FastTreeBinary - Duration: 595 - Peak CPU: 0.00 % -Peak Memory in MB: 35.81
            //      Update Best Trial - Id: 0 - Metric: 0.5536912515402218 - Pipeline: FastTreeBinary

            // evaluate test dataset on best model.
            var bestModel = result.Model;
            var eval = bestModel.Transform(trainTestSplit.TestSet);
            var metrics = context.BinaryClassification.Evaluate(eval);

            PrintMetrics(metrics);

            // Expected output:
            //  Accuracy: 0.67
            //  AUC: 0.75
            //  F1 Score: 0.33
            //  Negative Precision: 0.88
            //  Negative Recall: 0.70
            //  Positive Precision: 0.25
            //  Positive Recall: 0.50

            //  TEST POSITIVE RATIO: 0.1667(2.0 / (2.0 + 10.0))
            //  Confusion table
            //            ||======================
            //  PREDICTED || positive | negative | Recall
            //  TRUTH     ||======================
            //   positive || 1 | 1 | 0.5000
            //   negative || 3 | 7 | 0.7000
            //            ||======================
            //  Precision || 0.2500 | 0.8750 |
        }

        private static IEnumerable<BinaryClassificationDataPoint> GenerateRandomBinaryClassificationDataPoints(int count,
            int seed = 0)
        {
            var random = new Random(seed);
            float randomFloat() => (float)random.NextDouble();
            for (int i = 0; i < count; i++)
            {
                var label = randomFloat() > 0.5f;
                yield return new BinaryClassificationDataPoint
                {
                    Label = label,
                    // Create random features that are correlated with the label.
                    // For data points with false label, the feature values are
                    // slightly increased by adding a constant.
                    Features = Enumerable.Repeat(label, 50)
                        .Select(x => x ? randomFloat() : randomFloat() + 0.1f)
                        .ToArray()
                };
            }
        }

        // Example with label and 50 feature values. A data set is a collection of
        // such examples.
        private class BinaryClassificationDataPoint
        {
            public bool Label { get; set; }

            [VectorType(50)]
            public float[] Features { get; set; }
        }

        // Class used to capture predictions.
        private class Prediction
        {
            // Original label.
            public bool Label { get; set; }
            // Predicted label from the trainer.
            public bool PredictedLabel { get; set; }
        }

        // Pretty-print BinaryClassificationMetrics objects.
        private static void PrintMetrics(BinaryClassificationMetrics metrics)
        {
            Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}");
            Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}");
            Console.WriteLine($"F1 Score: {metrics.F1Score:F2}");
            Console.WriteLine($"Negative Precision: " +
                $"{metrics.NegativePrecision:F2}");

            Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}");
            Console.WriteLine($"Positive Precision: " +
                $"{metrics.PositivePrecision:F2}");

            Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}\n");
            Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable());
        }
    }
}

Constructors

AutoMLExperiment(MLContext, AutoMLExperiment+AutoMLExperimentSettings)

The class for AutoML experiment.

Methods

AddSearchSpace(String, SearchSpace)

The class for AutoML experiment.

Run()

Runs the experiment and returns the best trial result synchronously.

RunAsync(CancellationToken)

Runs the experiment and returns the best trial result asynchronously. If ct is cancelled and at least one trial has completed, the experiment returns the current best trial result; if no trial has completed, it throws a TimeoutException with the message "Training time finished without completing a trial run". Note also that this function does not return immediately after ct is cancelled; instead, it calls Microsoft.ML.MLContext.CancelExecution to cancel all training processes and waits for all running trials to be cancelled or to complete.
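
A minimal sketch of the cancellation pattern described above, assuming the experiment variable from the example on this page; the 10-minute budget is arbitrary:

var cts = new CancellationTokenSource(TimeSpan.FromMinutes(10));
try
{
    // Returns the best trial completed so far; throws TimeoutException
    // if the token is cancelled before any trial has completed.
    var result = await experiment.RunAsync(cts.Token);
    var bestModel = result.Model;
}
catch (TimeoutException)
{
    Console.WriteLine("No trial completed within the time budget.");
}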

SetMaximumMemoryUsageInMegaByte(Double)

The class for AutoML experiment.

SetMaxModelToExplore(Int32)

The class for AutoML experiment.

SetMonitor<TMonitor>()

The class for AutoML experiment.

SetMonitor<TMonitor>(Func<IServiceProvider,TMonitor>)

The class for AutoML experiment.

SetMonitor<TMonitor>(TMonitor)

The class for AutoML experiment.

SetTrainingTimeInSeconds(UInt32)

The class for AutoML experiment.

SetTrialRunner<TTrialRunner>()

The class for AutoML experiment.

SetTrialRunner<TTrialRunner>(Func<IServiceProvider,TTrialRunner>)

The class for AutoML experiment.

SetTrialRunner<TTrialRunner>(TTrialRunner)

The class for AutoML experiment.

SetTuner<TTuner>()

The class for AutoML experiment.

SetTuner<TTuner>(Func<IServiceProvider,TTuner>)

The class for AutoML experiment.

SetTuner<TTuner>(TTuner)

The class for AutoML experiment.

Extension Methods

SetBinaryClassificationMetric(AutoMLExperiment, BinaryClassificationMetric, String, String)

Sets Microsoft.ML.AutoML.BinaryMetricManager as the evaluation manager for the AutoMLExperiment. This makes metric the evaluation metric of the AutoMLExperiment.
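
A minimal sketch, reusing the experiment and the "Label" column from the example on this page; here the experiment is switched to optimize AUC instead of accuracy:

// Optimize area under the ROC curve, using "Label" as the label column.
experiment.SetBinaryClassificationMetric(
    BinaryClassificationMetric.AreaUnderRocCurve, "Label");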

SetCheckpoint(AutoMLExperiment, String)

Sets the checkpoint folder for the AutoMLExperiment. The checkpoint folder is used to save temporary output, run history, and other artifacts, which can be used to restore the training process from the last checkpoint and continue training.
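
A minimal sketch; the folder path is illustrative and can be any writable directory:

// Save run history so an interrupted experiment can resume
// from its last checkpoint on the next run.
experiment.SetCheckpoint("./automl-checkpoint");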

SetCostFrugalTuner(AutoMLExperiment)

Sets Microsoft.ML.AutoML.CostFrugalTuner as the tuner for hyperparameter optimization.

SetDataset(AutoMLExperiment, DataOperationsCatalog+TrainTestData)

Sets the train and validation datasets for the AutoMLExperiment. This makes the AutoMLExperiment use the TrainSet of trainValidationSplit to train a model and the TestSet of trainValidationSplit to evaluate it.

SetDataset(AutoMLExperiment, IDataView, IDataView, Boolean)

Sets the train and validation datasets for the AutoMLExperiment. This makes the AutoMLExperiment use train to train a model and validation to evaluate it.

SetDataset(AutoMLExperiment, IDataView, Int32, String)

Sets the cross-validation dataset for the AutoMLExperiment. This makes the AutoMLExperiment use an n=fold cross-validation split on dataset to train and evaluate models.
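
A rough sketch of how these SetDataset overloads are called, using the variables from the example on this page; the overload taking separate train and validation IDataViews follows the same pattern:

// Train/test pair from TrainTestSplit: TrainSet trains, TestSet validates.
experiment.SetDataset(trainTestSplit);

// Single IDataView with 5-fold cross validation (the form used in the example above).
experiment.SetDataset(trainTestSplit.TrainSet, fold: 5);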

SetEciCostFrugalTuner(AutoMLExperiment)

Sets Microsoft.ML.AutoML.EciCostFrugalTuner as the tuner for hyperparameter optimization. This tuner only works with search spaces from a SweepablePipeline.
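
A minimal sketch; pipeline is the SweepablePipeline created with context.Auto().BinaryClassification(...) in the example on this page, which provides the kind of search space this tuner expects:

experiment.SetPipeline(pipeline)
          .SetEciCostFrugalTuner();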

SetGridSearchTuner(AutoMLExperiment, Int32)

Sets Microsoft.ML.AutoML.GridSearchTuner as the tuner for hyperparameter optimization.

SetMulticlassClassificationMetric(AutoMLExperiment, MulticlassClassificationMetric, String, String)

Sets Microsoft.ML.AutoML.MultiClassMetricManager as the evaluation manager for the AutoMLExperiment. This makes metric the evaluation metric of the AutoMLExperiment.

SetPerformanceMonitor(AutoMLExperiment, Int32)

Sets DefaultPerformanceMonitor as the IPerformanceMonitor for the AutoMLExperiment.

SetPerformanceMonitor<TPerformanceMonitor>(AutoMLExperiment, Func<IServiceProvider,TPerformanceMonitor>)

Sets a custom performance monitor as the IPerformanceMonitor for the AutoMLExperiment.

SetPerformanceMonitor<TPerformanceMonitor>(AutoMLExperiment)

Sets a custom performance monitor as the IPerformanceMonitor for the AutoMLExperiment.

SetPipeline(AutoMLExperiment, SweepablePipeline)

Sets pipeline for training. This also makes the AutoMLExperiment use Microsoft.ML.AutoML.SweepablePipelineRunner, Microsoft.ML.AutoML.MLContextMonitor, and Microsoft.ML.AutoML.EciCostFrugalTuner for AutoML training.

SetRandomSearchTuner(AutoMLExperiment, Nullable<Int32>)

Sets Microsoft.ML.AutoML.RandomSearchTuner as the tuner for hyperparameter optimization. If seed is provided, it is used to initialize the Microsoft.ML.AutoML.RandomSearchTuner; otherwise, Seed is used.
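
A minimal sketch, fixing the seed so repeated runs sweep the same sequence of configurations:

experiment.SetRandomSearchTuner(seed: 0);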

SetRegressionMetric(AutoMLExperiment, RegressionMetric, String, String)

Sets Microsoft.ML.AutoML.RegressionMetricManager as the evaluation manager for the AutoMLExperiment. This makes metric the evaluation metric of the AutoMLExperiment.

SetSmacTuner(AutoMLExperiment, Int32, Int32, Int32, Int32, Single, Int32, Int32, Double, Int32)

Sets Microsoft.ML.AutoML.SmacTuner as the tuner for hyperparameter optimization. The performance of SMAC is determined to a large extent by numberOfTrees, nMinForSpit, and splitRatio, which are used to fit SMAC's inner regressor.

Applies to