TreeExtensions.FastTreeTweedie 方法
定义
重要
一些信息与预发行产品相关,相应产品在发行之前可能会进行重大修改。 对于此处提供的信息,Microsoft 不作任何明示或暗示的担保。
重载
FastTreeTweedie(RegressionCatalog+RegressionTrainers, String, String, String, Int32, Int32, Int32, Double)
创建 FastTreeTweedieTrainer,它使用决策树回归模型预测目标。
public static Microsoft.ML.Trainers.FastTree.FastTreeTweedieTrainer FastTreeTweedie (this Microsoft.ML.RegressionCatalog.RegressionTrainers catalog, string labelColumnName = "Label", string featureColumnName = "Features", string exampleWeightColumnName = default, int numberOfLeaves = 20, int numberOfTrees = 100, int minimumExampleCountPerLeaf = 10, double learningRate = 0.2);
static member FastTreeTweedie : Microsoft.ML.RegressionCatalog.RegressionTrainers * string * string * string * int * int * int * double -> Microsoft.ML.Trainers.FastTree.FastTreeTweedieTrainer
<Extension()>
Public Function FastTreeTweedie (catalog As RegressionCatalog.RegressionTrainers, Optional labelColumnName As String = "Label", Optional featureColumnName As String = "Features", Optional exampleWeightColumnName As String = Nothing, Optional numberOfLeaves As Integer = 20, Optional numberOfTrees As Integer = 100, Optional minimumExampleCountPerLeaf As Integer = 10, Optional learningRate As Double = 0.2) As FastTreeTweedieTrainer
参数
- exampleWeightColumnName
- String
示例权重列的名称(可选)。
- numberOfLeaves
- Int32
每个决策树的最大叶数。
- numberOfTrees
- Int32
要在集成模型(ensemble)中创建的决策树总数。
- minimumExampleCountPerLeaf
- Int32
形成新的叶节点所需的最小数据点数。
- learningRate
- Double
学习速率。
返回
示例
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;
namespace Samples.Dynamic.Trainers.Regression
{
    /// <summary>
    /// Sample that trains a FastTreeTweedie regression model with the default
    /// trainer settings on synthetic data, prints a few predictions and then
    /// prints the evaluation metrics.
    /// </summary>
    public static class FastTreeTweedieRegression
    {
        // Number of feature values in every generated example. Shared by the
        // data generator and the DataPoint schema so the two cannot drift
        // apart.
        private const int FeatureCount = 50;

        // This example requires installation of additional NuGet
        // package for Microsoft.ML.FastTree found at
        // https://www.nuget.org/packages/Microsoft.ML.FastTree/
        public static void Example()
        {
            // Create a new context for ML.NET operations. It can be used for
            // exception tracking and logging, as a catalog of available
            // operations and as the source of randomness. Setting the seed to
            // a fixed number in this example to make outputs deterministic.
            var mlContext = new MLContext(seed: 0);

            // Create a list of training data points.
            var dataPoints = GenerateRandomDataPoints(1000);

            // Convert the list of data points to an IDataView object, which is
            // consumable by ML.NET API.
            var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);

            // Define the trainer.
            var pipeline = mlContext.Regression.Trainers.FastTreeTweedie(
                labelColumnName: nameof(DataPoint.Label),
                featureColumnName: nameof(DataPoint.Features));

            // Train the model.
            var model = pipeline.Fit(trainingData);

            // Create testing data. Use different random seed to make it
            // different from training data.
            var testData = mlContext.Data.LoadFromEnumerable(
                GenerateRandomDataPoints(5, seed: 123));

            // Run the model on test data set.
            var transformedTestData = model.Transform(testData);

            // Convert IDataView object to a list.
            var predictions = mlContext.Data.CreateEnumerable<Prediction>(
                transformedTestData, reuseRowObject: false).ToList();

            // Look at 5 predictions for the Label, side by side with the
            // actual Label for comparison.
            foreach (var p in predictions)
            {
                Console.WriteLine(
                    $"Label: {p.Label:F3}, Prediction: {p.Score:F3}");
            }

            // Expected output:
            //   Label: 0.985, Prediction: 0.945
            //   Label: 0.155, Prediction: 0.104
            //   Label: 0.515, Prediction: 0.515
            //   Label: 0.566, Prediction: 0.448
            //   Label: 0.096, Prediction: 0.082

            // Evaluate the overall metrics
            var metrics = mlContext.Regression.Evaluate(transformedTestData);
            PrintMetrics(metrics);

            // Expected output:
            //   Mean Absolute Error: 0.04
            //   Mean Squared Error: 0.00
            //   Root Mean Squared Error: 0.06
            //   RSquared: 0.96 (closer to 1 is better)
        }

        // Lazily produces `count` synthetic examples. Each example draws a
        // random label and then FeatureCount features, every one equal to the
        // label plus fresh random noise, all from one Random seeded with
        // `seed`.
        private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count,
            int seed = 0)
        {
            var random = new Random(seed);
            for (int i = 0; i < count; i++)
            {
                // The label must be drawn before the feature noise so the
                // random sequence (and thus the generated data) is stable.
                float label = (float)random.NextDouble();
                yield return new DataPoint
                {
                    Label = label,
                    // Create random features that are correlated with the
                    // label.
                    Features = Enumerable.Repeat(label, FeatureCount).Select(
                        x => x + (float)random.NextDouble()).ToArray()
                };
            }
        }

        // Example with label and FeatureCount feature values. A data set is a
        // collection of such examples.
        private class DataPoint
        {
            public float Label { get; set; }

            [VectorType(FeatureCount)]
            public float[] Features { get; set; }
        }

        // Class used to capture predictions.
        private class Prediction
        {
            // Original label.
            public float Label { get; set; }

            // Predicted score from the trainer.
            public float Score { get; set; }
        }

        // Print some evaluation metrics to regression problems. Values are
        // rounded to two decimals so the console output matches the
        // "Expected output" listed in Example().
        private static void PrintMetrics(RegressionMetrics metrics)
        {
            Console.WriteLine(
                $"Mean Absolute Error: {metrics.MeanAbsoluteError:F2}");
            Console.WriteLine(
                $"Mean Squared Error: {metrics.MeanSquaredError:F2}");
            Console.WriteLine(
                $"Root Mean Squared Error: {metrics.RootMeanSquaredError:F2}");
            Console.WriteLine($"RSquared: {metrics.RSquared:F2}");
        }
    }
}
适用于
FastTreeTweedie(RegressionCatalog+RegressionTrainers, FastTreeTweedieTrainer+Options)
使用高级选项创建 FastTreeTweedieTrainer,该训练器使用决策树回归模型预测目标。
public static Microsoft.ML.Trainers.FastTree.FastTreeTweedieTrainer FastTreeTweedie (this Microsoft.ML.RegressionCatalog.RegressionTrainers catalog, Microsoft.ML.Trainers.FastTree.FastTreeTweedieTrainer.Options options);
static member FastTreeTweedie : Microsoft.ML.RegressionCatalog.RegressionTrainers * Microsoft.ML.Trainers.FastTree.FastTreeTweedieTrainer.Options -> Microsoft.ML.Trainers.FastTree.FastTreeTweedieTrainer
<Extension()>
Public Function FastTreeTweedie (catalog As RegressionCatalog.RegressionTrainers, options As FastTreeTweedieTrainer.Options) As FastTreeTweedieTrainer
参数
- options
- FastTreeTweedieTrainer.Options
训练器选项。
返回
示例
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Trainers.FastTree;
namespace Samples.Dynamic.Trainers.Regression
{
    /// <summary>
    /// Sample that trains a FastTreeTweedie regression model configured
    /// through <c>FastTreeTweedieTrainer.Options</c> on synthetic data, prints
    /// a few predictions and then prints the evaluation metrics.
    /// </summary>
    public static class FastTreeTweedieWithOptionsRegression
    {
        // Number of feature values in every generated example. Shared by the
        // data generator and the DataPoint schema so the two cannot drift
        // apart.
        private const int FeatureCount = 50;

        // This example requires installation of additional NuGet
        // package for Microsoft.ML.FastTree found at
        // https://www.nuget.org/packages/Microsoft.ML.FastTree/
        public static void Example()
        {
            // Create a new context for ML.NET operations. It can be used for
            // exception tracking and logging, as a catalog of available
            // operations and as the source of randomness. Setting the seed to
            // a fixed number in this example to make outputs deterministic.
            var mlContext = new MLContext(seed: 0);

            // Create a list of training data points.
            var dataPoints = GenerateRandomDataPoints(1000);

            // Convert the list of data points to an IDataView object, which is
            // consumable by ML.NET API.
            var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);

            // Define trainer options.
            var options = new FastTreeTweedieTrainer.Options
            {
                LabelColumnName = nameof(DataPoint.Label),
                FeatureColumnName = nameof(DataPoint.Features),
                // Use L2Norm for early stopping.
                EarlyStoppingMetric =
                    Microsoft.ML.Trainers.FastTree.EarlyStoppingMetric.L2Norm,
                // Create a simpler model by penalizing usage of new features.
                FeatureFirstUsePenalty = 0.1,
                // Reduce the number of trees to 50.
                NumberOfTrees = 50
            };

            // Define the trainer.
            var pipeline =
                mlContext.Regression.Trainers.FastTreeTweedie(options);

            // Train the model.
            var model = pipeline.Fit(trainingData);

            // Create testing data. Use different random seed to make it
            // different from training data.
            var testData = mlContext.Data.LoadFromEnumerable(
                GenerateRandomDataPoints(5, seed: 123));

            // Run the model on test data set.
            var transformedTestData = model.Transform(testData);

            // Convert IDataView object to a list.
            var predictions = mlContext.Data.CreateEnumerable<Prediction>(
                transformedTestData, reuseRowObject: false).ToList();

            // Look at 5 predictions for the Label, side by side with the
            // actual Label for comparison.
            foreach (var p in predictions)
            {
                Console.WriteLine(
                    $"Label: {p.Label:F3}, Prediction: {p.Score:F3}");
            }

            // Expected output:
            //   Label: 0.985, Prediction: 0.954
            //   Label: 0.155, Prediction: 0.103
            //   Label: 0.515, Prediction: 0.450
            //   Label: 0.566, Prediction: 0.515
            //   Label: 0.096, Prediction: 0.078

            // Evaluate the overall metrics
            var metrics = mlContext.Regression.Evaluate(transformedTestData);
            PrintMetrics(metrics);

            // Expected output:
            //   Mean Absolute Error: 0.04
            //   Mean Squared Error: 0.00
            //   Root Mean Squared Error: 0.05
            //   RSquared: 0.98 (closer to 1 is better)
        }

        // Lazily produces `count` synthetic examples. Each example draws a
        // random label and then FeatureCount features, every one equal to the
        // label plus fresh random noise, all from one Random seeded with
        // `seed`.
        private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count,
            int seed = 0)
        {
            var random = new Random(seed);
            for (int i = 0; i < count; i++)
            {
                // The label must be drawn before the feature noise so the
                // random sequence (and thus the generated data) is stable.
                float label = (float)random.NextDouble();
                yield return new DataPoint
                {
                    Label = label,
                    // Create random features that are correlated with the
                    // label.
                    Features = Enumerable.Repeat(label, FeatureCount).Select(
                        x => x + (float)random.NextDouble()).ToArray()
                };
            }
        }

        // Example with label and FeatureCount feature values. A data set is a
        // collection of such examples.
        private class DataPoint
        {
            public float Label { get; set; }

            [VectorType(FeatureCount)]
            public float[] Features { get; set; }
        }

        // Class used to capture predictions.
        private class Prediction
        {
            // Original label.
            public float Label { get; set; }

            // Predicted score from the trainer.
            public float Score { get; set; }
        }

        // Print some evaluation metrics to regression problems. Values are
        // rounded to two decimals so the console output matches the
        // "Expected output" listed in Example().
        private static void PrintMetrics(RegressionMetrics metrics)
        {
            Console.WriteLine(
                $"Mean Absolute Error: {metrics.MeanAbsoluteError:F2}");
            Console.WriteLine(
                $"Mean Squared Error: {metrics.MeanSquaredError:F2}");
            Console.WriteLine(
                $"Root Mean Squared Error: {metrics.RootMeanSquaredError:F2}");
            Console.WriteLine($"RSquared: {metrics.RSquared:F2}");
        }
    }
}