LightGbmExtensions.LightGbm 方法
定义
重要
一些信息与预发行产品相关,相应产品在发行之前可能会进行重大修改。 对于此处提供的信息,Microsoft 不作任何明示或暗示的担保。
重载
LightGbm(BinaryClassificationCatalog+BinaryClassificationTrainers, LightGbmBinaryTrainer+Options)
使用高级选项创建 LightGbmBinaryTrainer,该训练器使用梯度提升决策树二元分类模型来预测目标。
public static Microsoft.ML.Trainers.LightGbm.LightGbmBinaryTrainer LightGbm (this Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers catalog, Microsoft.ML.Trainers.LightGbm.LightGbmBinaryTrainer.Options options);
static member LightGbm : Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers * Microsoft.ML.Trainers.LightGbm.LightGbmBinaryTrainer.Options -> Microsoft.ML.Trainers.LightGbm.LightGbmBinaryTrainer
<Extension()>
Public Function LightGbm (catalog As BinaryClassificationCatalog.BinaryClassificationTrainers, options As LightGbmBinaryTrainer.Options) As LightGbmBinaryTrainer
参数
- options
- LightGbmBinaryTrainer.Options
训练器选项。
返回
示例
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Trainers.LightGbm;
namespace Samples.Dynamic.Trainers.BinaryClassification
{
    // Sample: trains a LightGBM binary classifier configured through
    // LightGbmBinaryTrainer.Options on synthetic data, then prints a few
    // predictions and the evaluation metrics for a separate test set.
    public static class LightGbmWithOptions
    {
        // Running this sample needs the additional NuGet package
        // Microsoft.ML.LightGbm:
        // https://www.nuget.org/packages/Microsoft.ML.LightGbm/
        public static void Example()
        {
            // The MLContext is the entry point for ML.NET operations. A fixed
            // seed is passed so that the outputs are deterministic.
            var mlContext = new MLContext(seed: 0);

            // Build 1000 synthetic training rows and expose them as an
            // IDataView, the data representation consumed by the ML.NET API.
            var trainingData = mlContext.Data.LoadFromEnumerable(
                GenerateRandomDataPoints(1000));

            // Booster settings are prepared first, then attached to the
            // trainer options.
            var boosterOptions = new GossBooster.Options
            {
                TopRate = 0.3,
                OtherRate = 0.2
            };
            var options = new LightGbmBinaryTrainer.Options
            {
                Booster = boosterOptions
            };

            // Create the trainer from the options and fit it.
            var trainer = mlContext.BinaryClassification.Trainers
                .LightGbm(options);
            var model = trainer.Fit(trainingData);

            // The test set uses a different random seed so that it differs
            // from the training set.
            var testData = mlContext.Data.LoadFromEnumerable(
                GenerateRandomDataPoints(500, seed: 123));

            // Score the test set with the trained model.
            var transformedTestData = model.Transform(testData);

            // Materialize the scored rows as strongly typed objects.
            var predictions = mlContext.Data
                .CreateEnumerable<Prediction>(
                    transformedTestData, reuseRowObject: false)
                .ToList();

            // Show the first 5 predictions next to their labels.
            foreach (var prediction in predictions.Take(5))
            {
                Console.WriteLine(
                    $"Label: {prediction.Label}, Prediction: {prediction.PredictedLabel}");
            }

            // Expected output:
            // Label: True, Prediction: True
            // Label: False, Prediction: True
            // Label: True, Prediction: True
            // Label: True, Prediction: True
            // Label: False, Prediction: False

            // Compute and print the overall metrics.
            var metrics = mlContext.BinaryClassification
                .Evaluate(transformedTestData);
            PrintMetrics(metrics);

            // Expected output:
            // Accuracy: 0.71
            // AUC: 0.76
            // F1 Score: 0.70
            // Negative Precision: 0.73
            // Negative Recall: 0.71
            // Positive Precision: 0.69
            // Positive Recall: 0.71
            //
            // TEST POSITIVE RATIO: 0.4760 (238.0/(238.0+262.0))
            // Confusion table
            // ||======================
            // PREDICTED || positive | negative | Recall
            // TRUTH ||======================
            // positive || 168 | 70 | 0.7059
            // negative || 88 | 174 | 0.6641
            // ||======================
            // Precision || 0.6563 | 0.7131 |
        }

        // Lazily yields `count` synthetic rows from a Random seeded with
        // `seed`. Each row has a boolean label and 50 features.
        private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count,
            int seed = 0)
        {
            var rng = new Random(seed);
            for (int row = 0; row < count; row++)
            {
                // The label is drawn first, then the 50 feature values.
                bool isPositive = (float)rng.NextDouble() > 0.5f;

                // Features are correlated with the label: rows labelled
                // false get a small constant added to every feature.
                var features = new float[50];
                for (int j = 0; j < features.Length; j++)
                {
                    float value = (float)rng.NextDouble();
                    features[j] = isPositive ? value : value + 0.03f;
                }

                yield return new DataPoint
                {
                    Label = isPositive,
                    Features = features
                };
            }
        }

        // One example: a label plus 50 feature values. A data set is a
        // collection of such examples.
        private class DataPoint
        {
            public bool Label { get; set; }

            [VectorType(50)]
            public float[] Features { get; set; }
        }

        // Shape used to read scored rows back from the model output.
        private class Prediction
        {
            // Original label.
            public bool Label { get; set; }

            // Label predicted by the trainer.
            public bool PredictedLabel { get; set; }
        }

        // Writes the binary classification metrics (two decimals each),
        // followed by the formatted confusion table, to the console.
        private static void PrintMetrics(BinaryClassificationMetrics metrics)
        {
            Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}");
            Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}");
            Console.WriteLine($"F1 Score: {metrics.F1Score:F2}");
            Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}");
            Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}");
            Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}");
            Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}\n");

            var confusionTable = metrics.ConfusionMatrix.GetFormattedConfusionTable();
            Console.WriteLine(confusionTable);
        }
    }
}
适用于
LightGbm(MulticlassClassificationCatalog+MulticlassClassificationTrainers, LightGbmMulticlassTrainer+Options)
使用高级选项创建 LightGbmMulticlassTrainer,该训练器使用梯度提升决策树多类分类模型来预测目标。
public static Microsoft.ML.Trainers.LightGbm.LightGbmMulticlassTrainer LightGbm (this Microsoft.ML.MulticlassClassificationCatalog.MulticlassClassificationTrainers catalog, Microsoft.ML.Trainers.LightGbm.LightGbmMulticlassTrainer.Options options);
static member LightGbm : Microsoft.ML.MulticlassClassificationCatalog.MulticlassClassificationTrainers * Microsoft.ML.Trainers.LightGbm.LightGbmMulticlassTrainer.Options -> Microsoft.ML.Trainers.LightGbm.LightGbmMulticlassTrainer
<Extension()>
Public Function LightGbm (catalog As MulticlassClassificationCatalog.MulticlassClassificationTrainers, options As LightGbmMulticlassTrainer.Options) As LightGbmMulticlassTrainer
参数
- options
- LightGbmMulticlassTrainer.Options
训练器选项。
返回
示例
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Trainers.LightGbm;
namespace Samples.Dynamic.Trainers.MulticlassClassification
{
    // Sample: trains a LightGBM multiclass classifier configured through
    // LightGbmMulticlassTrainer.Options on synthetic data, then prints a few
    // predictions and the evaluation metrics for a separate test set.
    public static class LightGbmWithOptions
    {
        // Running this sample needs the additional NuGet package
        // Microsoft.ML.LightGbm:
        // https://www.nuget.org/packages/Microsoft.ML.LightGbm/
        public static void Example()
        {
            // The MLContext is the entry point for ML.NET operations. A fixed
            // seed is passed so that the outputs are deterministic.
            var mlContext = new MLContext(seed: 0);

            // Build 1000 synthetic training rows and expose them as an
            // IDataView, the data representation consumed by the ML.NET API.
            var trainingData = mlContext.Data.LoadFromEnumerable(
                GenerateRandomDataPoints(1000));

            // Booster settings are prepared first, then attached to the
            // trainer options.
            var boosterOptions = new DartBooster.Options
            {
                TreeDropFraction = 0.15,
                XgboostDartMode = false
            };
            var options = new LightGbmMulticlassTrainer.Options
            {
                Booster = boosterOptions
            };

            // Pipeline: map the "Label" column values to key type, then
            // apply the LightGbm multiclass trainer.
            var labelToKey = mlContext.Transforms.Conversion
                .MapValueToKey("Label");
            var trainer = mlContext.MulticlassClassification.Trainers
                .LightGbm(options);
            var pipeline = labelToKey.Append(trainer);

            // Train the model.
            var model = pipeline.Fit(trainingData);

            // The test set uses a different random seed so that it differs
            // from the training set.
            var testData = mlContext.Data.LoadFromEnumerable(
                GenerateRandomDataPoints(500, seed: 123));

            // Score the test set with the trained model.
            var transformedTestData = model.Transform(testData);

            // Materialize the scored rows as strongly typed objects.
            var predictions = mlContext.Data
                .CreateEnumerable<Prediction>(
                    transformedTestData, reuseRowObject: false)
                .ToList();

            // Show the first 5 predictions next to their labels.
            foreach (var prediction in predictions.Take(5))
            {
                Console.WriteLine(
                    $"Label: {prediction.Label}, Prediction: {prediction.PredictedLabel}");
            }

            // Expected output:
            // Label: 1, Prediction: 1
            // Label: 2, Prediction: 2
            // Label: 3, Prediction: 3
            // Label: 2, Prediction: 2
            // Label: 3, Prediction: 3

            // Compute and print the overall metrics.
            var metrics = mlContext.MulticlassClassification
                .Evaluate(transformedTestData);
            PrintMetrics(metrics);

            // Expected output:
            // Micro Accuracy: 0.98
            // Macro Accuracy: 0.98
            // Log Loss: 0.07
            // Log Loss Reduction: 0.94
            // Confusion table
            // ||========================
            // PREDICTED || 0 | 1 | 2 | Recall
            // TRUTH ||========================
            // 0 || 156 | 0 | 4 | 0.9750
            // 1 || 0 | 171 | 6 | 0.9661
            // 2 || 1 | 0 | 162 | 0.9939
            // ||========================
            // Precision ||0.9936 |1.0000 |0.9419 |
        }

        // Lazily yields `count` synthetic rows from a Random seeded with
        // `seed`. Each row has a label of 1, 2 or 3 and 20 features; every
        // feature is a uniform draw from [-0.5, 0.5) shifted by label * 0.2.
        private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count,
            int seed = 0)
        {
            var rng = new Random(seed);
            for (int row = 0; row < count; row++)
            {
                // Next(1, 4) has an exclusive upper bound, so this is 1..3.
                int classId = rng.Next(1, 4);

                // Features are correlated with the label through the
                // label-proportional offset.
                var features = new float[20];
                for (int j = 0; j < features.Length; j++)
                {
                    features[j] = (float)(rng.NextDouble() - 0.5) + classId * 0.2f;
                }

                yield return new DataPoint
                {
                    Label = (uint)classId,
                    Features = features
                };
            }
        }

        // One example: a label plus 20 feature values. A data set is a
        // collection of such examples.
        private class DataPoint
        {
            public uint Label { get; set; }

            [VectorType(20)]
            public float[] Features { get; set; }
        }

        // Shape used to read scored rows back from the model output.
        private class Prediction
        {
            // Original label.
            public uint Label { get; set; }

            // Label predicted by the trainer.
            public uint PredictedLabel { get; set; }
        }

        // Writes the multiclass classification metrics (two decimals each),
        // followed by the formatted confusion table, to the console.
        public static void PrintMetrics(MulticlassClassificationMetrics metrics)
        {
            Console.WriteLine($"Micro Accuracy: {metrics.MicroAccuracy:F2}");
            Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}");
            Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}");
            Console.WriteLine($"Log Loss Reduction: {metrics.LogLossReduction:F2}\n");

            var confusionTable = metrics.ConfusionMatrix.GetFormattedConfusionTable();
            Console.WriteLine(confusionTable);
        }
    }
}
适用于
LightGbm(RankingCatalog+RankingTrainers, LightGbmRankingTrainer+Options)
使用高级选项创建 LightGbmRankingTrainer,该训练器使用梯度提升决策树排名模型来预测目标。
public static Microsoft.ML.Trainers.LightGbm.LightGbmRankingTrainer LightGbm (this Microsoft.ML.RankingCatalog.RankingTrainers catalog, Microsoft.ML.Trainers.LightGbm.LightGbmRankingTrainer.Options options);
static member LightGbm : Microsoft.ML.RankingCatalog.RankingTrainers * Microsoft.ML.Trainers.LightGbm.LightGbmRankingTrainer.Options -> Microsoft.ML.Trainers.LightGbm.LightGbmRankingTrainer
<Extension()>
Public Function LightGbm (catalog As RankingCatalog.RankingTrainers, options As LightGbmRankingTrainer.Options) As LightGbmRankingTrainer
参数
- options
- LightGbmRankingTrainer.Options
训练器选项。
返回
示例
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Trainers.LightGbm;
namespace Samples.Dynamic.Trainers.Ranking
{
    // Sample: trains a LightGBM ranker configured through
    // LightGbmRankingTrainer.Options on synthetic grouped data, then prints
    // a few scores and the ranking metrics for a separate test set.
    public static class LightGbmWithOptions
    {
        // Running this sample needs the additional NuGet package
        // Microsoft.ML.LightGbm:
        // https://www.nuget.org/packages/Microsoft.ML.LightGbm/
        public static void Example()
        {
            // Create a new context for ML.NET operations. It can be used for
            // exception tracking and logging, as a catalog of available operations
            // and as the source of randomness. Setting the seed to a fixed number
            // in this example to make outputs deterministic.
            var mlContext = new MLContext(seed: 0);

            // Create a list of training data points.
            var dataPoints = GenerateRandomDataPoints(1000);

            // Convert the list of data points to an IDataView object, which is
            // consumable by ML.NET API.
            var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);

            // Define trainer options. RowGroupColumnName points the trainer at
            // the DataPoint.GroupId column.
            var options = new LightGbmRankingTrainer.Options
            {
                NumberOfLeaves = 4,
                MinimumExampleCountPerGroup = 10,
                LearningRate = 0.1,
                NumberOfIterations = 2,
                Booster = new GradientBooster.Options
                {
                    FeatureFraction = 0.9
                },
                RowGroupColumnName = "GroupId"
            };

            // Define the trainer.
            var pipeline = mlContext.Ranking.Trainers.LightGbm(options);

            // Train the model.
            var model = pipeline.Fit(trainingData);

            // Create testing data. Use different random seed to make it different
            // from training data.
            var testData = mlContext.Data.LoadFromEnumerable(
                GenerateRandomDataPoints(500, seed: 123));

            // Run the model on test data set.
            var transformedTestData = model.Transform(testData);

            // Take the top 5 rows.
            var topTransformedTestData = mlContext.Data.TakeRows(
                transformedTestData, 5);

            // Convert IDataView object to a list.
            var predictions = mlContext.Data.CreateEnumerable<Prediction>(
                topTransformedTestData, reuseRowObject: false).ToList();

            // Print 5 predictions.
            foreach (var p in predictions)
            {
                Console.WriteLine($"Label: {p.Label}, Score: {p.Score}");
            }

            // Expected output:
            // Label: 5, Score: 0.05836755
            // Label: 1, Score: -0.06531862
            // Label: 3, Score: -0.004557075
            // Label: 3, Score: -0.009396422
            // Label: 1, Score: -0.05871891

            // Evaluate the overall metrics.
            var metrics = mlContext.Ranking.Evaluate(transformedTestData);
            PrintMetrics(metrics);

            // Expected output:
            // DCG: @1:28.83, @2:46.36, @3:56.18
            // NDCG: @1:0.69, @2:0.72, @3:0.74
        }

        // Lazily yields `count` synthetic rows from a Random seeded with
        // `seed`. Consecutive runs of `groupSize` rows share one GroupId
        // (row index / groupSize). Each row has a label drawn by
        // Next(0, 5) and 50 features.
        private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count,
            int seed = 0, int groupSize = 10)
        {
            var random = new Random(seed);
            float randomFloat() => (float)random.NextDouble();
            for (int i = 0; i < count; i++)
            {
                var label = random.Next(0, 5);
                yield return new DataPoint
                {
                    Label = (uint)label,
                    GroupId = (uint)(i / groupSize),
                    // Create random features that are correlated with the label.
                    // For data points with larger labels, the feature values are
                    // slightly increased by adding label * 0.1.
                    Features = Enumerable.Repeat(label, 50).Select(
                        x => randomFloat() + x * 0.1f).ToArray()
                };
            }
        }

        // Example with label, groupId, and 50 feature values. A data set is a
        // collection of such examples.
        // NOTE(review): the generator produces labels 0..4 and zero-based
        // group ids; confirm these agree with the KeyType counts below and
        // with how key-typed columns treat the raw value 0.
        private class DataPoint
        {
            [KeyType(5)]
            public uint Label { get; set; }

            [KeyType(100)]
            public uint GroupId { get; set; }

            [VectorType(50)]
            public float[] Features { get; set; }
        }

        // Class used to capture predictions.
        private class Prediction
        {
            // Original label.
            public uint Label { get; set; }

            // Score produced from the trainer.
            public float Score { get; set; }
        }

        // Pretty-print RankingMetrics objects as "@k:value" pairs, where k is
        // the 1-based position in the gains list and value has two decimals.
        // The previous implementation appended the literal text ":F2" to each
        // value instead of applying it as a format specifier, and omitted the
        // "@" prefix shown in the documented expected output.
        public static void PrintMetrics(RankingMetrics metrics)
        {
            Console.WriteLine("DCG: " + string.Join(", ",
                metrics.DiscountedCumulativeGains.Select(
                    (d, i) => $"@{i + 1}:{d:F2}")));
            Console.WriteLine("NDCG: " + string.Join(", ",
                metrics.NormalizedDiscountedCumulativeGains.Select(
                    (d, i) => $"@{i + 1}:{d:F2}")));
        }
    }
}
适用于
LightGbm(RegressionCatalog+RegressionTrainers, LightGbmRegressionTrainer+Options)
使用高级选项创建 LightGbmRegressionTrainer,该训练器使用梯度提升决策树回归模型来预测目标。
public static Microsoft.ML.Trainers.LightGbm.LightGbmRegressionTrainer LightGbm (this Microsoft.ML.RegressionCatalog.RegressionTrainers catalog, Microsoft.ML.Trainers.LightGbm.LightGbmRegressionTrainer.Options options);
static member LightGbm : Microsoft.ML.RegressionCatalog.RegressionTrainers * Microsoft.ML.Trainers.LightGbm.LightGbmRegressionTrainer.Options -> Microsoft.ML.Trainers.LightGbm.LightGbmRegressionTrainer
<Extension()>
Public Function LightGbm (catalog As RegressionCatalog.RegressionTrainers, options As LightGbmRegressionTrainer.Options) As LightGbmRegressionTrainer
参数
- options
- LightGbmRegressionTrainer.Options
训练器选项。
返回
示例
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Trainers.LightGbm;
namespace Samples.Dynamic.Trainers.Regression
{
    // Sample: trains a LightGBM regressor configured through
    // LightGbmRegressionTrainer.Options on synthetic data, then prints the
    // predictions and the regression metrics for a small test set.
    public static class LightGbmWithOptions
    {
        // This example requires installation of additional NuGet
        // package for Microsoft.ML.LightGBM
        // at https://www.nuget.org/packages/Microsoft.ML.LightGbm/
        public static void Example()
        {
            // Create a new context for ML.NET operations. It can be used for
            // exception tracking and logging, as a catalog of available operations
            // and as the source of randomness. Setting the seed to a fixed number
            // in this example to make outputs deterministic.
            var mlContext = new MLContext(seed: 0);

            // Create a list of training data points.
            var dataPoints = GenerateRandomDataPoints(1000);

            // Convert the list of data points to an IDataView object, which is
            // consumable by ML.NET API.
            var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);

            // Define trainer options.
            var options = new LightGbmRegressionTrainer.Options
            {
                LabelColumnName = nameof(DataPoint.Label),
                FeatureColumnName = nameof(DataPoint.Features),
                // How many leaves a single tree should have.
                NumberOfLeaves = 4,
                // Each leaf contains at least this number of training data points.
                MinimumExampleCountPerLeaf = 6,
                // The step size per update. Using a large value might reduce the
                // training time but can also reduce the algorithm's numerical
                // stability.
                LearningRate = 0.001,
                Booster = new GossBooster.Options()
                {
                    TopRate = 0.3,
                    OtherRate = 0.2
                }
            };

            // Define the trainer.
            var pipeline =
                mlContext.Regression.Trainers.LightGbm(options);

            // Train the model.
            var model = pipeline.Fit(trainingData);

            // Create testing data. Use different random seed to make it different
            // from training data.
            var testData = mlContext.Data.LoadFromEnumerable(
                GenerateRandomDataPoints(5, seed: 123));

            // Run the model on test data set.
            var transformedTestData = model.Transform(testData);

            // Convert IDataView object to a list.
            var predictions = mlContext.Data.CreateEnumerable<Prediction>(
                transformedTestData, reuseRowObject: false).ToList();

            // Look at 5 predictions for the Label, side by side with the actual
            // Label for comparison.
            foreach (var p in predictions)
            {
                Console.WriteLine($"Label: {p.Label:F3}, Prediction: {p.Score:F3}");
            }

            // Expected output:
            // Label: 0.985, Prediction: 0.866
            // Label: 0.155, Prediction: 0.171
            // Label: 0.515, Prediction: 0.470
            // Label: 0.566, Prediction: 0.476
            // Label: 0.096, Prediction: 0.140

            // Evaluate the overall metrics
            var metrics = mlContext.Regression.Evaluate(transformedTestData);
            PrintMetrics(metrics);

            // Expected output:
            // Mean Absolute Error: 0.04
            // Mean Squared Error: 0.00
            // Root Mean Squared Error: 0.06
            // RSquared: 0.97 (closer to 1 is better)
        }

        // Lazily yields `count` synthetic rows from a Random seeded with
        // `seed`. Each row has a float label in [0, 1) and 50 features, each
        // equal to the label plus a fresh uniform draw.
        private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count,
            int seed = 0)
        {
            var random = new Random(seed);
            for (int i = 0; i < count; i++)
            {
                float label = (float)random.NextDouble();
                yield return new DataPoint
                {
                    Label = label,
                    // Create random features that are correlated with the label.
                    Features = Enumerable.Repeat(label, 50).Select(
                        x => x + (float)random.NextDouble()).ToArray()
                };
            }
        }

        // Example with label and 50 feature values. A data set is a collection of
        // such examples.
        private class DataPoint
        {
            public float Label { get; set; }

            [VectorType(50)]
            public float[] Features { get; set; }
        }

        // Class used to capture predictions.
        private class Prediction
        {
            // Original label.
            public float Label { get; set; }

            // Predicted score from the trainer.
            public float Score { get; set; }
        }

        // Print some evaluation metrics to regression problems. Values are
        // written with two decimals so the console output matches the
        // documented expected output and the formatting used by the other
        // LightGbm samples.
        private static void PrintMetrics(RegressionMetrics metrics)
        {
            Console.WriteLine($"Mean Absolute Error: {metrics.MeanAbsoluteError:F2}");
            Console.WriteLine($"Mean Squared Error: {metrics.MeanSquaredError:F2}");
            Console.WriteLine(
                $"Root Mean Squared Error: {metrics.RootMeanSquaredError:F2}");
            Console.WriteLine($"RSquared: {metrics.RSquared:F2}");
        }
    }
}
适用于
LightGbm(BinaryClassificationCatalog+BinaryClassificationTrainers, Stream, String)
从预先训练的 LightGBM 模型创建 LightGbmBinaryTrainer ,该模型使用梯度提升决策树二元分类来预测目标。
public static Microsoft.ML.Trainers.LightGbm.LightGbmBinaryTrainer LightGbm (this Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers catalog, System.IO.Stream lightGbmModel, string featureColumnName = "Features");
static member LightGbm : Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers * System.IO.Stream * string -> Microsoft.ML.Trainers.LightGbm.LightGbmBinaryTrainer
<Extension()>
Public Function LightGbm (catalog As BinaryClassificationCatalog.BinaryClassificationTrainers, lightGbmModel As Stream, Optional featureColumnName As String = "Features") As LightGbmBinaryTrainer
参数
返回
适用于
LightGbm(MulticlassClassificationCatalog+MulticlassClassificationTrainers, Stream, String)
从预先训练的 LightGBM 模型创建 LightGbmMulticlassTrainer ,该模型使用梯度提升决策树多类分类模型预测目标。
public static Microsoft.ML.Trainers.LightGbm.LightGbmMulticlassTrainer LightGbm (this Microsoft.ML.MulticlassClassificationCatalog.MulticlassClassificationTrainers catalog, System.IO.Stream lightGbmModel, string featureColumnName = "Features");
static member LightGbm : Microsoft.ML.MulticlassClassificationCatalog.MulticlassClassificationTrainers * System.IO.Stream * string -> Microsoft.ML.Trainers.LightGbm.LightGbmMulticlassTrainer
<Extension()>
Public Function LightGbm (catalog As MulticlassClassificationCatalog.MulticlassClassificationTrainers, lightGbmModel As Stream, Optional featureColumnName As String = "Features") As LightGbmMulticlassTrainer
参数
返回
适用于
LightGbm(RankingCatalog+RankingTrainers, Stream, String)
从预先训练的 LightGBM 模型创建 LightGbmRankingTrainer ,该模型使用梯度提升决策树排名模型预测目标。
public static Microsoft.ML.Trainers.LightGbm.LightGbmRankingTrainer LightGbm (this Microsoft.ML.RankingCatalog.RankingTrainers catalog, System.IO.Stream lightGbmModel, string featureColumnName = "Features");
static member LightGbm : Microsoft.ML.RankingCatalog.RankingTrainers * System.IO.Stream * string -> Microsoft.ML.Trainers.LightGbm.LightGbmRankingTrainer
<Extension()>
Public Function LightGbm (catalog As RankingCatalog.RankingTrainers, lightGbmModel As Stream, Optional featureColumnName As String = "Features") As LightGbmRankingTrainer
参数
返回
适用于
LightGbm(RegressionCatalog+RegressionTrainers, Stream, String)
从预先训练的 LightGBM 模型创建 LightGbmRegressionTrainer ,该模型使用梯度提升决策树回归预测目标。
public static Microsoft.ML.Trainers.LightGbm.LightGbmRegressionTrainer LightGbm (this Microsoft.ML.RegressionCatalog.RegressionTrainers catalog, System.IO.Stream lightGbmModel, string featureColumnName = "Features");
static member LightGbm : Microsoft.ML.RegressionCatalog.RegressionTrainers * System.IO.Stream * string -> Microsoft.ML.Trainers.LightGbm.LightGbmRegressionTrainer
<Extension()>
Public Function LightGbm (catalog As RegressionCatalog.RegressionTrainers, lightGbmModel As Stream, Optional featureColumnName As String = "Features") As LightGbmRegressionTrainer
参数
返回
适用于
LightGbm(BinaryClassificationCatalog+BinaryClassificationTrainers, String, String, String, Nullable<Int32>, Nullable<Int32>, Nullable<Double>, Int32)
创建 LightGbmBinaryTrainer,它使用梯度提升决策树二元分类来预测目标。
public static Microsoft.ML.Trainers.LightGbm.LightGbmBinaryTrainer LightGbm (this Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers catalog, string labelColumnName = "Label", string featureColumnName = "Features", string exampleWeightColumnName = default, int? numberOfLeaves = default, int? minimumExampleCountPerLeaf = default, double? learningRate = default, int numberOfIterations = 100);
static member LightGbm : Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers * string * string * string * Nullable<int> * Nullable<int> * Nullable<double> * int -> Microsoft.ML.Trainers.LightGbm.LightGbmBinaryTrainer
<Extension()>
Public Function LightGbm (catalog As BinaryClassificationCatalog.BinaryClassificationTrainers, Optional labelColumnName As String = "Label", Optional featureColumnName As String = "Features", Optional exampleWeightColumnName As String = Nothing, Optional numberOfLeaves As Nullable(Of Integer) = Nothing, Optional minimumExampleCountPerLeaf As Nullable(Of Integer) = Nothing, Optional learningRate As Nullable(Of Double) = Nothing, Optional numberOfIterations As Integer = 100) As LightGbmBinaryTrainer
参数
- exampleWeightColumnName
- String
示例权重列的名称 (可选) 。
- numberOfIterations
- Int32
提升迭代的次数。 每次迭代都会创建一个新树,因此这相当于树的数量。
返回
示例
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;
namespace Samples.Dynamic.Trainers.BinaryClassification
{
    // Sample: trains a LightGBM binary classifier with its default settings
    // on synthetic data, then prints a few predictions and the evaluation
    // metrics for a separate test set.
    public static class LightGbm
    {
        // Running this sample needs the additional NuGet package
        // Microsoft.ML.LightGbm:
        // https://www.nuget.org/packages/Microsoft.ML.LightGbm/
        public static void Example()
        {
            // The MLContext is the entry point for ML.NET operations. A fixed
            // seed is passed so that the outputs are deterministic.
            var mlContext = new MLContext(seed: 0);

            // Build 1000 synthetic training rows and expose them as an
            // IDataView, the data representation consumed by the ML.NET API.
            var trainingData = mlContext.Data.LoadFromEnumerable(
                GenerateRandomDataPoints(1000));

            // Create the trainer with default arguments and fit it.
            var trainer = mlContext.BinaryClassification.Trainers.LightGbm();
            var model = trainer.Fit(trainingData);

            // The test set uses a different random seed so that it differs
            // from the training set.
            var testData = mlContext.Data.LoadFromEnumerable(
                GenerateRandomDataPoints(500, seed: 123));

            // Score the test set with the trained model.
            var transformedTestData = model.Transform(testData);

            // Materialize the scored rows as strongly typed objects.
            var predictions = mlContext.Data
                .CreateEnumerable<Prediction>(
                    transformedTestData, reuseRowObject: false)
                .ToList();

            // Show the first 5 predictions next to their labels.
            foreach (var prediction in predictions.Take(5))
            {
                Console.WriteLine(
                    $"Label: {prediction.Label}, Prediction: {prediction.PredictedLabel}");
            }

            // Expected output:
            // Label: True, Prediction: True
            // Label: False, Prediction: False
            // Label: True, Prediction: True
            // Label: True, Prediction: True
            // Label: False, Prediction: False

            // Compute and print the overall metrics.
            var metrics = mlContext.BinaryClassification
                .Evaluate(transformedTestData);
            PrintMetrics(metrics);

            // Expected output:
            // Accuracy: 0.77
            // AUC: 0.85
            // F1 Score: 0.76
            // Negative Precision: 0.79
            // Negative Recall: 0.77
            // Positive Precision: 0.75
            // Positive Recall: 0.77
            //
            // TEST POSITIVE RATIO: 0.4760 (238.0/(238.0+262.0))
            // Confusion table
            // ||======================
            // PREDICTED || positive | negative | Recall
            // TRUTH ||======================
            // positive || 183 | 55 | 0.7689
            // negative || 60 | 202 | 0.7710
            // ||======================
            // Precision || 0.7531 | 0.7860 |
        }

        // Lazily yields `count` synthetic rows from a Random seeded with
        // `seed`. Each row has a boolean label and 50 features.
        private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count,
            int seed = 0)
        {
            var rng = new Random(seed);
            for (int row = 0; row < count; row++)
            {
                // The label is drawn first, then the 50 feature values.
                bool isPositive = (float)rng.NextDouble() > 0.5f;

                // Features are correlated with the label: rows labelled
                // false get a small constant added to every feature.
                var features = new float[50];
                for (int j = 0; j < features.Length; j++)
                {
                    float value = (float)rng.NextDouble();
                    features[j] = isPositive ? value : value + 0.03f;
                }

                yield return new DataPoint
                {
                    Label = isPositive,
                    Features = features
                };
            }
        }

        // One example: a label plus 50 feature values. A data set is a
        // collection of such examples.
        private class DataPoint
        {
            public bool Label { get; set; }

            [VectorType(50)]
            public float[] Features { get; set; }
        }

        // Shape used to read scored rows back from the model output.
        private class Prediction
        {
            // Original label.
            public bool Label { get; set; }

            // Label predicted by the trainer.
            public bool PredictedLabel { get; set; }
        }

        // Writes the binary classification metrics (two decimals each),
        // followed by the formatted confusion table, to the console.
        private static void PrintMetrics(BinaryClassificationMetrics metrics)
        {
            Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}");
            Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}");
            Console.WriteLine($"F1 Score: {metrics.F1Score:F2}");
            Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}");
            Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}");
            Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}");
            Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}\n");

            var confusionTable = metrics.ConfusionMatrix.GetFormattedConfusionTable();
            Console.WriteLine(confusionTable);
        }
    }
}
适用于
LightGbm(MulticlassClassificationCatalog+MulticlassClassificationTrainers, String, String, String, Nullable<Int32>, Nullable<Int32>, Nullable<Double>, Int32)
创建 LightGbmMulticlassTrainer,它使用梯度提升决策树多类分类模型预测目标。
public static Microsoft.ML.Trainers.LightGbm.LightGbmMulticlassTrainer LightGbm (this Microsoft.ML.MulticlassClassificationCatalog.MulticlassClassificationTrainers catalog, string labelColumnName = "Label", string featureColumnName = "Features", string exampleWeightColumnName = default, int? numberOfLeaves = default, int? minimumExampleCountPerLeaf = default, double? learningRate = default, int numberOfIterations = 100);
static member LightGbm : Microsoft.ML.MulticlassClassificationCatalog.MulticlassClassificationTrainers * string * string * string * Nullable<int> * Nullable<int> * Nullable<double> * int -> Microsoft.ML.Trainers.LightGbm.LightGbmMulticlassTrainer
<Extension()>
Public Function LightGbm (catalog As MulticlassClassificationCatalog.MulticlassClassificationTrainers, Optional labelColumnName As String = "Label", Optional featureColumnName As String = "Features", Optional exampleWeightColumnName As String = Nothing, Optional numberOfLeaves As Nullable(Of Integer) = Nothing, Optional minimumExampleCountPerLeaf As Nullable(Of Integer) = Nothing, Optional learningRate As Nullable(Of Double) = Nothing, Optional numberOfIterations As Integer = 100) As LightGbmMulticlassTrainer
参数
- labelColumnName
- String
标签列的名称。 列数据必须为 KeyDataViewType。
- exampleWeightColumnName
- String
示例权重列的名称 (可选) 。
- numberOfIterations
- Int32
提升迭代的次数。 每次迭代都会创建一个新树,因此这相当于树的数量。
返回
示例
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;
namespace Samples.Dynamic.Trainers.MulticlassClassification
{
public static class LightGbm
{
// Running this sample needs the additional NuGet package
// Microsoft.ML.LightGbm:
// https://www.nuget.org/packages/Microsoft.ML.LightGbm/
public static void Example()
{
    // The MLContext is the entry point for ML.NET operations. A fixed
    // seed is passed so that the outputs are deterministic.
    var mlContext = new MLContext(seed: 0);

    // Build 1000 synthetic training rows and expose them as an
    // IDataView, the data representation consumed by the ML.NET API.
    var trainingData = mlContext.Data.LoadFromEnumerable(
        GenerateRandomDataPoints(1000));

    // Pipeline: map the label column values to key type, then apply the
    // LightGbm multiclass trainer with default arguments.
    var labelToKey = mlContext.Transforms.Conversion
        .MapValueToKey(nameof(DataPoint.Label));
    var trainer = mlContext.MulticlassClassification.Trainers.LightGbm();
    var pipeline = labelToKey.Append(trainer);

    // Train the model.
    var model = pipeline.Fit(trainingData);

    // The test set uses a different random seed so that it differs
    // from the training set.
    var testData = mlContext.Data.LoadFromEnumerable(
        GenerateRandomDataPoints(500, seed: 123));

    // Score the test set with the trained model.
    var transformedTestData = model.Transform(testData);

    // Materialize the scored rows as strongly typed objects.
    var predictions = mlContext.Data
        .CreateEnumerable<Prediction>(
            transformedTestData, reuseRowObject: false)
        .ToList();

    // Show the first 5 predictions next to their labels.
    foreach (var prediction in predictions.Take(5))
    {
        Console.WriteLine(
            $"Label: {prediction.Label}, Prediction: {prediction.PredictedLabel}");
    }

    // Expected output:
    // Label: 1, Prediction: 1
    // Label: 2, Prediction: 2
    // Label: 3, Prediction: 3
    // Label: 2, Prediction: 2
    // Label: 3, Prediction: 3

    // Compute and print the overall metrics.
    var metrics = mlContext.MulticlassClassification
        .Evaluate(transformedTestData);
    PrintMetrics(metrics);

    // Expected output:
    // Micro Accuracy: 0.99
    // Macro Accuracy: 0.99
    // Log Loss: 0.05
    // Log Loss Reduction: 0.95
    // Confusion table
    // ||========================
    // PREDICTED || 0 | 1 | 2 | Recall
    // TRUTH ||========================
    // 0 || 156 | 0 | 4 | 0.9750
    // 1 || 0 | 176 | 1 | 0.9944
    // 2 || 1 | 0 | 162 | 0.9939
    // ||========================
    // Precision ||0.9936 |1.0000 |0.9701 |
}
// Generates random uniform doubles in [-0.5, 0.5)
// range with labels 1, 2 or 3.
private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count,
int seed = 0)
{
var random = new Random(seed);
float randomFloat() => (float)(random.NextDouble() - 0.5);
for (int i = 0; i < count; i++)
{
// Generate Labels that are integers 1, 2 or 3
var label = random.Next(1, 4);
yield return new DataPoint
{
Label = (uint)label,
// Create random features that are correlated with the label.
// The feature values are slightly increased by adding a
// constant multiple of label.
Features = Enumerable.Repeat(label, 20)
.Select(x => randomFloat() + label * 0.2f).ToArray()
};
}
}
// Example with label and 20 feature values. A data set is a collection of
// such examples.
private class DataPoint
{
public uint Label { get; set; }
[VectorType(20)]
public float[] Features { get; set; }
}
// Class used to capture predictions.
private class Prediction
{
// Original label.
public uint Label { get; set; }
// Predicted label from the trainer.
public uint PredictedLabel { get; set; }
}
// Pretty-print MulticlassClassificationMetrics objects.
public static void PrintMetrics(MulticlassClassificationMetrics metrics)
{
Console.WriteLine($"Micro Accuracy: {metrics.MicroAccuracy:F2}");
Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}");
Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}");
Console.WriteLine(
$"Log Loss Reduction: {metrics.LogLossReduction:F2}\n");
Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable());
}
}
}
适用于
LightGbm(RegressionCatalog+RegressionTrainers, String, String, String, Nullable<Int32>, Nullable<Int32>, Nullable<Double>, Int32)
创建 LightGbmRegressionTrainer,它使用梯度提升决策树回归模型预测目标。
public static Microsoft.ML.Trainers.LightGbm.LightGbmRegressionTrainer LightGbm (this Microsoft.ML.RegressionCatalog.RegressionTrainers catalog, string labelColumnName = "Label", string featureColumnName = "Features", string exampleWeightColumnName = default, int? numberOfLeaves = default, int? minimumExampleCountPerLeaf = default, double? learningRate = default, int numberOfIterations = 100);
static member LightGbm : Microsoft.ML.RegressionCatalog.RegressionTrainers * string * string * string * Nullable<int> * Nullable<int> * Nullable<double> * int -> Microsoft.ML.Trainers.LightGbm.LightGbmRegressionTrainer
<Extension()>
Public Function LightGbm (catalog As RegressionCatalog.RegressionTrainers, Optional labelColumnName As String = "Label", Optional featureColumnName As String = "Features", Optional exampleWeightColumnName As String = Nothing, Optional numberOfLeaves As Nullable(Of Integer) = Nothing, Optional minimumExampleCountPerLeaf As Nullable(Of Integer) = Nothing, Optional learningRate As Nullable(Of Double) = Nothing, Optional numberOfIterations As Integer = 100) As LightGbmRegressionTrainer
参数
- exampleWeightColumnName
- String
示例权重列的名称(可选)。
- numberOfIterations
- Int32
提升迭代的次数。 每次迭代都会创建一个新树,因此这相当于树的数量。
返回
示例
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;
namespace Samples.Dynamic.Trainers.Regression
{
    /// <summary>
    /// Sample that fits the LightGbm regression trainer on generated data and
    /// prints a few predictions together with the evaluation metrics.
    /// </summary>
    public static class LightGbm
    {
        // Number of feature values carried by every generated data point.
        private const int FeatureCount = 50;

        // This example requires installation of additional NuGet
        // package for Microsoft.ML.LightGBM
        // at https://www.nuget.org/packages/Microsoft.ML.LightGbm/
        public static void Example()
        {
            // Create a new context for ML.NET operations. It can be used for
            // exception tracking and logging, as a catalog of available
            // operations and as the source of randomness. Setting the seed to a
            // fixed number in this example to make outputs deterministic.
            var mlContext = new MLContext(seed: 0);

            // Create a list of training data points.
            var dataPoints = GenerateRandomDataPoints(1000);

            // Convert the list of data points to an IDataView object, which is
            // consumable by ML.NET API.
            var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);

            // Define the trainer.
            var pipeline = mlContext.Regression.Trainers.
                LightGbm(
                labelColumnName: nameof(DataPoint.Label),
                featureColumnName: nameof(DataPoint.Features));

            // Train the model.
            var model = pipeline.Fit(trainingData);

            // Create testing data. Use different random seed to make it
            // different from training data.
            var testData = mlContext.Data.LoadFromEnumerable(
                GenerateRandomDataPoints(5, seed: 123));

            // Run the model on test data set.
            var transformedTestData = model.Transform(testData);

            // Convert IDataView object to a list.
            var predictions = mlContext.Data.CreateEnumerable<Prediction>(
                transformedTestData, reuseRowObject: false).ToList();

            // Look at 5 predictions for the Label, side by side with the actual
            // Label for comparison.
            foreach (var p in predictions)
            {
                Console.WriteLine($"Label: {p.Label:F3}, Prediction: {p.Score:F3}");
            }

            // Expected output:
            //   Label: 0.985, Prediction: 0.864
            //   Label: 0.155, Prediction: 0.164
            //   Label: 0.515, Prediction: 0.470
            //   Label: 0.566, Prediction: 0.501
            //   Label: 0.096, Prediction: 0.138

            // Evaluate the overall metrics
            var metrics = mlContext.Regression.Evaluate(transformedTestData);
            PrintMetrics(metrics);

            // Expected output:
            //   Mean Absolute Error: 0.10
            //   Mean Squared Error: 0.01
            //   Root Mean Squared Error: 0.11
            //   RSquared: 0.89 (closer to 1 is better. The worst case is 0)
        }

        /// <summary>
        /// Lazily produces <paramref name="count"/> data points whose label is
        /// a random value in [0, 1) and whose features are that label plus
        /// independent random noise in [0, 1).
        /// </summary>
        /// <param name="count">Number of data points to yield.</param>
        /// <param name="seed">Seed for the random number generator.</param>
        /// <returns>A deferred sequence of generated data points.</returns>
        private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count,
            int seed = 0)
        {
            var random = new Random(seed);
            for (int i = 0; i < count; i++)
            {
                float label = (float)random.NextDouble();
                yield return new DataPoint
                {
                    Label = label,
                    // Create random features that are correlated with the label.
                    Features = Enumerable.Repeat(label, FeatureCount).Select(
                        x => x + (float)random.NextDouble()).ToArray()
                };
            }
        }

        // Example with label and 50 feature values. A data set is a collection
        // of such examples.
        private class DataPoint
        {
            public float Label { get; set; }

            [VectorType(FeatureCount)]
            public float[] Features { get; set; }
        }

        // Class used to capture predictions.
        private class Prediction
        {
            // Original label.
            public float Label { get; set; }

            // Predicted score from the trainer.
            public float Score { get; set; }
        }

        /// <summary>
        /// Writes the regression error metrics and R-squared to the console.
        /// Values are rendered with two decimals so the output lines up with
        /// the expected output documented in <see cref="Example"/>.
        /// </summary>
        /// <param name="metrics">Metrics returned by the evaluator.</param>
        private static void PrintMetrics(RegressionMetrics metrics)
        {
            Console.WriteLine(
                $"Mean Absolute Error: {metrics.MeanAbsoluteError:F2}");
            Console.WriteLine(
                $"Mean Squared Error: {metrics.MeanSquaredError:F2}");
            Console.WriteLine(
                $"Root Mean Squared Error: {metrics.RootMeanSquaredError:F2}");
            Console.WriteLine($"RSquared: {metrics.RSquared:F2}");
        }
    }
}
适用于
LightGbm(RankingCatalog+RankingTrainers, String, String, String, String, Nullable<Int32>, Nullable<Int32>, Nullable<Double>, Int32)
创建 LightGbmRankingTrainer,它使用梯度提升决策树排名模型预测目标。
public static Microsoft.ML.Trainers.LightGbm.LightGbmRankingTrainer LightGbm (this Microsoft.ML.RankingCatalog.RankingTrainers catalog, string labelColumnName = "Label", string featureColumnName = "Features", string rowGroupColumnName = "GroupId", string exampleWeightColumnName = default, int? numberOfLeaves = default, int? minimumExampleCountPerLeaf = default, double? learningRate = default, int numberOfIterations = 100);
static member LightGbm : Microsoft.ML.RankingCatalog.RankingTrainers * string * string * string * string * Nullable<int> * Nullable<int> * Nullable<double> * int -> Microsoft.ML.Trainers.LightGbm.LightGbmRankingTrainer
<Extension()>
Public Function LightGbm (catalog As RankingCatalog.RankingTrainers, Optional labelColumnName As String = "Label", Optional featureColumnName As String = "Features", Optional rowGroupColumnName As String = "GroupId", Optional exampleWeightColumnName As String = Nothing, Optional numberOfLeaves As Nullable(Of Integer) = Nothing, Optional minimumExampleCountPerLeaf As Nullable(Of Integer) = Nothing, Optional learningRate As Nullable(Of Double) = Nothing, Optional numberOfIterations As Integer = 100) As LightGbmRankingTrainer
参数
- labelColumnName
- String
标签列的名称。 列数据必须为 Single 或 KeyDataViewType。
- rowGroupColumnName
- String
组列的名称。
- exampleWeightColumnName
- String
示例权重列的名称(可选)。
- numberOfIterations
- Int32
提升迭代的次数。 每次迭代都会创建一个新树,因此这相当于树的数量。
返回
示例
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;
namespace Samples.Dynamic.Trainers.Ranking
{
    /// <summary>
    /// Sample that fits the LightGbm ranking trainer (default arguments) on
    /// generated, grouped data and prints a few scores together with the
    /// evaluation metrics.
    /// </summary>
    public static class LightGbm
    {
        // Number of feature values carried by every generated data point.
        private const int FeatureCount = 50;

        // This example requires installation of additional NuGet package for
        // Microsoft.ML.LightGbm at
        // https://www.nuget.org/packages/Microsoft.ML.LightGbm/
        public static void Example()
        {
            // Create a new context for ML.NET operations. It can be used for
            // exception tracking and logging, as a catalog of available
            // operations and as the source of randomness. Setting the seed to a
            // fixed number in this example to make outputs deterministic.
            var mlContext = new MLContext(seed: 0);

            // Create a list of training data points.
            var dataPoints = GenerateRandomDataPoints(1000);

            // Convert the list of data points to an IDataView object, which is
            // consumable by ML.NET API.
            var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);

            // Define the trainer.
            var pipeline = mlContext.Ranking.Trainers.LightGbm();

            // Train the model.
            var model = pipeline.Fit(trainingData);

            // Create testing data. Use different random seed to make it
            // different from training data.
            var testData = mlContext.Data.LoadFromEnumerable(
                GenerateRandomDataPoints(500, seed: 123));

            // Run the model on test data set.
            var transformedTestData = model.Transform(testData);

            // Take the top 5 rows.
            var topTransformedTestData = mlContext.Data.TakeRows(
                transformedTestData, 5);

            // Convert IDataView object to a list.
            var predictions = mlContext.Data.CreateEnumerable<Prediction>(
                topTransformedTestData, reuseRowObject: false).ToList();

            // Print 5 predictions.
            foreach (var p in predictions)
            {
                Console.WriteLine($"Label: {p.Label}, Score: {p.Score}");
            }

            // Expected output:
            //   Label: 5, Score: 2.493263
            //   Label: 1, Score: -4.528436
            //   Label: 3, Score: -3.002865
            //   Label: 3, Score: -2.151812
            //   Label: 1, Score: -4.089102

            // Evaluate the overall metrics.
            var metrics = mlContext.Ranking.Evaluate(transformedTestData);
            PrintMetrics(metrics);

            // Expected output:
            //   DCG: @1:41.95, @2:63.76, @3:75.97
            //   NDCG: @1:0.99, @2:0.99, @3:0.99
        }

        /// <summary>
        /// Lazily produces <paramref name="count"/> data points. Each point
        /// gets a random integer label from <c>random.Next(0, 5)</c>, a group
        /// id equal to its index divided by <paramref name="groupSize"/>, and
        /// features made of random noise in [0, 1) plus 0.1 times the label.
        /// </summary>
        /// <param name="count">Number of data points to yield.</param>
        /// <param name="seed">Seed for the random number generator.</param>
        /// <param name="groupSize">
        /// Number of consecutive points that share one group id.
        /// </param>
        /// <returns>A deferred sequence of generated data points.</returns>
        private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count,
            int seed = 0, int groupSize = 10)
        {
            var random = new Random(seed);
            float randomFloat() => (float)random.NextDouble();
            for (int i = 0; i < count; i++)
            {
                var label = random.Next(0, 5);
                yield return new DataPoint
                {
                    Label = (uint)label,
                    GroupId = (uint)(i / groupSize),
                    // Create random features that are correlated with the
                    // label. For data points with larger labels, the feature
                    // values are slightly increased by adding a constant.
                    Features = Enumerable.Repeat(label, FeatureCount).Select(
                        x => randomFloat() + x * 0.1f).ToArray()
                };
            }
        }

        // Example with label, groupId, and 50 feature values. A data set is a
        // collection of such examples.
        private class DataPoint
        {
            [KeyType(5)]
            public uint Label { get; set; }

            [KeyType(100)]
            public uint GroupId { get; set; }

            [VectorType(FeatureCount)]
            public float[] Features { get; set; }
        }

        // Class used to capture predictions.
        private class Prediction
        {
            // Original label.
            public uint Label { get; set; }

            // Score produced from the trainer.
            public float Score { get; set; }
        }

        /// <summary>
        /// Writes the DCG and NDCG values to the console, one line each, as a
        /// comma-separated list of <c>@position:value</c> entries with the
        /// value rendered to two decimals (for example <c>@1:41.95</c>).
        /// </summary>
        /// <param name="metrics">Metrics returned by the evaluator.</param>
        public static void PrintMetrics(RankingMetrics metrics)
        {
            // The "F2" specifier has to sit inside the interpolation hole;
            // concatenating the text ":F2" would print it literally.
            Console.WriteLine("DCG: " + string.Join(", ",
                metrics.DiscountedCumulativeGains.Select(
                    (d, i) => $"@{i + 1}:{d:F2}")));

            Console.WriteLine("NDCG: " + string.Join(", ",
                metrics.NormalizedDiscountedCumulativeGains.Select(
                    (d, i) => $"@{i + 1}:{d:F2}")));
        }
    }
}