TreeExtensions.FeaturizeByFastTreeTweedie 메서드
정의
중요
일부 정보는 릴리스되기 전에 상당 부분 수정될 수 있는 시험판 제품과 관련이 있습니다. Microsoft는 여기에 제공된 정보에 대해 어떠한 명시적이거나 묵시적인 보증도 하지 않습니다.
FastTreeTweedieTrainer를 사용하여 TreeEnsembleModelParameters를 학습시키고, 이를 통해 트리 기반 기능을 만드는 FastTreeTweedieFeaturizationEstimator를 만듭니다.
public static Microsoft.ML.Trainers.FastTree.FastTreeTweedieFeaturizationEstimator FeaturizeByFastTreeTweedie (this Microsoft.ML.TransformsCatalog catalog, Microsoft.ML.Trainers.FastTree.FastTreeTweedieFeaturizationEstimator.Options options);
static member FeaturizeByFastTreeTweedie : Microsoft.ML.TransformsCatalog * Microsoft.ML.Trainers.FastTree.FastTreeTweedieFeaturizationEstimator.Options -> Microsoft.ML.Trainers.FastTree.FastTreeTweedieFeaturizationEstimator
<Extension()>
Public Function FeaturizeByFastTreeTweedie (catalog As TransformsCatalog, options As FastTreeTweedieFeaturizationEstimator.Options) As FastTreeTweedieFeaturizationEstimator
매개 변수
- catalog
- TransformsCatalog
FastTreeTweedieFeaturizationEstimator를 만들 컨텍스트인 TransformsCatalog입니다.
FastTreeTweedieFeaturizationEstimator를 구성하는 옵션입니다. 사용 가능한 설정은 FastTreeTweedieFeaturizationEstimator.Options 및 TreeEnsembleFeaturizationEstimatorBase.OptionsBase를 참조하세요.
반환
예제
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Trainers.FastTree;
namespace Samples.Dynamic.Transforms.TreeFeaturization
{
    /// <summary>
    /// Sample showing how to configure a FastTree Tweedie based featurizer
    /// through explicit option objects, fit it on in-memory data and print
    /// the three tree-based feature vectors it produces.
    /// </summary>
    /// <remarks>
    /// This example requires installation of the additional NuGet package
    /// <a href="https://www.nuget.org/packages/Microsoft.ML.FastTree/">Microsoft.ML.FastTree</a>.
    /// </remarks>
    public static class FastTreeTweedieFeaturizationWithOptions
    {
        /// <summary>
        /// Generates 100 random data points, trains the featurizer on them,
        /// transforms the same data and writes the result for the first three
        /// rows to the console.
        /// </summary>
        public static void Example()
        {
            // The ML.NET context is the entry point to the catalogs used
            // below. A fixed seed is passed so that the outputs are
            // deterministic.
            var mlContext = new MLContext(seed: 0);

            // Materialize the training rows once; they are indexed again
            // further down when the results are printed.
            var dataPoints = GenerateRandomDataPoints(100).ToList();

            // Wrap the list in an IDataView and cache it in memory. ML.NET
            // does not cache data by default, so caching helps when the data
            // fits into memory and is read many times, as iterative
            // algorithms do.
            var dataView = mlContext.Data.Cache(
                mlContext.Data.LoadFromEnumerable(dataPoints));

            // Column names are taken from the property names of the row
            // classes declared at the bottom of this sample.
            string labelColumnName = nameof(DataPoint.Label);
            string featureColumnName = nameof(DataPoint.Features);
            string treesColumnName = nameof(TransformedDataPoint.Trees);
            string leavesColumnName = nameof(TransformedDataPoint.Leaves);
            string pathsColumnName = nameof(TransformedDataPoint.Paths);

            // Configuration of the trainer that builds the tree-based model.
            var trainerOptions = new FastTreeTweedieTrainer.Options();
            // Only use 80% of features to reduce over-fitting.
            trainerOptions.FeatureFraction = 0.8;
            // Create a simpler model by penalizing usage of new features.
            trainerOptions.FeatureFirstUsePenalty = 0.1;
            // Reduce the number of trees to 3.
            trainerOptions.NumberOfTrees = 3;
            // Number of leaves per tree.
            trainerOptions.NumberOfLeaves = 6;
            trainerOptions.LabelColumnName = labelColumnName;
            trainerOptions.FeatureColumnName = featureColumnName;

            // Configuration of the featurizer itself: which column to read,
            // which columns to write, and which trainer options to use.
            var options = new FastTreeTweedieFeaturizationEstimator.Options();
            options.InputColumnName = featureColumnName;
            options.TreesColumnName = treesColumnName;
            options.LeavesColumnName = leavesColumnName;
            options.PathsColumnName = pathsColumnName;
            options.TrainerOptions = trainerOptions;

            // Define the featurizer and train it.
            var pipeline = mlContext.Transforms.FeaturizeByFastTreeTweedie(options);
            var model = pipeline.Fit(dataView);

            // Apply the trained featurizer to the same data it was trained
            // on.
            var transformed = model.Transform(dataView);

            // Convert the IDataView back into a list; each element
            // corresponds to one row of the IDataView.
            var transformedDataPoints = mlContext.Data
                .CreateEnumerable<TransformedDataPoint>(transformed, false)
                .ToList();

            // Print out the transformation of the first 3 data points.
            foreach (int row in Enumerable.Range(0, 3))
            {
                var source = dataPoints[row];
                var featurized = transformedDataPoints[row];

                string featureText = String.Join(",", source.Features);
                string treesText = String.Join(",", featurized.Trees);
                string leavesText = String.Join(",", featurized.Leaves);
                string pathsText = String.Join(",", featurized.Paths);

                Console.WriteLine($"The original feature vector [{featureText}] is transformed to three different tree-based feature vectors:");
                Console.WriteLine($" Trees' output values: [{treesText}].");
                Console.WriteLine($" Leave IDs' 0-1 representation: [{leavesText}].");
                Console.WriteLine($" Paths IDs' 0-1 representation: [{pathsText}].");
            }

            // Expected output:
            // The original feature vector [1.543569,1.494266,1.284405] is
            // transformed to three different tree-based feature vectors:
            // Trees' output values: [-0.05652997,-0.02312196,-0.01179363].
            // Leave IDs' 0-1 representation: [0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0].
            // Paths IDs' 0-1 representation: [1,0,0,0,0,1,1,0,1,0,1,1,0,0,0].
            // The original feature vector [0.764918,1.11206,0.648211] is
            // transformed to three different tree-based feature vectors:
            // Trees' output values: [-0.1933938,-0.1042738,-0.2312837].
            // Leave IDs' 0-1 representation: [0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0].
            // Paths IDs' 0-1 representation: [1,1,1,0,0,1,1,0,0,0,1,0,0,0,0].
            // The original feature vector [1.251254,1.269456,1.444864] is
            // transformed to three different tree-based feature vectors:
            // Trees' output values: [-0.05652997,-0.06082304,-0.04528879].
            // Leave IDs' 0-1 representation: [0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0].
            // Paths IDs' 0-1 representation: [1,0,0,0,0,1,1,0,1,0,1,1,1,0,1].
        }

        /// <summary>
        /// Lazily produces <paramref name="count"/> random data points whose
        /// three features are each the label plus a fresh random offset.
        /// </summary>
        /// <param name="count">Number of data points to yield.</param>
        /// <param name="seed">Seed of the random number generator.</param>
        /// <returns>A deferred sequence of generated data points.</returns>
        private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count,
            int seed = 0)
        {
            const int featureCount = 3;
            var random = new Random(seed);

            int produced = 0;
            while (produced < count)
            {
                // The label is drawn first, then one draw per feature, so the
                // sequence of random numbers consumed per point is fixed.
                float label = (float)random.NextDouble();

                // Create random features that are correlated with the label.
                var features = new float[featureCount];
                for (int slot = 0; slot < featureCount; slot++)
                {
                    features[slot] = label + (float)random.NextDouble();
                }

                yield return new DataPoint { Label = label, Features = features };
                produced++;
            }
        }

        // One example: a label and 3 feature values. A data set is a
        // collection of such examples.
        private class DataPoint
        {
            public float Label { get; set; }

            [VectorType(3)]
            public float[] Features { get; set; }
        }

        // Row shape used to read back the output of the tree-based
        // featurization; it extends the input row with the three new columns.
        private class TransformedDataPoint : DataPoint
        {
            // The i-th value is the output value of the i-th decision tree.
            public float[] Trees { get; set; }

            // The 0-1 encoding of the leaves the input feature vector falls
            // into.
            public float[] Leaves { get; set; }

            // The 0-1 encoding of the paths the input feature vector takes to
            // reach the leaves.
            public float[] Paths { get; set; }
        }
    }
}