FactorizationMachineExtensions.FieldAwareFactorizationMachine 方法
定義
重要
部分資訊涉及發行前產品,在發行之前可能會有大幅修改。 Microsoft 對此處提供的資訊,不做任何明確或隱含的瑕疵擔保。
多載
FieldAwareFactorizationMachine(BinaryClassificationCatalog+BinaryClassificationTrainers, FieldAwareFactorizationMachineTrainer+Options)
使用進階選項建立 FieldAwareFactorizationMachineTrainer,其會使用透過布林值標籤資料定型的欄位感知分解機器 (field-aware factorization machine) 來預測目標。
public static Microsoft.ML.Trainers.FieldAwareFactorizationMachineTrainer FieldAwareFactorizationMachine (this Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers catalog, Microsoft.ML.Trainers.FieldAwareFactorizationMachineTrainer.Options options);
static member FieldAwareFactorizationMachine : Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers * Microsoft.ML.Trainers.FieldAwareFactorizationMachineTrainer.Options -> Microsoft.ML.Trainers.FieldAwareFactorizationMachineTrainer
<Extension()>
Public Function FieldAwareFactorizationMachine (catalog As BinaryClassificationCatalog.BinaryClassificationTrainers, options As FieldAwareFactorizationMachineTrainer.Options) As FieldAwareFactorizationMachineTrainer
參數
二元分類目錄定型器物件。
定型器選項。
傳回
範例
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Trainers;
namespace Samples.Dynamic.Trainers.BinaryClassification
{
    // Sample: configure the field-aware factorization machine (FFM) binary
    // classification trainer through an explicit Options object.
    public static class FieldAwareFactorizationMachineWithOptions
    {
        // Number of features per field.
        private const int featureLength = 5;

        // This example first trains a field-aware factorization machine for
        // binary classification, then measures the trained model's quality,
        // and finally uses the trained model to make predictions.
        public static void Example()
        {
            // Create a new context for ML.NET operations. It can be used for
            // exception tracking and logging, as a catalog of available
            // operations and as the source of randomness. Setting the seed to
            // a fixed number in this example to make outputs deterministic.
            var mlContext = new MLContext(seed: 0);

            // Create a list of training data points.
            IEnumerable<DataPoint> data = GenerateRandomDataPoints(500);

            // Convert the list of data points to an IDataView object, which is
            // consumable by ML.NET API.
            var trainingData = mlContext.Data.LoadFromEnumerable(data);

            // Define trainer options. Field0 is the primary feature column;
            // Field1 and Field2 are supplied as the extra feature columns.
            var options = new FieldAwareFactorizationMachineTrainer.Options
            {
                FeatureColumnName = nameof(DataPoint.Field0),
                ExtraFeatureColumns =
                    new[] { nameof(DataPoint.Field1), nameof(DataPoint.Field2) },
                LabelColumnName = nameof(DataPoint.Label),
                LambdaLatent = 0.01f,
                LambdaLinear = 0.001f,
                LatentDimension = 16,
                NumberOfIterations = 50,
                LearningRate = 0.5f
            };

            // Define the trainer.
            // This trainer trains field-aware factorization (FFM)
            // for binary classification.
            // See https://www.csie.ntu.edu.tw/~cjlin/papers/ffm.pdf for the
            // theory behind and
            // https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf for
            // the training algorithm implemented in ML.NET.
            var pipeline = mlContext.BinaryClassification.Trainers
                .FieldAwareFactorizationMachine(options);

            // Train the model.
            var model = pipeline.Fit(trainingData);

            // Run the model on training data set.
            var transformedTrainingData = model.Transform(trainingData);

            // Measure the quality of the trained model.
            var metrics = mlContext.BinaryClassification
                .Evaluate(transformedTrainingData);

            // Show the quality metrics.
            PrintMetrics(metrics);

            // Expected output:
            // Accuracy: 0.99
            // AUC: 1.00
            // F1 Score: 0.99
            // Negative Precision: 1.00
            // Negative Recall: 0.98
            // Positive Precision: 0.98
            // Positive Recall: 1.00
            // Log Loss: 0.17
            // Log Loss Reduction: 0.83
            // Entropy: 1.00
            //
            // NOTE(review): PrintMetrics in this sample does not print a
            // confusion table, and the recall/precision figures in the table
            // below disagree with the summary metrics above. The table looks
            // like a leftover from a different run - re-run to confirm.
            //
            // TEST POSITIVE RATIO: 0.4760 (238.0/(238.0+262.0))
            // Confusion table
            // ||======================
            // PREDICTED || positive | negative | Recall
            // TRUTH ||======================
            // positive || 199 | 39 | 0.8361
            // negative || 69 | 193 | 0.7366
            // ||======================
            // Precision || 0.7425 | 0.8319 |

            // Create prediction function from the trained model.
            var engine = mlContext.Model
                .CreatePredictionEngine<DataPoint, Result>(model);

            // Make some predictions.
            foreach (var dataPoint in data.Take(5))
            {
                var result = engine.Predict(dataPoint);
                Console.WriteLine($"Actual label: {dataPoint.Label}, "
                    + $"predicted label: {result.PredictedLabel}, "
                    + $"score of being positive class: {result.Score}, "
                    + $"and probability of being positive class: "
                    + $"{result.Probability}.");
            }

            // Expected output:
            // Actual label: True, predicted label: True, score of being positive class: 1.115094, and probability of being positive class: 0.7530775.
            // Actual label: False, predicted label: False, score of being positive class: -3.478797, and probability of being positive class: 0.02992158.
            // Actual label: True, predicted label: True, score of being positive class: 3.191896, and probability of being positive class: 0.9605282.
            // Actual label: False, predicted label: False, score of being positive class: -3.400863, and probability of being positive class: 0.03226851.
            // Actual label: True, predicted label: True, score of being positive class: 4.06056, and probability of being positive class: 0.9830528.
        }

        // This class defines objects fed to the trained model.
        private class DataPoint
        {
            // Label.
            public bool Label { get; set; }

            // Features from the first field. Note that different fields can
            // have different numbers of features.
            [VectorType(featureLength)]
            public float[] Field0 { get; set; }

            // Features from the second field.
            [VectorType(featureLength)]
            public float[] Field1 { get; set; }

            // Features from the third field.
            [VectorType(featureLength)]
            public float[] Field2 { get; set; }
        }

        // This class defines objects produced by trained model. The trained
        // model maps a DataPoint to a Result.
        public class Result
        {
            // Label.
            public bool Label { get; set; }

            // Predicted label.
            public bool PredictedLabel { get; set; }

            // Predicted score.
            public float Score { get; set; }

            // Probability of belonging to positive class.
            public float Probability { get; set; }
        }

        // Function used to create toy data sets. Returns exampleCount data
        // points drawn from a Random seeded with the given seed, so the same
        // arguments always yield the same data.
        private static IEnumerable<DataPoint> GenerateRandomDataPoints(
            int exampleCount, int seed = 0)
        {
            var rnd = new Random(seed);
            var data = new List<DataPoint>();
            for (int i = 0; i < exampleCount; ++i)
            {
                // Initialize an example with a random label and empty feature
                // vectors.
                var sample = new DataPoint()
                {
                    Label = rnd.Next() % 2 == 0,
                    Field0 = new float[featureLength],
                    Field1 = new float[featureLength],
                    Field2 = new float[featureLength]
                };

                // Fill feature vectors according to the assigned label.
                // Notice that features from different fields have different
                // biases and therefore different distributions. In practice,
                // e.g. game recommendation, one may use one field to store
                // features from user profile and another field to store
                // features from game profile.
                for (int j = 0; j < featureLength; ++j)
                {
                    // Positive class gets larger feature value.
                    var value0 = (float)rnd.NextDouble();
                    if (sample.Label)
                    {
                        value0 += 0.2f;
                    }
                    sample.Field0[j] = value0;

                    // Positive class gets smaller feature value.
                    var value1 = (float)rnd.NextDouble();
                    if (sample.Label)
                    {
                        value1 -= 0.2f;
                    }
                    sample.Field1[j] = value1;

                    // Positive class gets larger feature value.
                    var value2 = (float)rnd.NextDouble();
                    if (sample.Label)
                    {
                        value2 += 0.8f;
                    }
                    sample.Field2[j] = value2;
                }

                data.Add(sample);
            }

            return data;
        }

        // Function used to show evaluation metrics such as accuracy of
        // predictions. Each metric is written to the console on its own line,
        // formatted with two decimal places.
        private static void PrintMetrics(
            CalibratedBinaryClassificationMetrics metrics)
        {
            Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}");
            Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}");
            Console.WriteLine($"F1 Score: {metrics.F1Score:F2}");
            Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}");
            Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}");
            Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}");
            Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}");
            Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}");
            Console.WriteLine($"Log Loss Reduction: {metrics.LogLossReduction:F2}");
            Console.WriteLine($"Entropy: {metrics.Entropy:F2}");
        }
    }
}
適用於
FieldAwareFactorizationMachine(BinaryClassificationCatalog+BinaryClassificationTrainers, String, String, String)
建立 FieldAwareFactorizationMachineTrainer,其會使用透過布林值標籤資料定型的欄位感知分解機器 (field-aware factorization machine) 來預測目標。
public static Microsoft.ML.Trainers.FieldAwareFactorizationMachineTrainer FieldAwareFactorizationMachine (this Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers catalog, string featureColumnName = "Features", string labelColumnName = "Label", string exampleWeightColumnName = default);
static member FieldAwareFactorizationMachine : Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers * string * string * string -> Microsoft.ML.Trainers.FieldAwareFactorizationMachineTrainer
<Extension()>
Public Function FieldAwareFactorizationMachine (catalog As BinaryClassificationCatalog.BinaryClassificationTrainers, Optional featureColumnName As String = "Features", Optional labelColumnName As String = "Label", Optional exampleWeightColumnName As String = Nothing) As FieldAwareFactorizationMachineTrainer
參數
二元分類目錄定型器物件。
- exampleWeightColumnName
- String
範例權數資料行的名稱 (選擇性) 。
傳回
範例
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;
namespace Samples.Dynamic.Trainers.BinaryClassification
{
    // Sample: train the field-aware factorization machine trainer with its
    // default column names, then score and evaluate a separate test set.
    public static class FactorizationMachine
    {
        public static void Example()
        {
            // The ML.NET context serves as the catalog of operations, the
            // logging/exception hub and the randomness source. A fixed seed
            // keeps the output of this example deterministic.
            var mlContext = new MLContext(seed: 0);

            // Build the training examples and expose them as an IDataView,
            // the data abstraction the ML.NET API consumes.
            var trainingSamples = GenerateRandomDataPoints(1000);
            var uncachedTrainingData =
                mlContext.Data.LoadFromEnumerable(trainingSamples);

            // ML.NET does not cache data sets by default, so data that is read
            // from a file and visited many times can be slow because of
            // repeated featurization and disk access. When the data fits in
            // memory, caching it there helps, particularly for iterative
            // algorithms that need many passes over the data.
            var trainingData = mlContext.Data.Cache(uncachedTrainingData);

            // Define the trainer, relying on the default column names.
            var trainer = mlContext.BinaryClassification.Trainers
                .FieldAwareFactorizationMachine();

            // Fit the model to the training data.
            var model = trainer.Fit(trainingData);

            // Build the test set with a different random seed so that it
            // differs from the training set.
            var testSamples = GenerateRandomDataPoints(500, seed: 123);
            var testData = mlContext.Data.LoadFromEnumerable(testSamples);

            // Score the test set with the trained model.
            var transformedTestData = model.Transform(testData);

            // Materialize the scored rows as a list of Prediction objects.
            var predictions = mlContext.Data
                .CreateEnumerable<Prediction>(
                    transformedTestData, reuseRowObject: false)
                .ToList();

            // Show the first 5 predictions.
            foreach (var prediction in predictions.Take(5))
            {
                Console.WriteLine(
                    $"Label: {prediction.Label}, Prediction: {prediction.PredictedLabel}");
            }

            // Expected output:
            // Label: True, Prediction: False
            // Label: False, Prediction: False
            // Label: True, Prediction: False
            // Label: True, Prediction: False
            // Label: False, Prediction: False

            // Compute and print the overall metrics on the test set.
            var metrics = mlContext.BinaryClassification
                .Evaluate(transformedTestData);
            PrintMetrics(metrics);

            // Expected output:
            // Accuracy: 0.55
            // AUC: 0.54
            // F1 Score: 0.23
            // Negative Precision: 0.54
            // Negative Recall: 0.92
            // Positive Precision: 0.62
            // Positive Recall: 0.14
            //
            // NOTE(review): the recall/precision figures in the confusion
            // table below disagree with the summary metrics above; one of the
            // two looks stale - re-run the sample to confirm.
            //
            // TEST POSITIVE RATIO: 0.4760 (238.0/(238.0+262.0))
            // Confusion table
            // ||======================
            // PREDICTED || positive | negative | Recall
            // TRUTH ||======================
            // positive || 203 | 35 | 0.8529
            // negative || 21 | 241 | 0.9198
            // ||======================
            // Precision || 0.9063 | 0.8732 |
        }

        // Lazily yields `count` random examples from a Random seeded with
        // `seed`. Each example gets a random label and 50 feature values that
        // are correlated with that label.
        private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count,
            int seed = 0)
        {
            const int featureCount = 50;
            var generator = new Random(seed);

            for (int exampleIndex = 0; exampleIndex < count; exampleIndex++)
            {
                // Draw the label first, then the features, so the sequence of
                // random draws is fixed for a given seed.
                bool isPositive = (float)generator.NextDouble() > 0.5f;

                var features = new float[featureCount];
                for (int k = 0; k < featureCount; k++)
                {
                    float draw = (float)generator.NextDouble();

                    // Examples with a false label get their feature values
                    // slightly increased by a constant.
                    features[k] = isPositive ? draw : draw + 0.1f;
                }

                yield return new DataPoint
                {
                    Label = isPositive,
                    Features = features
                };
            }
        }

        // One example: a label plus 50 feature values. A data set is a
        // collection of such examples.
        private class DataPoint
        {
            public bool Label { get; set; }

            [VectorType(50)]
            public float[] Features { get; set; }
        }

        // Shape used to read predictions back from the scored data.
        private class Prediction
        {
            // Original label.
            public bool Label { get; set; }

            // Label predicted by the trained model.
            public bool PredictedLabel { get; set; }
        }

        // Writes the summary metrics (two decimal places each) followed by the
        // formatted confusion table to the console.
        private static void PrintMetrics(BinaryClassificationMetrics metrics)
        {
            Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}");
            Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}");
            Console.WriteLine($"F1 Score: {metrics.F1Score:F2}");
            Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}");
            Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}");
            Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}");
            Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}\n");

            var confusionTable = metrics.ConfusionMatrix.GetFormattedConfusionTable();
            Console.WriteLine(confusionTable);
        }
    }
}
備註
請注意,因為只有一個特徵資料行,基礎模型相當於標準分解機器 (factorization machine)。
適用於
FieldAwareFactorizationMachine(BinaryClassificationCatalog+BinaryClassificationTrainers, String[], String, String)
建立 FieldAwareFactorizationMachineTrainer,其會使用透過布林值標籤資料定型的欄位感知分解機器 (field-aware factorization machine) 來預測目標。
public static Microsoft.ML.Trainers.FieldAwareFactorizationMachineTrainer FieldAwareFactorizationMachine (this Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers catalog, string[] featureColumnNames, string labelColumnName = "Label", string exampleWeightColumnName = default);
static member FieldAwareFactorizationMachine : Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers * string[] * string * string -> Microsoft.ML.Trainers.FieldAwareFactorizationMachineTrainer
<Extension()>
Public Function FieldAwareFactorizationMachine (catalog As BinaryClassificationCatalog.BinaryClassificationTrainers, featureColumnNames As String(), Optional labelColumnName As String = "Label", Optional exampleWeightColumnName As String = Nothing) As FieldAwareFactorizationMachineTrainer
參數
二元分類目錄定型器物件。
- exampleWeightColumnName
- String
範例權數資料行的名稱 (選擇性) 。
傳回
範例
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;
namespace Samples.Dynamic.Trainers.BinaryClassification
{
    // Sample: train the field-aware factorization machine (FFM) binary
    // classification trainer on several feature columns (one per field).
    public static class FieldAwareFactorizationMachine
    {
        // Number of features per field.
        private const int featureLength = 5;

        // This example first trains a field-aware factorization machine for
        // binary classification, then measures the trained model's quality,
        // and finally uses the trained model to make predictions.
        public static void Example()
        {
            // Create a new context for ML.NET operations. It can be used for
            // exception tracking and logging, as a catalog of available
            // operations and as the source of randomness. Setting the seed to
            // a fixed number in this example to make outputs deterministic.
            var mlContext = new MLContext(seed: 0);

            // Create a list of training data points.
            IEnumerable<DataPoint> data = GenerateRandomDataPoints(500);

            // Convert the list of data points to an IDataView object, which is
            // consumable by ML.NET API.
            var trainingData = mlContext.Data.LoadFromEnumerable(data);

            // Define the trainer.
            // This trainer trains field-aware factorization (FFM)
            // for binary classification.
            // See https://www.csie.ntu.edu.tw/~cjlin/papers/ffm.pdf for the
            // theory behind and
            // https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf for
            // the training algorithm implemented in ML.NET.
            var pipeline = mlContext.BinaryClassification.Trainers
                .FieldAwareFactorizationMachine(
                    // Specify three feature columns!
                    new[] {nameof(DataPoint.Field0), nameof(DataPoint.Field1),
                        nameof(DataPoint.Field2) },
                    // Specify binary label's column name.
                    nameof(DataPoint.Label));

            // Train the model.
            var model = pipeline.Fit(trainingData);

            // Run the model on training data set.
            var transformedTrainingData = model.Transform(trainingData);

            // Measure the quality of the trained model.
            var metrics = mlContext.BinaryClassification
                .Evaluate(transformedTrainingData);

            // Show the quality metrics.
            PrintMetrics(metrics);

            // Expected output:
            // Accuracy: 0.99
            // AUC: 1.00
            // F1 Score: 0.99
            // Negative Precision: 1.00
            // Negative Recall: 0.98
            // Positive Precision: 0.98
            // Positive Recall: 1.00
            // Log Loss: 0.17
            // Log Loss Reduction: 0.83
            // Entropy: 1.00
            //
            // NOTE(review): PrintMetrics in this sample does not print a
            // confusion table, and the recall/precision figures in the table
            // below disagree with the summary metrics above. The table looks
            // like a leftover from a different run - re-run to confirm.
            //
            // TEST POSITIVE RATIO: 0.4760 (238.0/(238.0+262.0))
            // Confusion table
            // ||======================
            // PREDICTED || positive | negative | Recall
            // TRUTH ||======================
            // positive || 193 | 45 | 0.8109
            // negative || 52 | 210 | 0.8015
            // ||======================
            // Precision || 0.7878 | 0.8235 |

            // Create prediction function from the trained model.
            var engine = mlContext.Model
                .CreatePredictionEngine<DataPoint, Result>(model);

            // Make some predictions.
            foreach (var dataPoint in data.Take(5))
            {
                var result = engine.Predict(dataPoint);
                Console.WriteLine($"Actual label: {dataPoint.Label}, "
                    + $"predicted label: {result.PredictedLabel}, "
                    + $"score of being positive class: {result.Score}, "
                    + $"and probability of being positive class: "
                    + $"{result.Probability}.");
            }

            // Expected output:
            // Actual label: True, predicted label: True, score of being positive class: 1.115094, and probability of being positive class: 0.7530775.
            // Actual label: False, predicted label: False, score of being positive class: -3.478797, and probability of being positive class: 0.02992158.
            // Actual label: True, predicted label: True, score of being positive class: 3.191896, and probability of being positive class: 0.9605282.
            // Actual label: False, predicted label: False, score of being positive class: -3.400863, and probability of being positive class: 0.03226851.
            // Actual label: True, predicted label: True, score of being positive class: 4.06056, and probability of being positive class: 0.9830528.
        }

        // This class defines objects fed to the trained model.
        private class DataPoint
        {
            // Label.
            public bool Label { get; set; }

            // Features from the first field. Note that different fields can
            // have different numbers of features.
            [VectorType(featureLength)]
            public float[] Field0 { get; set; }

            // Features from the second field.
            [VectorType(featureLength)]
            public float[] Field1 { get; set; }

            // Features from the third field.
            [VectorType(featureLength)]
            public float[] Field2 { get; set; }
        }

        // This class defines objects produced by trained model. The trained
        // model maps a DataPoint to a Result.
        public class Result
        {
            // Label.
            public bool Label { get; set; }

            // Predicted label.
            public bool PredictedLabel { get; set; }

            // Predicted score.
            public float Score { get; set; }

            // Probability of belonging to positive class.
            public float Probability { get; set; }
        }

        // Function used to create toy data sets. Returns exampleCount data
        // points drawn from a Random seeded with the given seed, so the same
        // arguments always yield the same data.
        private static IEnumerable<DataPoint> GenerateRandomDataPoints(
            int exampleCount, int seed = 0)
        {
            var rnd = new Random(seed);
            var data = new List<DataPoint>();
            for (int i = 0; i < exampleCount; ++i)
            {
                // Initialize an example with a random label and empty feature
                // vectors.
                var sample = new DataPoint()
                {
                    Label = rnd.Next() % 2 == 0,
                    Field0 = new float[featureLength],
                    Field1 = new float[featureLength],
                    Field2 = new float[featureLength]
                };

                // Fill feature vectors according to the assigned label.
                // Notice that features from different fields have different
                // biases and therefore different distributions. In practice,
                // e.g. game recommendation, one may use one field to store
                // features from user profile and another field to store
                // features from game profile.
                for (int j = 0; j < featureLength; ++j)
                {
                    // Positive class gets larger feature value.
                    var value0 = (float)rnd.NextDouble();
                    if (sample.Label)
                    {
                        value0 += 0.2f;
                    }
                    sample.Field0[j] = value0;

                    // Positive class gets smaller feature value.
                    var value1 = (float)rnd.NextDouble();
                    if (sample.Label)
                    {
                        value1 -= 0.2f;
                    }
                    sample.Field1[j] = value1;

                    // Positive class gets larger feature value.
                    var value2 = (float)rnd.NextDouble();
                    if (sample.Label)
                    {
                        value2 += 0.8f;
                    }
                    sample.Field2[j] = value2;
                }

                data.Add(sample);
            }

            return data;
        }

        // Function used to show evaluation metrics such as accuracy of
        // predictions. Each metric is written to the console on its own line,
        // formatted with two decimal places.
        private static void PrintMetrics(
            CalibratedBinaryClassificationMetrics metrics)
        {
            Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}");
            Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}");
            Console.WriteLine($"F1 Score: {metrics.F1Score:F2}");
            Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}");
            Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}");
            Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}");
            Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}");
            Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}");
            Console.WriteLine($"Log Loss Reduction: {metrics.LogLossReduction:F2}");
            Console.WriteLine($"Entropy: {metrics.Entropy:F2}");
        }
    }
}