ExtensionsCatalog.ReplaceMissingValues 方法
定義
重要
部分資訊涉及發行前產品,在發行之前可能會有大幅修改。 Microsoft 對此處提供的資訊,不做任何明確或隱含的瑕疵擔保。
多載
ReplaceMissingValues(TransformsCatalog, InputOutputColumnPair[], MissingValueReplacingEstimator+ReplacementMode, Boolean) |
建立 MissingValueReplacingEstimator,它會將資料從 InputColumnName 中指定的資料行複製到新的資料行 OutputColumnName,並根據 replacementMode 取代其中遺漏的值。 |
ReplaceMissingValues(TransformsCatalog, String, String, MissingValueReplacingEstimator+ReplacementMode, Boolean) |
建立 MissingValueReplacingEstimator,它會將資料從 inputColumnName 中指定的資料行複製到新的資料行 outputColumnName,並根據 replacementMode 取代其中遺漏的值。 |
ReplaceMissingValues(TransformsCatalog, InputOutputColumnPair[], MissingValueReplacingEstimator+ReplacementMode, Boolean)
建立 MissingValueReplacingEstimator,它會將資料從 InputColumnName 中指定的資料行複製到新的資料行 OutputColumnName,並根據 replacementMode 取代其中遺漏的值。
public static Microsoft.ML.Transforms.MissingValueReplacingEstimator ReplaceMissingValues (this Microsoft.ML.TransformsCatalog catalog, Microsoft.ML.InputOutputColumnPair[] columns, Microsoft.ML.Transforms.MissingValueReplacingEstimator.ReplacementMode replacementMode = Microsoft.ML.Transforms.MissingValueReplacingEstimator+ReplacementMode.DefaultValue, bool imputeBySlot = true);
static member ReplaceMissingValues : Microsoft.ML.TransformsCatalog * Microsoft.ML.InputOutputColumnPair[] * Microsoft.ML.Transforms.MissingValueReplacingEstimator.ReplacementMode * bool -> Microsoft.ML.Transforms.MissingValueReplacingEstimator
<Extension()>
Public Function ReplaceMissingValues (catalog As TransformsCatalog, columns As InputOutputColumnPair(), Optional replacementMode As MissingValueReplacingEstimator.ReplacementMode = Microsoft.ML.Transforms.MissingValueReplacingEstimator+ReplacementMode.DefaultValue, Optional imputeBySlot As Boolean = true) As MissingValueReplacingEstimator
參數
- catalog
- TransformsCatalog
轉換的目錄。
- columns
- InputOutputColumnPair[]
輸入和輸出資料行的配對。 此估算器會透過浮點數或雙精度浮點數的純量或向量運作。
- replacementMode
- MissingValueReplacingEstimator.ReplacementMode
取代的類型,如 MissingValueReplacingEstimator.ReplacementMode 中所指定。
- imputeBySlot
- Boolean
如果為 true,則會針對每個位置 (slot) 個別進行取代值的插補。
否則,會針對整個向量資料行插補取代值。 純量和可變長度向量會忽略此設定,其插補一律適用於整個資料行。
傳回
範例
using System;
using System.Collections.Generic;
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Transforms;
namespace Samples.Dynamic
{
    /// <summary>
    /// Sample that applies ReplaceMissingValues to two vector columns at once,
    /// first with the DefaultValue replacement mode and then with the Mean
    /// replacement mode, printing the input and output columns each time.
    /// </summary>
    class ReplaceMissingValuesMultiColumn
    {
        /// <summary>
        /// Builds a small in-memory dataset containing NaN values and prints
        /// the result of replacing them under both replacement modes.
        /// </summary>
        public static void Example()
        {
            // Create a new ML context, for ML.NET operations. It can be used
            // for exception tracking and logging, as well as the source of
            // randomness.
            var mlContext = new MLContext();

            // Get a small dataset as an IEnumerable and convert it to an
            // IDataView.
            var samples = new List<DataPoint>
            {
                new DataPoint
                {
                    Features1 = new float[] { 1, 1, 0 },
                    Features2 = new float[] { 1, 1 }
                },
                new DataPoint
                {
                    Features1 = new float[] { 0, float.NaN, 1 },
                    Features2 = new float[] { 0, 1 }
                },
                new DataPoint
                {
                    Features1 = new float[] { -1, float.NaN, -3 },
                    Features2 = new float[] { -1, float.NaN }
                },
                new DataPoint
                {
                    Features1 = new float[] { -1, 6, -3 },
                    Features2 = new float[] { 0, float.PositiveInfinity }
                },
            };
            var data = mlContext.Data.LoadFromEnumerable(samples);

            // Here we use the default replacement mode, which replaces the
            // value with the default value for its type.
            ReplaceAndPrint(mlContext, data,
                MissingValueReplacingEstimator.ReplacementMode.DefaultValue);

            // Expected output:
            // Features1: [1, 1, 0] MissingReplaced1: [1, 1, 0] Features2: [1, 1] MissingReplaced2: [1, 1]
            // Features1: [0, NaN, 1] MissingReplaced1: [0, 0, 1] Features2: [0, 1] MissingReplaced2: [0, 1]
            // Features1: [-1, NaN, -3] MissingReplaced1: [-1, 0, -3] Features2: [-1, NaN] MissingReplaced2: [-1, 0]
            // Features1: [-1, 6, -3] MissingReplaced1: [-1, 6, -3] Features2: [0, ∞] MissingReplaced2: [0, ∞]

            // Here we use the mean replacement mode, which replaces the value
            // with the mean of the values that were not missing.
            ReplaceAndPrint(mlContext, data,
                MissingValueReplacingEstimator.ReplacementMode.Mean);

            // Expected output:
            // Features1: [1, 1, 0] MissingReplaced1: [1, 1, 0] Features2: [1, 1] MissingReplaced2: [1, 1]
            // Features1: [0, NaN, 1] MissingReplaced1: [0, 3.5, 1] Features2: [0, 1] MissingReplaced2: [0, 1]
            // Features1: [-1, NaN, -3] MissingReplaced1: [-1, 3.5, -3] Features2: [-1, NaN] MissingReplaced2: [-1, 1]
            // Features1: [-1, 6, -3] MissingReplaced1: [-1, 6, -3] Features2: [0, ∞] MissingReplaced2: [0, ∞]
        }

        /// <summary>
        /// Fits a missing-value replacing estimator for both feature columns
        /// using the given mode, transforms the data, and writes every row to
        /// the console.
        /// </summary>
        /// <param name="mlContext">The ML context used to build the pipeline.</param>
        /// <param name="data">The input data to fit on and transform.</param>
        /// <param name="mode">The replacement mode passed to the estimator.</param>
        private static void ReplaceAndPrint(MLContext mlContext, IDataView data,
            MissingValueReplacingEstimator.ReplacementMode mode)
        {
            // Each pair maps an output column name to its input column name.
            var columnPairs = new[]
            {
                new InputOutputColumnPair("MissingReplaced1", "Features1"),
                new InputOutputColumnPair("MissingReplaced2", "Features2")
            };
            var pipeline = mlContext.Transforms.ReplaceMissingValues(
                columnPairs, mode);

            // Now we can transform the data and look at the output to confirm
            // the behavior of the estimator. This operation doesn't actually
            // evaluate data until we read the data below.
            var transformer = pipeline.Fit(data);
            var transformedData = transformer.Transform(data);

            // We can extract the newly created columns as an IEnumerable of
            // SampleDataTransformed, the class we define below.
            var rows = mlContext.Data.CreateEnumerable<SampleDataTransformed>(
                transformedData, reuseRowObject: false);

            // And finally, we can write out the rows of the dataset, looking
            // at the columns of interest.
            foreach (var row in rows)
            {
                string features1 = string.Join(", ", row.Features1);
                string replaced1 = string.Join(", ", row.MissingReplaced1);
                string features2 = string.Join(", ", row.Features2);
                string replaced2 = string.Join(", ", row.MissingReplaced2);

                Console.WriteLine("Features1: [" + features1 +
                    "]\t MissingReplaced1: [" + replaced1 +
                    "]\t Features2: [" + features2 +
                    "]\t MissingReplaced2: [" + replaced2 + "]");
            }
        }

        /// <summary>
        /// Input row: two fixed-size float vectors (sizes 3 and 2).
        /// </summary>
        private class DataPoint
        {
            [VectorType(3)]
            public float[] Features1 { get; set; }

            [VectorType(2)]
            public float[] Features2 { get; set; }
        }

        /// <summary>
        /// Output row: the input columns plus the two replaced columns.
        /// </summary>
        private sealed class SampleDataTransformed : DataPoint
        {
            [VectorType(3)]
            public float[] MissingReplaced1 { get; set; }

            [VectorType(2)]
            public float[] MissingReplaced2 { get; set; }
        }
    }
}
備註
此轉換可以同時對多個資料行運作。
適用於
ReplaceMissingValues(TransformsCatalog, String, String, MissingValueReplacingEstimator+ReplacementMode, Boolean)
建立 MissingValueReplacingEstimator,它會將資料從 inputColumnName 中指定的資料行複製到新的資料行 outputColumnName,並根據 replacementMode 取代其中遺漏的值。
public static Microsoft.ML.Transforms.MissingValueReplacingEstimator ReplaceMissingValues (this Microsoft.ML.TransformsCatalog catalog, string outputColumnName, string inputColumnName = default, Microsoft.ML.Transforms.MissingValueReplacingEstimator.ReplacementMode replacementMode = Microsoft.ML.Transforms.MissingValueReplacingEstimator+ReplacementMode.DefaultValue, bool imputeBySlot = true);
static member ReplaceMissingValues : Microsoft.ML.TransformsCatalog * string * string * Microsoft.ML.Transforms.MissingValueReplacingEstimator.ReplacementMode * bool -> Microsoft.ML.Transforms.MissingValueReplacingEstimator
<Extension()>
Public Function ReplaceMissingValues (catalog As TransformsCatalog, outputColumnName As String, Optional inputColumnName As String = Nothing, Optional replacementMode As MissingValueReplacingEstimator.ReplacementMode = Microsoft.ML.Transforms.MissingValueReplacingEstimator+ReplacementMode.DefaultValue, Optional imputeBySlot As Boolean = true) As MissingValueReplacingEstimator
參數
- catalog
- TransformsCatalog
轉換的目錄。
- outputColumnName
- String
轉換 inputColumnName
所產生的資料行名稱。
此資料行的資料類型會與輸入資料行的資料類型相同。
- replacementMode
- MissingValueReplacingEstimator.ReplacementMode
取代的類型,如 MissingValueReplacingEstimator.ReplacementMode 中所指定。
- imputeBySlot
- Boolean
如果為 true,則會針對每個位置 (slot) 個別進行取代值的插補。 否則,會針對整個向量資料行插補取代值。 純量和可變長度向量會忽略此設定,其插補一律適用於整個資料行。
傳回
範例
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Transforms;
namespace Samples.Dynamic
{
    /// <summary>
    /// Sample that applies ReplaceMissingValues to a single vector column,
    /// first with the DefaultValue replacement mode and then with the Mean
    /// replacement mode, printing the input and output column each time.
    /// </summary>
    class ReplaceMissingValues
    {
        /// <summary>
        /// Builds a small in-memory dataset containing NaN values and prints
        /// the result of replacing them under both replacement modes.
        /// </summary>
        public static void Example()
        {
            // Create a new ML context, for ML.NET operations. It can be used
            // for exception tracking and logging, as well as the source of
            // randomness.
            var mlContext = new MLContext();

            // Get a small dataset as an IEnumerable and convert it to an
            // IDataView.
            var samples = new List<DataPoint>
            {
                new DataPoint
                {
                    Features = new float[] { float.PositiveInfinity, 1, 0 }
                },
                new DataPoint { Features = new float[] { 0, float.NaN, 1 } },
                new DataPoint { Features = new float[] { -1, 2, -3 } },
                new DataPoint { Features = new float[] { -1, float.NaN, -3 } },
            };
            var data = mlContext.Data.LoadFromEnumerable(samples);

            // Here we use the default replacement mode, which replaces the
            // value with the default value for its type.
            ReplaceAndPrint(mlContext, data,
                MissingValueReplacingEstimator.ReplacementMode.DefaultValue);

            // Expected output:
            // Features: [∞, 1, 0] MissingReplaced: [∞, 1, 0]
            // Features: [0, NaN, 1] MissingReplaced: [0, 0, 1]
            // Features: [-1, 2, -3] MissingReplaced: [-1, 2, -3]
            // Features: [-1, NaN, -3] MissingReplaced: [-1, 0, -3]

            // Here we use the mean replacement mode, which replaces the value
            // with the mean of the values that were not missing.
            ReplaceAndPrint(mlContext, data,
                MissingValueReplacingEstimator.ReplacementMode.Mean);

            // Expected output:
            // Features: [∞, 1, 0] MissingReplaced: [∞, 1, 0]
            // Features: [0, NaN, 1] MissingReplaced: [0, 1.5, 1]
            // Features: [-1, 2, -3] MissingReplaced: [-1, 2, -3]
            // Features: [-1, NaN, -3] MissingReplaced: [-1, 1.5, -3]
        }

        /// <summary>
        /// Fits a missing-value replacing estimator that reads "Features" and
        /// writes "MissingReplaced" using the given mode, transforms the data,
        /// and writes every row to the console.
        /// </summary>
        /// <param name="mlContext">The ML context used to build the pipeline.</param>
        /// <param name="data">The input data to fit on and transform.</param>
        /// <param name="mode">The replacement mode passed to the estimator.</param>
        private static void ReplaceAndPrint(MLContext mlContext, IDataView data,
            MissingValueReplacingEstimator.ReplacementMode mode)
        {
            // First argument is the output column name, second the input.
            var pipeline = mlContext.Transforms.ReplaceMissingValues(
                "MissingReplaced", "Features", mode);

            // Now we can transform the data and look at the output to confirm
            // the behavior of the estimator. This operation doesn't actually
            // evaluate data until we read the data below.
            var transformer = pipeline.Fit(data);
            var transformedData = transformer.Transform(data);

            // We can extract the newly created column as an IEnumerable of
            // SampleDataTransformed, the class we define below.
            var rows = mlContext.Data.CreateEnumerable<SampleDataTransformed>(
                transformedData, reuseRowObject: false);

            // And finally, we can write out the rows of the dataset, looking
            // at the columns of interest.
            foreach (var row in rows)
            {
                string features = string.Join(", ", row.Features);
                string replaced = string.Join(", ", row.MissingReplaced);

                Console.WriteLine("Features: [" + features +
                    "]\t MissingReplaced: [" + replaced + "]");
            }
        }

        /// <summary>
        /// Input row: one fixed-size float vector of size 3.
        /// </summary>
        private class DataPoint
        {
            [VectorType(3)]
            public float[] Features { get; set; }
        }

        /// <summary>
        /// Output row: the input column plus the replaced column.
        /// </summary>
        private sealed class SampleDataTransformed : DataPoint
        {
            [VectorType(3)]
            public float[] MissingReplaced { get; set; }
        }
    }
}