DataOperationsCatalog.Cache(IDataView, String[]) 메서드
정의
중요
일부 정보는 릴리스되기 전에 상당 부분 수정될 수 있는 시험판 제품과 관련이 있습니다. Microsoft는 여기에 제공된 정보에 대해 어떠한 명시적이거나 묵시적인 보증도 하지 않습니다.
input의 지연(lazy) 메모리 내 캐시를 만듭니다.
public Microsoft.ML.IDataView Cache(Microsoft.ML.IDataView input, params string[] columnsToPrefetch);
member this.Cache : Microsoft.ML.IDataView * string[] -> Microsoft.ML.IDataView
Public Function Cache (input As IDataView, ParamArray columnsToPrefetch As String()) As IDataView
매개 변수
- input
- IDataView
입력 데이터입니다.
- columnsToPrefetch
- String[]
어떤 항목이든 캐시될 때마다 함께 캐시해야 하는 열입니다. 빈 배열 또는 null 값은 각 열이 처음 액세스될 때 캐시됨을 의미합니다.
반환
- IDataView
예제
using System;
using Microsoft.ML;
using Microsoft.ML.SamplesUtils;
namespace Samples.Dynamic
{
    /// <summary>
    /// Sample that times scanning an <see cref="IDataView"/> before and after
    /// wrapping it with <c>mlContext.Data.Cache</c>.
    /// </summary>
    public static class Cache
    {
        /// <summary>
        /// Loads the housing regression dataset and scans it three times:
        /// once without a cache, once right after the cache is created, and
        /// once more when the cache has already been populated. The row
        /// count, the computed average and the elapsed time of each scan are
        /// written to the console.
        /// </summary>
        public static void Example()
        {
            // Create a new context for ML.NET operations. It can be used for
            // exception tracking and logging, as a catalog of available
            // operations and as the source of randomness.
            var mlContext = new MLContext();

            var data = DatasetUtils.LoadHousingRegressionDataset(mlContext);

            // Time how long it takes to page through the records if we don't
            // cache.
            (int lines, double columnAverage, double elapsedSeconds) =
                TimeToScanIDataView(mlContext, data);

            Console.WriteLine(
                $"Lines={lines}, averageOfColumn0={columnAverage:0.00} and took " +
                $"{elapsedSeconds:0.000} seconds.");

            // Expected output (time is approximate):
            // Lines=506, averageOfColumn0=564.17 and took 0.314 seconds.

            // Now create a cached view of the data.
            var cachedData = mlContext.Data.Cache(data);

            // Time how long it takes to page through the records the first
            // time they're accessed after a cache is applied. This iteration
            // will be longer than subsequent calls, as the dataset is being
            // accessed and stored for later. Note that this operation may be
            // relatively quick, as the system may have cached the file.
            (lines, columnAverage, elapsedSeconds) =
                TimeToScanIDataView(mlContext, cachedData);

            Console.WriteLine(
                $"Lines={lines}, averageOfColumn0={columnAverage:0.00} and took " +
                $"{elapsedSeconds:0.000} seconds.");

            // Expected output (time is approximate):
            // Lines=506, averageOfColumn0=564.17 and took 0.056 seconds.

            // Time how long it takes to page through the records now that the
            // data is cached. After the first iteration that caches the
            // IDataView, future iterations, like this one, are faster because
            // they are pulling from data cached in memory.
            (lines, columnAverage, elapsedSeconds) =
                TimeToScanIDataView(mlContext, cachedData);

            Console.WriteLine(
                $"Lines={lines}, averageOfColumn0={columnAverage:0.00} and took " +
                $"{elapsedSeconds:0.000} seconds.");

            // Expected output (time is approximate):
            // Lines=506, averageOfColumn0=564.17 and took 0.006 seconds.
        }

        /// <summary>
        /// Enumerates every row of <paramref name="data"/> once and measures
        /// how long the enumeration takes.
        /// </summary>
        /// <param name="mlContext">Context used to create the row enumerable.</param>
        /// <param name="data">The data view to scan.</param>
        /// <returns>
        /// The number of rows read, the per-row average of the sum of all
        /// twelve <see cref="HousingRegression"/> properties, and the elapsed
        /// wall-clock time of the scan in (fractional) seconds.
        /// </returns>
        private static (int lines, double columnAverage, double elapsedSeconds)
            TimeToScanIDataView(MLContext mlContext, IDataView data)
        {
            int lines = 0;
            double columnAverage = 0.0;

            var enumerable = mlContext.Data
                .CreateEnumerable<HousingRegression>(data, reuseRowObject: true);

            // Only the enumeration itself is timed; creating the enumerable
            // above is outside the measured region.
            var watch = System.Diagnostics.Stopwatch.StartNew();
            foreach (var row in enumerable)
            {
                lines++;

                // Every property is read so that the scan touches each column
                // of the row, not just the first one.
                columnAverage += row.MedianHomeValue + row.CrimesPerCapita +
                    row.PercentResidental + row.PercentNonRetail + row.CharlesRiver
                    + row.NitricOxides + row.RoomsPerDwelling + row.PercentPre40s +
                    row.EmploymentDistance + row.HighwayDistance + row.TaxRate +
                    row.TeacherRatio;
            }
            watch.Stop();

            columnAverage /= lines;
            var elapsed = watch.Elapsed;

            // TotalSeconds is the whole interval as a fractional number of
            // seconds. TimeSpan.Seconds is only the integer 0-59 seconds
            // component and would report 0 for any sub-second scan.
            return (lines, columnAverage, elapsed.TotalSeconds);
        }

        /// <summary>
        /// A class to hold the raw housing regression rows.
        /// </summary>
        public sealed class HousingRegression
        {
            public float MedianHomeValue { get; set; }
            public float CrimesPerCapita { get; set; }
            public float PercentResidental { get; set; }
            public float PercentNonRetail { get; set; }
            public float CharlesRiver { get; set; }
            public float NitricOxides { get; set; }
            public float RoomsPerDwelling { get; set; }
            public float PercentPre40s { get; set; }
            public float EmploymentDistance { get; set; }
            public float HighwayDistance { get; set; }
            public float TaxRate { get; set; }
            public float TeacherRatio { get; set; }
        }
    }
}
설명
캐싱은 열별로 발생합니다. 열은 처음 액세스할 때만 캐시됩니다. 또한 columnsToPrefetch에 지정된 열은 '항상 필요'한 것으로 간주되므로, 어떤 데이터든 처음 요청될 때 이러한 열이 캐시됩니다.