Share via


SvmLightLoaderSaverCatalog.CreateSvmLightLoader Method

Definition

Creates a loader that loads SVM-light format files. SvmLightLoader.

public static Microsoft.ML.Data.SvmLightLoader CreateSvmLightLoader (this Microsoft.ML.DataOperationsCatalog catalog, long? numberOfRows = default, int inputSize = 0, bool zeroBased = false, Microsoft.ML.Data.IMultiStreamSource dataSample = default);
static member CreateSvmLightLoader : Microsoft.ML.DataOperationsCatalog * Nullable<int64> * int * bool * Microsoft.ML.Data.IMultiStreamSource -> Microsoft.ML.Data.SvmLightLoader
<Extension()>
Public Function CreateSvmLightLoader (catalog As DataOperationsCatalog, Optional numberOfRows As Nullable(Of Long) = Nothing, Optional inputSize As Integer = 0, Optional zeroBased As Boolean = false, Optional dataSample As IMultiStreamSource = Nothing) As SvmLightLoader

Parameters

numberOfRows
Nullable<Int64>

The number of rows from the sample to be used for determining the number of features.

inputSize
Int32

The number of features in the Features column. If 0 is specified, the loader will determine it by looking at the file sample given in dataSample.

zeroBased
Boolean

If the file contains zero-based indices, this parameter should be set to true. If they are one-based it should be set to false.

dataSample
IMultiStreamSource

A data sample to be used for determining the number of features in the Features column.

Returns

Examples

using System;
using System.IO;
using System.Text;
using Microsoft.ML;
using Microsoft.ML.Data;

namespace Samples.Dynamic.DataOperations
{
    public static class LoadingSvmLight
    {
        // This examples shows how to load data with SvmLightLoader.
        public static void Example()
        {
            // Create a random SVM light format file.
            var random = new Random(42);
            var dataDirectoryName = "DataDir";
            Directory.CreateDirectory(dataDirectoryName);
            var fileName = Path.Combine(dataDirectoryName, $"SVM_Data.csv");
            using (var fs = File.CreateText(fileName))
            {
                // Write random lines in SVM light format
                for (int line = 0; line < 10; line++)
                {
                    var sb = new StringBuilder();
                    if (random.NextDouble() > 0.5)
                        sb.Append("1 ");
                    else
                        sb.Append("-1 ");
                    if (line % 2 == 0)
                        sb.Append("cost:1 ");
                    else
                        sb.Append("cost:2 ");
                    for (int i = 1; i <= 10; i++)
                    {
                        if (random.NextDouble() > 0.5)
                            continue;
                        sb.Append($"{i}:{random.NextDouble()} ");
                    }
                    fs.WriteLine(sb.ToString());
                }
            }

            // Create an SvmLightLoader.
            var mlContext = new MLContext();
            var file = new MultiFileSource(fileName);
            var loader = mlContext.Data.CreateSvmLightLoader(dataSample: file);

            // Load a single file from path.
            var svmData = loader.Load(file);

            PrintSchema(svmData);

            // Expected Output:
            // Column Label type Single
            // Column Weight type Single
            // Column GroupId type Key<UInt64, 0 - 18446744073709551613>
            // Column Comment type String
            // Column Features type Vector<Single, 10>

            PrintData(svmData);

            // Expected Output:
            // 1 1 0 0 0.2625927 0 0 0.7612506 0.2573214 0 0.3809696 0.5174511
            // -1 1 0 0 0 0.7051522 0 0 0.7111546 0.9062127 0 0
            // -1 1 0 0 0 0.535722 0 0 0.1491191 0.05100901 0 0
            // -1 1 0 0.6481459 0.04449836 0 0 0.4203662 0 0 0.01325378 0.2674384
            // -1 1 0 0 0.7978093 0.5134962 0.008952909 0 0.003074009 0.6541431 0.9135142 0
            // -1 1 0 0.3727672 0.4369507 0 0 0.2973725 0 0 0 0.8816807
            // 1 1 0 0.1031429 0.3332489 0 0.1346936 0.5916625 0 0 0 0
            // 1 1 0 0 0 0.3454075 0 0.2197472 0.03848049 0.5923384 0.09373277 0
            // -1 1 0 0.7511514 0 0.0420841 0 0 0.9262196 0 0.545344 0
            // 1 1 0 0.02958358 0.9334617 0 0 0.8833956 0.2947684 0 0 0

            // If the loader is created without a data sample we need to specify the number of features expected in the file.
            loader = mlContext.Data.CreateSvmLightLoader(inputSize: 10);
            svmData = loader.Load(file);

            PrintSchema(svmData);
            PrintData(svmData);
        }

        private static void PrintSchema(IDataView svmData)
        {
            foreach (var col in svmData.Schema)
                Console.WriteLine($"Column {col.Name} type {col.Type}");
        }

        private static void PrintData(IDataView svmData)
        {
            using (var cursor = svmData.GetRowCursor(svmData.Schema))
            {
                var labelGetter = cursor.GetGetter<float>(svmData.Schema["Label"]);
                var weightGetter = cursor.GetGetter<float>(svmData.Schema["Weight"]);
                var featuresGetter = cursor.GetGetter<VBuffer<float>>(svmData.Schema["Features"]);

                VBuffer<float> features = default;
                while (cursor.MoveNext())
                {
                    float label = default;
                    labelGetter(ref label);

                    float weight = default;
                    weightGetter(ref weight);

                    featuresGetter(ref features);

                    Console.WriteLine($"{label} {weight} {string.Join(' ', features.DenseValues())}");
                }
            }
        }
    }
}

Applies to