Partager via


Using Data

This post contains the following topics:

Working with Arrays

You can create either dense n-dimensional arrays or distributed dense n-dimensional arrays using the Microsoft codename “Cloud Numerics” lab.

Creating Arrays

You can create dense arrays with Numerics.Local. For example:

 
using local = Microsoft.Numerics.Local;

// Build a 2-by-2 local dense array from a rectangular System array
var a = local.NumericDenseArrayFactory.CreateFromSystemArray<double>(
    new double[,]
    {
        { -0.5, 1.0 },
        {  0.5, 1.0 }
    });

Creating Distributed Arrays

You can create distributed dense arrays with Numerics.Distributed. For example:

 
// Alias for the distributed-array namespace
using dist = Microsoft.Numerics.Distributed;
// Passes `a` to the distributed array constructor
// (presumably the local array created in the previous example -- confirm)
var c = new dist.NumericDenseArray<double>(a); // Explicit distributed data creation

Casting Arrays

You can cast from a distributed array to a local array. For example:

 var d = c.ToLocalArray(); // Cast the distributed array c to a local array

You can also assign local data to distributed data. For example:

 
// Values for a 2-by-2 matrix
double[,] values = { { -0.5, 1.0 }, { 0.5, 1.0 } };

// Create the local array first ...
var a = local.NumericDenseArrayFactory.CreateFromSystemArray<double>(values);

// ... then assign it to a variable of the distributed array type
dist.NumericDenseArray<double> c = a; // Assignment with backend distributed data

Loading Distributed Data from a File

The “Cloud Numerics” lab provides an interface you can implement for loading data from a file.

The steps for loading distributed data from a file are:

1. Create a class that returns an object that conforms to the Numerics.Distributed.IO.IParallelReader interface or else use or modify the Distributed.IO.CSVLoader class provided in the Cloud Numerics lab distribution.

2. Use the Distributed.IO.Loader.LoadData() method to load your data into a distributed dense array.

For more details, see the blog post titled Using the IParallelReader Interface.

Creating Distributed Arrays from Azure Blobs

For more information on Windows Azure Blob storage, see the Getting Started page: https://www.microsoft.com/windowsazure/learn/get-started/

Creating Serial IO from Blobs

 using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Microsoft.WindowsAzure.StorageClient;
using msnl = Microsoft.Numerics.Local;
using msnd = Microsoft.Numerics.Distributed;

namespace ExampleWithSerialIO
{
    // Sample program: downloads a matrix stored as binary data in a blob, converts it
    // to a NumericDenseArray of doubles, and prints the mean of its elements.
    class Program
    {
        // Sample blobs that hold matrices of random numbers as binary data
        static readonly string accountName = @"https://cloudnumericslab.blob.core.windows.net/";

        // 1000-by-1000 matrix
        // (plain ASCII quotes: typographic quotes around a string literal do not compile)
        static readonly string blobAddress = @"https://cloudnumericslab.blob.core.windows.net/arraycollection/mediummatrix";

        // Method to read blob data and convert it into local NumericDenseArray of doubles.
        // Returns an array whose shape is taken from the blob metadata entries
        // "dimension0" (rows) and "dimension1" (columns).
        // Throws OverflowException if an element's byte offset does not fit in an int.
        public static msnl.NumericDenseArray<double> ReadBlob()
        {
            // Get reference to blob
            var blobClient = new CloudBlobClient(accountName);
            var blob = blobClient.GetBlobReference(blobAddress);

            // Read number of rows and columns from blob metadata
            blob.FetchAttributes();
            long rows = Convert.ToInt64(blob.Metadata["dimension0"]);
            long columns = Convert.ToInt64(blob.Metadata["dimension1"]);

            // Convert blob binary data to local NumericDenseArray
            var outArray = msnl.NumericDenseArrayFactory.Create<double>(new long[] { rows, columns });
            var blobData = blob.DownloadByteArray();
            for (long i = 0; i < rows; i++)
            {
                for (long j = 0; j < columns; j++)
                {
                    // Element (i, j) is read from byte offset (i * columns + j) * 8.
                    // The offset is computed in 64-bit arithmetic and converted with
                    // a checked cast so that it cannot silently wrap around.
                    int offset = checked((int)((i * columns + j) * sizeof(double)));
                    outArray[i, j] = BitConverter.ToDouble(blobData, offset);
                }
            }
            return outArray;
        }

        static void Main()
        {
            // Initialize runtime
            Microsoft.Numerics.NumericsRuntime.Initialize();

            // Read data and implicitly cast to distributed array
            msnd.NumericDenseArray<double> data = ReadBlob();

            // Compute mean of dataset
            var mean = Microsoft.Numerics.Statistics.Descriptive.Mean(data);

            // Write result. When running on Windows Azure cluster, 
            // the output is available in job output 
            Console.WriteLine("Mean of data: {0}", mean);

            // Shut down runtime
            Microsoft.Numerics.NumericsRuntime.Shutdown();
        }
    }
}

 

Creating Distributed IO from Blobs

 using System;
using System.Linq;
using msnl = Microsoft.Numerics.Local;
using msnd = Microsoft.Numerics.Distributed;
using Microsoft.Numerics;
using Microsoft.WindowsAzure;
using Microsoft.WindowsAzure.StorageClient;

// An example class for reading an array from blob storage
// Each blob contains a piece of 2-D array

namespace AzureArrayReader
{
    // Parallel reader that assembles a 2-D array of doubles from blobs named
    // "slab0", "slab1", ... in a single container. Each blob holds one piece of the array.
    [Serializable()]
    public class AzureArrayReader : msnd.IO.IParallelReader<double>
    {
        private string accountName;
        private string containerName;

        // accountName:   address passed to the CloudBlobClient constructor
        // containerName: name of the container that holds the slab blobs
        public AzureArrayReader(string accountName,string containerName)
        {
            this.accountName = accountName;
            this.containerName = containerName;
        }

        // Assign blobs to MPI ranks.
        // Returns one entry per rank; each entry is an int[] { firstBlobIndex, blobCount }.
        // Ranks left over once all blobs are handed out receive a count of zero.
        public object[] ComputeAssignment(int nranks)
        {
            object[] assignments = new object[nranks];

            var blobClient = new CloudBlobClient(accountName);
            var matrixContainer = blobClient.GetContainerReference(containerName);
            int blobCount = matrixContainer.ListBlobs().Count();

            // Ceiling division so that every blob is covered by some rank
            int maxBlobsPerRank = (int)Math.Ceiling((double)blobCount / (double)nranks);
            int currentBlob = 0;
            for (int rank = 0; rank < nranks; rank++)
            {
                int step = Math.Max(0, Math.Min(maxBlobsPerRank, blobCount - currentBlob));
                assignments[rank] = new int[] { currentBlob, step };
                currentBlob += step;
            }
            return assignments;
        }

        // Assume pieces are concatenated along column dimension
        public int DistributedDimension
        {
            get { return 1; }
            // The setter discards the assigned value; the getter always returns 1
            set { }
        }

        // Name of the blob that stores the slab with the given index
        private static string SlabName(long index)
        {
            return "slab" + index.ToString();
        }

        // Read data from blobs.
        // assignment: an int[] { firstBlobIndex, blobCount } as produced by ComputeAssignment.
        // Returns the assigned slabs concatenated along the column dimension, or an array
        // with zero columns when no blobs were assigned.
        // Throws System.IO.InvalidDataException if a slab's row count differs from slab0's,
        // and OverflowException if an element's byte offset does not fit in an int.
        public msnl.NumericDenseArray<double> ReadWorker(Object assignment)
        {
            var blobClient = new CloudBlobClient(accountName);
            var matrixContainer = blobClient.GetContainerReference(containerName);

            int[] blobs = (int[])assignment;
            int firstSlab = blobs[0];
            int slabCount = blobs[1];

            // The row count of "slab0" is the reference shape for every rank,
            // including ranks that were assigned no blobs
            var firstBlob = matrixContainer.GetBlockBlobReference(SlabName(0));
            firstBlob.FetchAttributes();
            long rows = Convert.ToInt64(firstBlob.Metadata["dimension0"]);

            // If a rank was assigned zero blobs, return empty array
            if (slabCount == 0)
            {
                return msnl.NumericDenseArrayFactory.Create<double>(new long[] { rows, 0 });
            }

            // Get blob metadata, validate that each piece has equal number of rows
            long[] columnsPerSlab = new long[slabCount];
            for (int i = 0; i < slabCount; i++)
            {
                var matrixBlob = matrixContainer.GetBlockBlobReference(SlabName((long)firstSlab + i));
                matrixBlob.FetchAttributes();
                if (Convert.ToInt64(matrixBlob.Metadata["dimension0"]) != rows)
                {
                    throw new System.IO.InvalidDataException("Invalid slab shape");
                }
                columnsPerSlab[i] = Convert.ToInt64(matrixBlob.Metadata["dimension1"]);
            }

            // Construct output array
            var outArray = msnl.NumericDenseArrayFactory.Create<double>(
                new long[] { rows, columnsPerSlab.Sum() });

            // Read data
            long columnCounter = 0;
            for (int i = 0; i < slabCount; i++)
            {
                // Same reference call as the metadata pass, so both passes
                // address each slab in the same way
                var matrixBlob = matrixContainer.GetBlockBlobReference(SlabName((long)firstSlab + i));
                var blobData = matrixBlob.DownloadByteArray();
                for (long j = 0; j < columnsPerSlab[i]; j++)
                {
                    for (long k = 0; k < rows; k++)
                    {
                        // Element (k, j) of the slab is read from byte offset (j * rows + k) * 8.
                        // The offset is computed in 64-bit arithmetic and converted with
                        // a checked cast so that it cannot silently wrap around.
                        int offset = checked((int)((j * rows + k) * sizeof(double)));
                        outArray[k, columnCounter] = BitConverter.ToDouble(blobData, offset);
                    }
                    columnCounter++;
                }
            }
            return outArray;
        }
    }
}

 

 

Accessing Data with LINQ

This section provides the following examples of how to use the C# LINQ extensions to access array data.

  • Extracting Selected Data by Index
  • Filtering out NaN Values

Extracting Selected Data by Index

 using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Microsoft.Numerics;
using Microsoft.Numerics.Local;

namespace HowToRecipes
{
    // Shows how to pick the elements in an inclusive index range out of a
    // numeric dense array with the indexed LINQ Where overload.
    class LINQtoNDAExtractExample
    {
        public static void Run()
        {
            // Inclusive bounds of the part to be extracted
            int idxStart = 1;
            int idxEnd = 4;

            // Source data wrapped in a numeric dense array
            int[] source = { 1, 2, 3, 4, 5, 6 };
            var numbers = NumericDenseArrayFactory.CreateFromSystemArray<int>(source);

            Console.WriteLine("All numbers: {0}", numbers);
            Console.WriteLine("Start index: {0}, End index {1}", idxStart, idxEnd);

            // Keep only the elements whose position lies within the bounds
            var selected = numbers
                .Where((value, position) => position >= idxStart && position <= idxEnd)
                .ToArray();
            NumericDenseArray<int> outArray =
                NumericDenseArrayFactory.CreateFromSystemArray<int>(selected);

            Console.WriteLine("Extracted array: {0}", outArray);
        }
    }
}

Filtering out NaN Values

 using System;
using System.Linq;
using System.Collections;
using System.Collections.Generic;
using Microsoft.Numerics;
using Microsoft.Numerics.Local;

namespace HowToRecipes
{
    // Shows how to drop NaN entries from a numeric dense array with LINQ.
    class LINQtoNDATrimNaNsExample
    {
        public static void Run()
        {
            // Sample data containing two NaN entries
            double[] raw = { double.NaN, 1.0, 2.0, 3.0, double.NaN, 4.0, 5.0, 6.0 };
            var sampleNan = NumericDenseArrayFactory.CreateFromSystemArray<double>(raw);
            Console.WriteLine("Array with NaNs: {0}", sampleNan);

            // Keep every value that is not NaN and wrap the result in a new array
            var kept = sampleNan
                .Where(value => !double.IsNaN(value))
                .ToArray();
            var cleanedNDA = NumericDenseArrayFactory.CreateFromSystemArray<double>(kept);

            Console.WriteLine("Trimmed array: {0}", cleanedNDA);
        }
    }
}