SYSK 71: Dataset Footprint in Memory vs. Remoting Size
A lot of good discussions have been held over the last several years on the remoting size of a dataset. Significant improvements have been made in ADO.NET 2.0 to reduce the remoting footprint of a dataset by using binary serialization. But is the dataset object itself taking any less memory? In other words, if I were to use the dataset instance vs. custom strongly typed object, what is the difference in the memory footprint? I want to emphasize that, in this case, I’m more interested in the object’s memory utilization, not it’s remoted size (although, I’ve measured both).
I took the AdventureWorks database, with the following simple query -- select * from production.product. This resulted in 504 rows. Saving the results in notepad, ended up with a file of 100Kb (that’s counting tab characters, NULL spelled out as text, etc.) This gives us a very rough idea on the number of bytes utilized by the data itself.
My measurements show the following:
- Memory consumption using Dataset = 290,436 (3.5 times larger than using Products class)
- Memory consumption using array of objects = 140,028 (1.7 times larger than using Products class)
- Memory consumption using Product class = 82,656 (BEST)
- Remoting footprint of DataSet using binary serialization & binary formatter = 124,742 (BEST for DataSet, 1.35 times larger than using Products class)
- Remoting footprint of DataSet using xml serialization & soap formatter = 530,222 (4.25 times larger than binary/binary)
- Remoting footprint of DataSet using xml serialization & binary formatter = 367,403 (3 times larger than binary/binary)
- Remoting footprint of Products class using binary formatter = 92,598 (BEST overall)
So, no matter how you turn it, strongly typed custom classes are your best bet in terms of memory consumption!
Oh, one more thing – the serialization itself took 30 ms using binary formatter and 80 ms using XML/SOAP.
Below is the code used:
namespace WindowsApplication1
{
public partial class Form1 : Form
{
public Form1()
{
InitializeComponent();
}
private void button1_Click(object sender, EventArgs e)
{
System.Data.SqlClient.SqlDataAdapter da = new System.Data.SqlClient.SqlDataAdapter("select * from production.product", "Data Source=(local);Initial Catalog=AdventureWorks;Integrated Security=True");
System.Data.DataSet ds = new DataSet();
long s1 = GC.GetTotalMemory(true);
da.Fill(ds);
long s2 = GC.GetTotalMemory(true);
System.Diagnostics.Debug.WriteLine(string.Format("Memory consumption using Dataset = {0}", (s2-s1)));
// 504 rows
long dataSize = 0;
object[] data = new object[ds.Tables[0].Rows.Count];
int i = 0;
long d1 = GC.GetTotalMemory(true);
foreach (System.Data.DataRow row in ds.Tables[0].Rows)
{
data[i++] = row.ItemArray;
}
long d2 = GC.GetTotalMemory(true);
dataSize += (d2 - d1);
System.Diagnostics.Debug.WriteLine(string.Format("Memory consumption using array of objects = {0}", dataSize));
long dataSize2 = 0;
object[] data2 = new object[ds.Tables[0].Rows.Count];
i = 0;
long d11 = GC.GetTotalMemory(true);
foreach (System.Data.DataRow row in ds.Tables[0].Rows)
{
data2[i++] = new Product(row.ItemArray);
}
long d22 = GC.GetTotalMemory(true);
dataSize2 += (d22 - d11);
System.Diagnostics.Debug.WriteLine(string.Format("Memory consumption using Product class = {0}", dataSize2));
ds.RemotingFormat = SerializationFormat.Binary;
long l1 = GetObjectSize(ds);
System.Diagnostics.Debug.WriteLine(string.Format("Remoting footprint of DataSet using binary serialization & binary formatter = {0}", l1));
ds.RemotingFormat = SerializationFormat.Xml;
long l2 = GetSoapObjectSize(ds);
System.Diagnostics.Debug.WriteLine(string.Format("Remoting footprint of DataSet using xml serialization & soap formatter = {0}", l2));
ds.RemotingFormat = SerializationFormat.Xml;
long l3 = GetObjectSize(ds);
System.Diagnostics.Debug.WriteLine(string.Format("Remoting footprint of DataSet using xml serialization & binary formatter = {0}", l3));
long l4 = GetObjectSize(data2);
System.Diagnostics.Debug.WriteLine(string.Format("Remoting footprint of Products class using binary formatter = {0}", l4));
}
private long GetObjectSize(object data)
{
long result = 0;
System.Runtime.Serialization.IFormatter formatter = new System.Runtime.Serialization.Formatters.Binary.BinaryFormatter();
System.IO.MemoryStream stream = new System.IO.MemoryStream();
long t1 = DateTime.Now.Ticks;
formatter.Serialize(stream, data);
long t2 = DateTime.Now.Ticks;
result = stream.Length;
stream.Close();
System.Diagnostics.Debug.WriteLine((t2 - t1) / TimeSpan.TicksPerMillisecond + " ms");
return result;
}
private long GetSoapObjectSize(object data)
{
long result = 0;
System.Runtime.Serialization.IFormatter formatter = new System.Runtime.Serialization.Formatters.Soap.SoapFormatter();
System.IO.MemoryStream stream = new System.IO.MemoryStream();
long t1 = DateTime.Now.Ticks;
formatter.Serialize(stream, data);
long t2 = DateTime.Now.Ticks;
result = stream.Length;
stream.Close();
System.Diagnostics.Debug.WriteLine((t2 - t1) / TimeSpan.TicksPerMillisecond + " ms");
return result;
}
}
[Serializable]
public class Product
{
public int ProductID;
public string Name;
public string ProductNumber;
public bool MakeFlag;
public bool FinishedGoodsFlag;
public string Color;
public short SafetyStockLevel;
public short ReorderPoint;
public decimal StandardCost;
public decimal ListPrice;
public string Size;
public string SizeUnitMeasureCode;
public string WeightUnitMeasureCode;
public decimal Weight;
public int DaysToManufacture;
public string ProductLine;
public string Class;
public string Style;
public int ProductSubcategoryID;
public int ProductModelID;
public DateTime SellStartDate;
public DateTime SellEndDate;
public DateTime DiscontinuedDate;
public Guid rowguid;
public DateTime ModifiedDate;
public Product(object[] data)
{
ProductID = (int) data[0];
Name = (string) data[1];
ProductNumber = (string) data[2];
MakeFlag = (bool) data[3];
FinishedGoodsFlag = (bool) data[4];
if (!(data[5] is DBNull))
Color = (string)data[5];
SafetyStockLevel = (short)data[6];
ReorderPoint = (short)data[7];
StandardCost = (decimal)data[8];
ListPrice = (decimal)data[9];
if (!(data[10] is DBNull))
Size = (string)data[10];
if (!(data[11] is DBNull))
SizeUnitMeasureCode = (string)data[11];
if (!(data[12] is DBNull))
WeightUnitMeasureCode = (string)data[12];
if (!(data[13] is DBNull))
Weight = (decimal)data[13];
DaysToManufacture = (int)data[14];
if (!(data[15] is DBNull))
ProductLine = (string)data[15];
if (!(data[16] is DBNull))
Class = (string)data[16];
if (!(data[17] is DBNull))
Style = (string)data[17];
if (!(data[18] is DBNull))
ProductSubcategoryID = (int)data[18];
if (!(data[19] is DBNull))
ProductModelID = (int)data[19];
SellStartDate = (DateTime) data[20];
if (!(data[21] is DBNull))
SellEndDate = (DateTime) data[21];
if (!(data[22] is DBNull))
DiscontinuedDate = (DateTime)data[22];
rowguid = (System.Guid)data[23];
ModifiedDate = (DateTime)data[24];
}
}
}
Comments
- Anonymous
March 08, 2006
Excellent stuff! This is just the kind of thing I've been looking for. - Anonymous
March 17, 2008
Hi Irena, You have given one more proof to use custom classes. Thanks very much.