Freigeben über


SYSK 166: Stream Output Performance Comparison

If you need to write a large number of data items (e.g. records with many fields) to a stream, do you think it’s faster to use multiple stream.Write statements or “batch it” via string.Format and use one stream.Write statement per record?

 

My tests show that you’ll get almost twice the performance by using fewer stream.Write statements (see code below):

Many statements: 6875000 ticks

Batch: 3437500 ticks

Many statements: 6406250 ticks

Batch: 3281250 ticks

Many statements: 6093750 ticks

Batch: 3437500 ticks

 

Note:  I tested this using both a MemoryStream and a file – both resulted in roughly the same ratio.

 

/// <summary>
/// Click handler that benchmarks two ways of writing ten integers per record
/// to a <see cref="System.IO.StreamWriter"/> over a
/// <see cref="System.IO.MemoryStream"/>: ten formatted Write calls per record
/// versus one string.Format followed by a single Write per record.
/// The elapsed time of each variant is reported through Debug.WriteLine in
/// 100-nanosecond ticks.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
private void button1_Click(object sender, EventArgs e)
{
    const int Iterations = 100000;

    int[] numbers = new int[10];
    for (int i = 0; i < numbers.Length; i++)
    {
        numbers[i] = i;
    }

    // Stopwatch is used instead of DateTime.Now because the system clock only
    // advances in coarse steps, which quantizes short measurements.
    System.Diagnostics.Stopwatch timer = new System.Diagnostics.Stopwatch();

    // Variant 1: one formatted Write call per field. The calls are deliberately
    // left unrolled so that the measurement contains no extra inner-loop overhead.
    using (System.IO.MemoryStream ms = new System.IO.MemoryStream())
    using (System.IO.StreamWriter stream = new System.IO.StreamWriter(ms))
    {
        timer.Start();
        for (int i = 0; i < Iterations; i++)
        {
            stream.Write("{0}", numbers[0]);
            stream.Write("{0}", numbers[1]);
            stream.Write("{0}", numbers[2]);
            stream.Write("{0}", numbers[3]);
            stream.Write("{0}", numbers[4]);
            stream.Write("{0}", numbers[5]);
            stream.Write("{0}", numbers[6]);
            stream.Write("{0}", numbers[7]);
            stream.Write("{0}", numbers[8]);
            stream.Write("{0}", numbers[9]);
        }

        // Flush inside the timed region so the writer's buffered tail is counted.
        stream.Flush();
        timer.Stop();
    }

    // Elapsed.Ticks is in 100 ns units, the same unit DateTime.Ticks uses.
    System.Diagnostics.Debug.WriteLine(string.Format("Many statements: {0} ticks", timer.Elapsed.Ticks));

    // Variant 2: format the whole record once, then issue a single Write call.
    timer.Reset();
    using (System.IO.MemoryStream ms = new System.IO.MemoryStream())
    using (System.IO.StreamWriter stream = new System.IO.StreamWriter(ms))
    {
        timer.Start();
        for (int i = 0; i < Iterations; i++)
        {
            string buffer = string.Format("{0}{1}{2}{3}{4}{5}{6}{7}{8}{9}",
                numbers[0], numbers[1], numbers[2], numbers[3], numbers[4], numbers[5],
                numbers[6], numbers[7], numbers[8], numbers[9]);

            stream.Write(buffer);
        }

        stream.Flush();
        timer.Stop();
    }

    System.Diagnostics.Debug.WriteLine(string.Format("Batch: {0} ticks", timer.Elapsed.Ticks));
}

Comments

  • Anonymous
    July 28, 2006
    Not entirely unexpected, but twice as slow seems huge to me.  The formatting overhead should be roughly the same, which leads me to wonder why batching is that much faster.

    ...good to know though, thanks.
  • Anonymous
    July 28, 2006
    I guess the lesson learned here is try to make as few calls to Stream.Write as possible
  • Anonymous
    September 19, 2006
    Your sample test is inadequate. The reason for such a difference in the performance figures is the repeated use of the String.Format method.

    The following snippet shows results that differ from yours:

           private static void CallMe()
           {
               int[] numbers = new int[10];

               for(int i = 0; i < numbers.Length; i++)
               {
                   numbers[i] = i;
               }

               System.IO.MemoryStream ms = new System.IO.MemoryStream();
               System.IO.StreamWriter stream = new System.IO.StreamWriter(ms);

               long t1, t2;

               t1 = DateTime.Now.Ticks;

               for(int i = 0; i < 100000; i++)
               {
                   stream.Write(numbers[0].ToString());
                   stream.Write(numbers[1].ToString());
                   stream.Write(numbers[2].ToString());
                   stream.Write(numbers[3].ToString());
                   stream.Write(numbers[4].ToString());
                   stream.Write(numbers[5].ToString());
                   stream.Write(numbers[6].ToString());
                   stream.Write(numbers[7].ToString());
                   stream.Write(numbers[8].ToString());
                   stream.Write(numbers[9].ToString());
               }

               t2 = DateTime.Now.Ticks;

               stream.Close();
               ms.Close();
               
               Console.WriteLine(string.Format("Many statements:  {0} ticks", t2 - t1));

               ms = new System.IO.MemoryStream();
               stream = new System.IO.StreamWriter(ms);
               t1 = DateTime.Now.Ticks;

               for(int i = 0; i < 100000; i++)
               {
                   string buffer = string.Format("{0}{1}{2}{3}{4}{5}{6}{7}{8}{9}",
                       numbers[0], numbers[1], numbers[2], numbers[3], numbers[4], numbers[5],
                       numbers[6], numbers[7], numbers[8], numbers[9]);

                   stream.Write(buffer);
               }

               t2 = DateTime.Now.Ticks;

               stream.Close();
               ms.Close();

               Console.WriteLine(string.Format("Batch:  {0} ticks", t2 - t1));
           }

    The results are:

    Many statements:  3750336 ticks
    Batch:  5312976 ticks

    Many statements:  3906600 ticks
    Batch:  5312976 ticks

    Many statements:  3750336 ticks
    Batch:  5469240 ticks





    If we set the capacity of the memory streams and get rid of String.Format altogether, the performance figures are exactly the same.

    Sample code:

           private static void CallMe()
           {
               int[] numbers = new int[10];

               for(int i = 0; i < numbers.Length; i++)
               {
                   numbers[i] = i;
               }

               System.IO.MemoryStream ms = new System.IO.MemoryStream(10000000);
               System.IO.StreamWriter stream = new System.IO.StreamWriter(ms);

               long t1, t2;

               t1 = DateTime.Now.Ticks;

               for(int i = 0; i < 100000; i++)
               {
                   stream.Write(numbers[0].ToString());
                   stream.Write(numbers[1].ToString());
                   stream.Write(numbers[2].ToString());
                   stream.Write(numbers[3].ToString());
                   stream.Write(numbers[4].ToString());
                   stream.Write(numbers[5].ToString());
                   stream.Write(numbers[6].ToString());
                   stream.Write(numbers[7].ToString());
                   stream.Write(numbers[8].ToString());
                   stream.Write(numbers[9].ToString());
               }

               t2 = DateTime.Now.Ticks;

               stream.Close();
               ms.Close();
               
               Console.WriteLine(string.Format("Many statements:  {0} ticks", t2 - t1));

               ms = new System.IO.MemoryStream(10000000);
               stream = new System.IO.StreamWriter(ms);
               t1 = DateTime.Now.Ticks;

               for(int i = 0; i < 100000; i++)
               {
                   string buffer = string.Concat(
                       numbers[0].ToString(), numbers[1].ToString(), numbers[2].ToString(), numbers[3].ToString(), numbers[4].ToString(), numbers[5].ToString(),
                       numbers[6].ToString(), numbers[7].ToString(), numbers[8].ToString(), numbers[9].ToString());

                   stream.Write(buffer);
               }

               t2 = DateTime.Now.Ticks;

               stream.Close();
               ms.Close();

               Console.WriteLine(string.Format("Batch:  {0} ticks", t2 - t1));
           }

    The results are:

    Many statements:  3906600 ticks
    Batch:  3906600 ticks

    Many statements:  3906600 ticks
    Batch:  3906600 ticks

    Many statements:  3906600 ticks
    Batch:  3906600 ticks