共用方式為


HOW TO:計算 CSV 文字檔案中的資料行值 (LINQ)

本範例顯示如何在 .csv 檔案的資料行上執行彙總 (Aggregate) 計算,例如 Sum、Average、Min 和 Max。 這裡所顯示的範例準則也適用於其他類型的結構化文字。

若要建立原始程式檔

  • 將下列各行複製到名為 scores.csv 的檔案,並將它儲存在您的方案資料夾中。 假設第一個資料行代表學號,而後續資料行則代表四項測驗的成績。

    111, 97, 92, 81, 60
    112, 75, 84, 91, 39
    113, 88, 94, 65, 91
    114, 97, 89, 85, 82
    115, 35, 72, 91, 70
    116, 99, 86, 90, 94
    117, 93, 92, 80, 87
    118, 92, 90, 83, 78
    119, 68, 79, 88, 92
    120, 99, 82, 81, 79
    121, 96, 85, 91, 60
    122, 94, 92, 91, 91
    

範例

    Class SumColumns

        Public Shared Sub Main()

            Dim lines As String() = System.IO.File.ReadAllLines("../../../scores.csv")

            ' Specifies the column to compute
            ' This value could be passed in at runtime.
            Dim exam = 3

            ' Spreadsheet format:
            ' Student ID    Exam#1  Exam#2  Exam#3  Exam#4
            ' 111,          97,     92,     81,     60
            ' one is added to skip over the first column
            ' which holds the student ID.
            SumColumn(lines, exam + 1)
            Console.WriteLine()
            MultiColumns(lines)

            ' Keep the console window open in debug mode.
            Console.WriteLine("Press any key to exit...")
            Console.ReadKey()

        End Sub

        Shared Sub SumColumn(ByVal lines As IEnumerable(Of String), ByVal col As Integer)

            ' This query performs two steps:
            ' split the string into a string array
            ' convert the specified element to
            ' integer and select it.
            Dim columnQuery = From line In lines 
                               Let x = line.Split(",") 
                               Select Convert.ToInt32(x(col))

            ' Execute and cache the results for performance.
            ' Only needed with very large files.
            Dim results = columnQuery.ToList()

            ' Perform aggregate calculations 
            ' on the column specified by col.
            Dim avgScore = Aggregate score In results Into Average(score)
            Dim minScore = Aggregate score In results Into Min(score)
            Dim maxScore = Aggregate score In results Into Max(score)

            Console.WriteLine("Single Column Query:")
            Console.WriteLine("Exam #{0}: Average:{1:##.##} High Score:{2} Low Score:{3}", 
                         col, avgScore, maxScore, minScore)


        End Sub

        Shared Sub MultiColumns(ByVal lines As IEnumerable(Of String))

            Console.WriteLine("Multi Column Query:")

            ' Create the query. It will produce nested sequences. 
            ' multiColQuery performs these steps:
            ' 1) convert the string to a string array
            ' 2) skip over the "Student ID" column and take the rest
            ' 3) convert each field to an int and select that 
            '    entire sequence as one row in the results.
            Dim multiColQuery = From line In lines 
                                Let fields = line.Split(",") 
                                Select From str In fields Skip 1 
                                            Select Convert.ToInt32(str)

            Dim results = multiColQuery.ToList()

            ' Find out how many columns we have.
            Dim columnCount = results(0).Count()

            ' Perform aggregate calculations on each column.            
            ' One loop for each score column in scores.
            ' We can use a for loop because we have already
            ' executed the multiColQuery in the call to ToList.

            For j As Integer = 0 To columnCount - 1
                Dim column = j
                Dim res2 = From row In results 
                           Select row.ElementAt(column)

                ' Perform aggregate calculations 
                ' on the column specified by col.
                Dim avgScore = Aggregate score In res2 Into Average(score)
                Dim minScore = Aggregate score In res2 Into Min(score)
                Dim maxScore = Aggregate score In res2 Into Max(score)

                ' Add 1 to column numbers because exams in this course start with #1
                Console.WriteLine("Exam #{0} Average: {1:##.##} High Score: {2} Low Score: {3}", 
                                  column + 1, avgScore, maxScore, minScore)

            Next
        End Sub

    End Class
    ' Output:
    ' Single Column Query:
    ' Exam #4: Average:76.92 High Score:94 Low Score:39

    ' Multi Column Query:
    ' Exam #1 Average: 86.08 High Score: 99 Low Score: 35
    ' Exam #2 Average: 86.42 High Score: 94 Low Score: 72
    ' Exam #3 Average: 84.75 High Score: 91 Low Score: 65
    ' Exam #4 Average: 76.92 High Score: 94 Low Score: 39

class SumColumns
{
    static void Main(string[] args)
    {
        string[] lines = System.IO.File.ReadAllLines(@"../../../scores.csv");

        // Specifies the column to compute
        int exam = 3;

        // Spreadsheet format:
        // Student ID    Exam#1  Exam#2  Exam#3  Exam#4
        // 111,          97,     92,     81,     60
        // one is added to skip over the first column
        // which holds the student ID.
        SingleColumn(lines, exam + 1);
        Console.WriteLine();
        MultiColumns(lines);

        Console.WriteLine("Press any key to exit");
        Console.ReadKey();
    }

    static void SingleColumn(IEnumerable<string> strs, int examNum)
    {
        Console.WriteLine("Single Column Query:");

        // examNum specifies the column to run the 
        // calculations on. This could also be
        // passed in dynamically at runtime.             

        // columnQuery is a IEnumerable<int>
        // This query performs two steps:
        // 1) split the string into a string[]
        // 2) convert the specified element to
        //    int and select it.
        var columnQuery =
            from line in strs
            let x = line.Split(',')
            select Convert.ToInt32(x[examNum]);

        // Execute and cache the results for performance.
        // Only needed with very large files.
        var results = columnQuery.ToList();

        // Perform aggregate calculations 
        // on the column specified by examNum.
        double average = results.Average();
        int max = results.Max();
        int min = results.Min();

        Console.WriteLine("Exam #{0}: Average:{1:##.##} High Score:{2} Low Score:{3}",
                 examNum, average, max, min);
    }
    static void MultiColumns(IEnumerable<string> strs)
    {
        Console.WriteLine("Multi Column Query:");

        // Create the columnQuery. Explicit typing is used
        // to make clear that the columnQuery will produce 
        // nested sequences. You can also just use 'var'.
        // The columnQuery performs these steps:
        // 1) convert the string to a string[]
        // 2) skip over the "Student ID" column and take the rest
        // 3) convert each string to an int and select that 
        //    entire sequence as one row in the results.
        IEnumerable<IEnumerable<int>> query =
            from line in strs
            let x = line.Split(',')
            let y = x.Skip(1)
            select (from str in y
                    select Convert.ToInt32(str));

        // Execute and cache the results for performance.
        // ToArray could also be used here.
        var results = query.ToList();

        // Find out how many columns we have.
        int columnCount = results[0].Count();

        // Perform aggregate calculations on each column.            
        // One loop for each score column in scores.
        // We can use a for loop because we have already
        // executed the columnQuery in the call to ToList.
        for (int column = 0; column < columnCount; column++)
        {
            var res2 = from row in results
                       select row.ElementAt(column);
            double average = res2.Average();
            int max = res2.Max();
            int min = res2.Min();

            // 1 is added to column because Exam numbers
            // begin with 1
            Console.WriteLine("Exam #{0} Average: {1:##.##} High Score: {2} Low Score: {3}",
                          column + 1, average, max, min);
        }
    }
}
/* Output:
    Single Column Query:
    Exam #4: Average:76.92 High Score:94 Low Score:39

    Multi Column Query:
    Exam #1 Average: 86.08 High Score: 99 Low Score: 35
    Exam #2 Average: 86.42 High Score: 94 Low Score: 72
    Exam #3 Average: 84.75 High Score: 91 Low Score: 65
    Exam #4 Average: 76.92 High Score: 94 Low Score: 39
 */

此查詢的運作方式是使用 Split 方法將每一行文字轉換成陣列。 每個陣列元素代表一個資料行。 最後,每個資料行中的文字都會轉換成其數值表示。 如果您的檔案是定位分隔檔,請直接將 Split 方法中的引數更新為 \t。

編譯程式碼

  • 建立一個以 .NET Framework 3.5 版為目標的 Visual Studio 專案。 根據預設,專案有 System.Core.dll 的參考,以及 System.Linq 命名空間的 using 指示詞 (C#) 或 Imports 陳述式 (Visual Basic)。

  • 請將這段程式碼複製到您的專案,

  • 按 F5 編譯和執行程式。

  • 按任何鍵離開主控台視窗。

請參閱

概念

LINQ 和字串

LINQ 和檔案目錄