From 0cd29990eabdb70195d5264f2ff1506277b64e56 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 9 Mar 2026 03:56:20 +0000 Subject: [PATCH 1/2] test: add CSV property-based tests and CSV benchmarks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Task 9 (Testing Improvements): Add CsvParserProperties.fs with 10 tests: - FsCheck property-based roundtrip test (500 random test cases) verifying that arbitrary string values survive encode → parse roundtrip with comma separator and double-quote quoting - Targeted tests for fields containing separators, quotes, newlines (LF), CRLF, tab separators, custom quote chars, multiple rows, empty fields, and single-column empty-field rows (which previously had no coverage) Task 8 (Performance): Add CsvBenchmarks.fs with 8 benchmarks covering: - Parse-only and full row-iteration for AirQuality (3 KB), banklist (40 KB), Titanic (60 KB), and MSFT (328 KB) CSV files - Also fix Program.fs to actually run HtmlBenchmarks and CsvBenchmarks (HtmlBenchmarks was defined but never wired into the program entry point) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- tests/FSharp.Data.Benchmarks/CsvBenchmarks.fs | 55 +++++++ .../FSharp.Data.Benchmarks.fsproj | 1 + tests/FSharp.Data.Benchmarks/Program.fs | 6 +- .../CsvParserProperties.fs | 144 ++++++++++++++++++ .../FSharp.Data.Core.Tests.fsproj | 1 + 5 files changed, 206 insertions(+), 1 deletion(-) create mode 100644 tests/FSharp.Data.Benchmarks/CsvBenchmarks.fs create mode 100644 tests/FSharp.Data.Core.Tests/CsvParserProperties.fs diff --git a/tests/FSharp.Data.Benchmarks/CsvBenchmarks.fs b/tests/FSharp.Data.Benchmarks/CsvBenchmarks.fs new file mode 100644 index 000000000..189e882f4 --- /dev/null +++ b/tests/FSharp.Data.Benchmarks/CsvBenchmarks.fs @@ -0,0 +1,55 @@ +namespace FSharp.Data.Benchmarks + +open System +open System.IO +open BenchmarkDotNet.Attributes +open FSharp.Data + +[] +[] +type CsvBenchmarks() = + + let mutable airQualityCsvText = "" + let mutable msftCsvText = "" + let mutable titanicCsvText = "" + let mutable banklistCsvText = "" + + [] + member this.Setup() = + let dataPath = Path.Combine(__SOURCE_DIRECTORY__, "../FSharp.Data.Tests/Data") + airQualityCsvText <- File.ReadAllText(Path.Combine(dataPath, "AirQuality.csv")) + msftCsvText <- File.ReadAllText(Path.Combine(dataPath, "MSFT.csv")) + titanicCsvText <- File.ReadAllText(Path.Combine(dataPath, "Titanic.csv")) + banklistCsvText <- File.ReadAllText(Path.Combine(dataPath, "banklist.csv")) + + [] + member this.ParseAirQualityCsv() = CsvFile.Parse(airQualityCsvText) + + [] + member this.IterateAirQualityCsv() = + let csv = CsvFile.Parse(airQualityCsvText) + csv.Rows |> Seq.length + + [] + member this.ParseMSFTCsv() = CsvFile.Parse(msftCsvText) + + [] + member this.IterateMSFTCsv() = + let csv = CsvFile.Parse(msftCsvText) + csv.Rows |> Seq.length + + [] + member this.ParseTitanicCsv() = CsvFile.Parse(titanicCsvText) + + [] + member this.IterateTitanicCsv() = + let csv = CsvFile.Parse(titanicCsvText) + csv.Rows |> Seq.length + + [] + member this.ParseBanklistCsv() = CsvFile.Parse(banklistCsvText) + + [] + member this.IterateBanklistCsv() = + let csv = CsvFile.Parse(banklistCsvText) + csv.Rows |> Seq.length diff --git a/tests/FSharp.Data.Benchmarks/FSharp.Data.Benchmarks.fsproj b/tests/FSharp.Data.Benchmarks/FSharp.Data.Benchmarks.fsproj index 306e84886..ae1b88d7b 100644 --- a/tests/FSharp.Data.Benchmarks/FSharp.Data.Benchmarks.fsproj +++ b/tests/FSharp.Data.Benchmarks/FSharp.Data.Benchmarks.fsproj @@ -15,6 +15,7 @@ + diff --git a/tests/FSharp.Data.Benchmarks/Program.fs b/tests/FSharp.Data.Benchmarks/Program.fs index 49be49dc0..9e66baea8 100644 --- a/tests/FSharp.Data.Benchmarks/Program.fs +++ b/tests/FSharp.Data.Benchmarks/Program.fs @@ -10,9 +10,13 @@ let main args = match args with | [| "json" |] -> BenchmarkRunner.Run() |> ignore | [| "conversions" |] -> BenchmarkRunner.Run() |> ignore - | _ -> + | [| "html" |] -> BenchmarkRunner.Run() |> ignore + | [| "csv" |] -> BenchmarkRunner.Run() |> ignore + | _ -> printfn "Running all benchmarks..." BenchmarkRunner.Run() |> ignore BenchmarkRunner.Run() |> ignore + BenchmarkRunner.Run() |> ignore + BenchmarkRunner.Run() |> ignore 0 \ No newline at end of file diff --git a/tests/FSharp.Data.Core.Tests/CsvParserProperties.fs b/tests/FSharp.Data.Core.Tests/CsvParserProperties.fs new file mode 100644 index 000000000..c172b3f8e --- /dev/null +++ b/tests/FSharp.Data.Core.Tests/CsvParserProperties.fs @@ -0,0 +1,144 @@ +module FSharp.Data.Tests.CsvParserProperties + +open NUnit.Framework +open FsUnit +open System +open System.IO +open FSharp.Data.Runtime.CsvReader +open FsCheck + +/// Encodes a CSV field value according to RFC 4180, quoting when necessary. +/// Empty strings are always quoted to avoid producing blank CSV lines. +let private encodeCsvField (separator: char) (quote: char) (value: string) = + let needsQuoting = + value = "" + || value.Contains(separator) + || value.Contains(quote) + || value.Contains('\n') + || value.Contains('\r') + + if needsQuoting then + let escaped = value.Replace(string quote, string quote + string quote) + sprintf "%c%s%c" quote escaped quote + else + value + +/// Encodes a row of field values as a single CSV line. +let private encodeCsvRow (separator: char) (quote: char) (fields: string[]) = + fields |> Array.map (encodeCsvField separator quote) |> String.concat (string separator) + +/// Parses a CSV string and returns all logical rows as arrays of field values. +let private parseCsv (csv: string) (separator: char) (quote: char) = + use reader = new StringReader(csv) + readCsvFile reader (string separator) quote |> Seq.map fst |> Array.ofSeq + +[] +let ``CSV roundtrip property: arbitrary string values are preserved when properly encoded`` () = + let separator = ',' + let quote = '"' + + // Generate non-empty rows of non-null strings + let fieldGen = Arb.generate |> Gen.map (fun s -> if s = null then "" else s) + + let rowGen = + Gen.nonEmptyListOf fieldGen |> Gen.map Array.ofList |> Gen.resize 6 + + let rowsGen = Gen.nonEmptyListOf rowGen |> Gen.map Array.ofList |> Gen.resize 8 + + let prop (rows: string[][]) = + let csv = + rows |> Array.map (encodeCsvRow separator quote) |> String.concat "\n" + + let parsed = parseCsv csv separator quote + + parsed.Length = rows.Length + && Array.forall2 + (fun (expected: string[]) (actual: string[]) -> + expected.Length = actual.Length && Array.forall2 (=) expected actual) + rows + parsed + + Check.One( + { Config.QuickThrowOnFailure with MaxTest = 500 }, + Prop.forAll (Arb.fromGen rowsGen) prop + ) + +[] +let ``CSV roundtrip: field containing separator is preserved as a single field`` () = + let fields = [| "value with, comma"; "normal field"; "a,b,c" |] + let csv = encodeCsvRow ',' '"' fields + let parsed = parseCsv csv ',' '"' + parsed.Length |> should equal 1 + parsed.[0] |> should equal fields + +[] +let ``CSV roundtrip: field containing quote character is preserved`` () = + let fields = [| "say \"hello\" world"; "normal"; "she said \"hi\"" |] + let csv = encodeCsvRow ',' '"' fields + let parsed = parseCsv csv ',' '"' + parsed.Length |> should equal 1 + parsed.[0] |> should equal fields + +[] +let ``CSV roundtrip: field containing newline spans one logical row`` () = + let fields = [| "line1\nline2"; "normal" |] + let csv = encodeCsvRow ',' '"' fields + let parsed = parseCsv csv ',' '"' + parsed.Length |> should equal 1 + parsed.[0] |> should equal fields + +[] +let ``CSV roundtrip: field containing carriage-return newline is preserved`` () = + let fields = [| "multi\r\nline"; "normal" |] + let csv = encodeCsvRow ',' '"' fields + let parsed = parseCsv csv ',' '"' + parsed.Length |> should equal 1 + parsed.[0] |> should equal fields + +[] +let ``CSV roundtrip: tab separator is supported`` () = + let fields = [| "a,b,c"; "d\te"; "normal" |] + let csv = encodeCsvRow '\t' '"' fields + let parsed = parseCsv csv '\t' '"' + parsed.Length |> should equal 1 + parsed.[0] |> should equal fields + +[] +let ``CSV roundtrip: custom quote character is respected`` () = + let fields = [| "value with, comma"; "value with 'single'" |] + let csv = encodeCsvRow ',' '\'' fields + let parsed = parseCsv csv ',' '\'' + parsed.Length |> should equal 1 + parsed.[0] |> should equal fields + +[] +let ``CSV roundtrip: multiple rows with varied content are all preserved`` () = + let rows = + [| [| "Alice"; "30"; "New York, NY" |] + [| "Bob"; "25"; "Los Angeles" |] + [| "Charlie says \"hello\""; "35"; "Chicago\nIL" |] |] + + let csv = rows |> Array.map (encodeCsvRow ',' '"') |> String.concat "\n" + let parsed = parseCsv csv ',' '"' + parsed.Length |> should equal rows.Length + + Array.iter2 (fun expected actual -> actual |> should equal expected) rows parsed + +[] +let ``CSV roundtrip: empty string field is preserved`` () = + let fields = [| ""; "non-empty"; "" |] + let csv = encodeCsvRow ',' '"' fields + let parsed = parseCsv csv ',' '"' + parsed.Length |> should equal 1 + parsed.[0] |> should equal fields + +[] +let ``CSV roundtrip: single-column empty field row is not lost`` () = + let rows = [| [| "before" |]; [| "" |]; [| "after" |] |] + + let csv = rows |> Array.map (encodeCsvRow ',' '"') |> String.concat "\n" + let parsed = parseCsv csv ',' '"' + parsed.Length |> should equal 3 + parsed.[0] |> should equal [| "before" |] + parsed.[1] |> should equal [| "" |] + parsed.[2] |> should equal [| "after" |] diff --git a/tests/FSharp.Data.Core.Tests/FSharp.Data.Core.Tests.fsproj b/tests/FSharp.Data.Core.Tests/FSharp.Data.Core.Tests.fsproj index 090a43f82..8f703b3c1 100644 --- a/tests/FSharp.Data.Core.Tests/FSharp.Data.Core.Tests.fsproj +++ b/tests/FSharp.Data.Core.Tests/FSharp.Data.Core.Tests.fsproj @@ -36,6 +36,7 @@ + From 127702415fe916991e3e10e9a1ae8e5b5c16b188 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 9 Mar 2026 04:01:57 +0000 Subject: [PATCH 2/2] ci: trigger checks