From c49c8a47197a6f365756cef317eb5a0086c48f54 Mon Sep 17 00:00:00 2001 From: Sergey Khruschak Date: Fri, 27 Jun 2025 21:49:51 +0300 Subject: [PATCH 1/2] Column types added --- Package.resolved | 4 +- Package.swift | 2 +- Sources/table/Cell.swift | 7 +- Sources/table/CellType.swift | 88 +++++++++++++++++++++ Sources/table/Extensions.swift | 17 +++- Sources/table/Format.swift | 20 +++-- Sources/table/Header.swift | 28 +++++-- Sources/table/Join.swift | 2 +- Sources/table/MainApp.swift | 7 +- Sources/table/Row.swift | 21 +---- Sources/table/Table.swift | 104 +++++++++++++++---------- Sources/table/TableView.swift | 7 +- Sources/table/Version.swift | 2 +- Tests/table-Tests/CellTypesTests.swift | 29 +++++++ Tests/table-Tests/FilterTests.swift | 2 +- 15 files changed, 255 insertions(+), 85 deletions(-) create mode 100644 Sources/table/CellType.swift create mode 100644 Tests/table-Tests/CellTypesTests.swift diff --git a/Package.resolved b/Package.resolved index b1ff0a0..aceaff7 100644 --- a/Package.resolved +++ b/Package.resolved @@ -5,8 +5,8 @@ "kind" : "remoteSourceControl", "location" : "https://github.com/apple/swift-argument-parser.git", "state" : { - "revision" : "c8ed701b513cf5177118a175d85fbbbcd707ab41", - "version" : "1.3.0" + "revision" : "011f0c765fb46d9cac61bca19be0527e99c98c8b", + "version" : "1.5.1" } } ], diff --git a/Package.swift b/Package.swift index 707ce97..b0188bd 100644 --- a/Package.swift +++ b/Package.swift @@ -6,7 +6,7 @@ import PackageDescription let package = Package( name: "table", dependencies: [ - .package(url: "https://github.com/apple/swift-argument-parser.git", from: "1.3.0"), + .package(url: "https://github.com/apple/swift-argument-parser.git", from: "1.5.1"), // .package(url: "https://github.com/groue/GRMustache.swift", from: "4.0.0") ], targets: [ diff --git a/Sources/table/Cell.swift b/Sources/table/Cell.swift index 2ae1ea1..f711d46 100644 --- a/Sources/table/Cell.swift +++ b/Sources/table/Cell.swift @@ -1,6 +1,7 @@ class Cell { private var computedValue: String? private let computeValue: () -> String + public let type: CellType public var value: String { get { @@ -15,13 +16,15 @@ class Cell { public var description: String { return value } - init(value: String) { + init(value: String, type: CellType = .string) { self.computedValue = value self.computeValue = { value } + self.type = type } - init(fn: @escaping () -> String) { + init(fn: @escaping () -> String, type: CellType = .string) { self.computeValue = fn self.computedValue = nil + self.type = type } } diff --git a/Sources/table/CellType.swift b/Sources/table/CellType.swift new file mode 100644 index 0000000..c10bf31 --- /dev/null +++ b/Sources/table/CellType.swift @@ -0,0 +1,88 @@ +import Foundation + +enum CellType { + case string + case number + case date + case boolean + + static func fromString(_ type: String) throws -> CellType { + switch type.trimmingCharacters(in: .whitespaces).lowercased() { + case "string": return .string + case "number": return .number + case "date": return .date + case "boolean": return .boolean + default: throw RuntimeError("Unsupported cell type \(type)") + } + } + + static func fromStringList(_ types: String) throws -> [CellType] { + do { + // long format "string, number, date, boolean" + return try types.split(separator: ",") .map { try CellType.fromString(String($0.trimmingCharacters(in: .whitespaces))) } + } catch { + do { + // short format "sndb" for string, number, date, boolean + return try types.trimmingCharacters(in: .whitespaces).lowercased().map { c in + switch c { + case "s": return .string + case "n": return .number + case "d": return .date + case "b": return .boolean + default: throw RuntimeError("Unsupported cell type \(c)") + } + } + } catch { + throw RuntimeError("Unsupported cell type \(types)") + } + } + } + + // Infers cell types from the first few rows of data + static func infer(rows: [[String]]) -> [CellType] { + let dateFormat = DateFormatter() + dateFormat.dateFormat = "yyyy-MM-dd hh:mm:ss" + + // Infer cell types from the first row + var types: [CellType] = rows.first?.map { value in + if value.isNumber { + return .number + } else if value.isDate { + return .date + } else if value.isBoolean { + return .boolean + } else { + return .string + } + } ?? [] + + // refine with the rest of the rows + for row in rows.dropFirst() { + for (idx, value) in row.enumerated() { + let type = types[idx] + if type == .number && !value.isNumber { + types[idx] = .string + } else if type == .date && !value.isDate { + types[idx] = .string + } else if type == .boolean && !value.isBoolean { + types[idx] = .string + } + } + } + + debug("Infered cell types: \(CellType.toString(types))") + + return types + } + + static func toString(_ types: [CellType]) -> String { + return types.map { type in + switch type { + case .string: return "string" + case .number: return "number" + case .date: return "date" + case .boolean: return "boolean" + } + }.joined(separator: ", ") + } +} \ No newline at end of file diff --git a/Sources/table/Extensions.swift b/Sources/table/Extensions.swift index 8fef3d3..a6b77e1 100644 --- a/Sources/table/Extensions.swift +++ b/Sources/table/Extensions.swift @@ -1,3 +1,5 @@ +import Foundation + extension Optional { func orThrow(_ errorExpression: @autoclosure () -> Error) throws -> Wrapped { switch self { @@ -14,10 +16,23 @@ extension String { return self.range(of: regex, options: .regularExpression, range: nil, locale: nil) != nil } + // TODO: remove me var isNumber: Bool { - return self.matches("^-?[0-9]*$") + if let _ = Double(self) { + return true + } + return false } + var isDate: Bool { + let dateFormatter = DateFormatter() + dateFormatter.dateFormat = "yyyy-MM-dd HH:mm:ss" // Adjust as needed for your date format + return dateFormatter.date(from: self.replacingOccurrences(of: "T", with: " ")) != nil + } + + var isBoolean: Bool { + return self.caseInsensitiveCompare("true") == .orderedSame || self.caseInsensitiveCompare("false") == .orderedSame + } } extension Array { diff --git a/Sources/table/Format.swift b/Sources/table/Format.swift index acb30ef..6946c07 100644 --- a/Sources/table/Format.swift +++ b/Sources/table/Format.swift @@ -86,17 +86,15 @@ class Format { } if name == "%quoted_values" { - return row.components.map { - let v = $0.value - if v.caseInsensitiveCompare("true") == .orderedSame || - v.caseInsensitiveCompare("false") == .orderedSame || - v.caseInsensitiveCompare("null") == .orderedSame || - v.isNumber - { - return v - } else { - return "'\(v)'" - } + return row.components.enumerated().map { (index, cell) in + let v = cell.value + let type = row.header?.type(ofIndex: index) ?? .string + + if type == .boolean || type == .number || v.caseInsensitiveCompare("null") == .orderedSame { + return v + } else { + return "'\(v)'" + } }.joined(separator: ",") } diff --git a/Sources/table/Header.swift b/Sources/table/Header.swift index b7d0dbf..1ea8539 100644 --- a/Sources/table/Header.swift +++ b/Sources/table/Header.swift @@ -2,9 +2,10 @@ import Foundation class Header { let cols: [String] - let size: Int + let size: Int + let types: [CellType] - convenience init(data: String, delimeter: String, trim: Bool, hasOuterBorders: Bool) throws { + convenience init(data: String, delimeter: String, trim: Bool, hasOuterBorders: Bool, types: [CellType]? = nil) throws { var components = try Csv.parseLine(data, delimeter: delimeter) if trim { @@ -15,16 +16,18 @@ class Header { components = components.dropFirst().dropLast() } - self.init(components: components) + self.init(components: components, types: types ?? Array(repeating: .string, count: components.count)) } - init(components: [String]) { + init(components: [String], types: [CellType]) { cols = components size = components.count + self.types = types } static func auto(size: Int) -> Header { - Header(components: stride(from: 0, to: size, by: 1).map { idx in "col\(idx)" }) + let components = stride(from: 0, to: size, by: 1).map { idx in "col\(idx)" } + return Header(components: components, types: Array(repeating: .string, count: size)) } subscript(index: Int) -> String { @@ -39,13 +42,26 @@ class Header { cols.firstIndex(of: ofColumn) } + func type(ofColumn: String) -> CellType? { + guard let index = index(ofColumn: ofColumn) else { return .string } + return index < types.count ? types[index] : .string + } + + func type(ofIndex: Int) -> CellType? { + return ofIndex < types.count ? types[ofIndex] : .string + } + func components() -> [String] { cols } + + func withTypes(_ types: [CellType]) -> Header { + Header(components: cols, types: types) + } } extension Header { static func +(h1: Header, h2: Header) -> Header { - Header(components: h1.components() + h2.components()) + Header(components: h1.components() + h2.components(), types: h1.types + h2.types) } } \ No newline at end of file diff --git a/Sources/table/Join.swift b/Sources/table/Join.swift index 2185b5c..255b591 100644 --- a/Sources/table/Join.swift +++ b/Sources/table/Join.swift @@ -50,7 +50,7 @@ class Join { static func parse(_ file: String, joinOn: String?, firstTable: any Table) throws -> Join { - try parse(try ParsedTable.parse(path: file, hasHeader: nil, headerOverride: nil, delimeter: nil), joinOn: joinOn, firstTable: firstTable) + try parse(try ParsedTable.parse(path: file, hasHeader: nil, headerOverride: nil, delimeter: nil, userTypes: nil), joinOn: joinOn, firstTable: firstTable) } static func parse(_ matchTable: ParsedTable, joinOn: String?, firstTable: any Table) throws -> Join { diff --git a/Sources/table/MainApp.swift b/Sources/table/MainApp.swift index 622cb09..aac4dad 100644 --- a/Sources/table/MainApp.swift +++ b/Sources/table/MainApp.swift @@ -67,6 +67,9 @@ struct MainApp: ParsableCommand { @Option(name: .customLong("header"), help: "Override header. Columns should be specified separated by comma.") var header: String? + @Option(name: .customLong("types"), help: "Optionally specify column types explicitly. If not set, the tool will try to detect types automatically. Example: --types string,number,date or in short form . Supported types: string, number, date, boolean.") + var columnTypes: String? + @Option(name: .customLong("columns"), help: "Speficies a comma separated list of columns to show in the output. Not compatible with --print.") var columns: String? @@ -112,9 +115,11 @@ struct MainApp: ParsableCommand { print("Debug enabled") } + let userTypes = try columnTypes.map { try CellType.fromStringList($0) } + let headerOverride = header.map { try! Header(data: $0, delimeter: ",", trim: false, hasOuterBorders: false) } - var table: any Table = try ParsedTable.parse(path: inputFile, hasHeader: !noInHeader, headerOverride: headerOverride, delimeter: delimeter) + var table: any Table = try ParsedTable.parse(path: inputFile, hasHeader: !noInHeader, headerOverride: headerOverride, delimeter: delimeter, userTypes: userTypes) let filter = try filter.map { try Filter.compile(filter: $0, header: table.header) } diff --git a/Sources/table/Row.swift b/Sources/table/Row.swift index a003c2b..4f2c13c 100644 --- a/Sources/table/Row.swift +++ b/Sources/table/Row.swift @@ -5,25 +5,12 @@ class Row { let components: [Cell] let header: Header? - convenience init(header: Header?, index: Int, data: String, delimeter: String, trim: Bool, hasOuterBorders: Bool) { - var components = data.components(separatedBy: delimeter) - - if trim { - components = components.map { $0.trimmingCharacters(in: .whitespacesAndNewlines) } - } - - if hasOuterBorders { - components = components.dropFirst().dropLast() - } - - self.init(header: header, index: index, components: components) - } - - convenience init(header: Header?, index: Int, components: [String]) { - self.init(header: header, index: index, cells: components.map { Cell(value: $0) }) + convenience init(header: Header, index: Int, components: [String]) { + let components = zip(components, header.types).map { Cell(value: $0.0, type: $0.1) } + self.init(header: header, index: index, cells: components) } - init(header: Header?, index: Int, cells: [Cell]) { + init(header: Header, index: Int, cells: [Cell]) { self.header = header self.index = index self.components = cells diff --git a/Sources/table/Table.swift b/Sources/table/Table.swift index ba7ef75..c39b361 100644 --- a/Sources/table/Table.swift +++ b/Sources/table/Table.swift @@ -60,46 +60,32 @@ class ParsedTable: Table { var row = nextLine() - while technicalRow(row) { + while ParsedTable.technicalRow(row) { row = reader.readLine() } - if conf.type == .csv { - return row.map { row in - Row( - header: conf.header, - index:line, - components: try! Csv.parseLine(row, delimeter: conf.delimeter) - ) - } - } - - return row.map { row in - Row( + return try! row.map { row in + let components = try ParsedTable.readRowComponents(row, type: conf.type, delimeter: conf.delimeter, trim: conf.trim, hasOuterBorders: FileType.hasOuterBorders(type: conf.type)) + + return Row( header: conf.header, index:line, - data:row, - delimeter: conf.delimeter, - trim: conf.trim, - hasOuterBorders: FileType.hasOuterBorders(type: conf.type)) + components: components + ) } - } - - // matches rows that has to be skipped, usually horizontal delimeters - private func technicalRow(_ str: String?) -> Bool { - return str?.matches(ParsedTable.technicalRowPattern) ?? false - } + } static func empty() -> ParsedTable { return ParsedTable(reader: ArrayLineReader(lines: []), conf: TableConfig(header: Header.auto(size: 0)), prereadRows: []) } static func fromArray(_ data: [[String]], header: [String]? = nil) -> ParsedTable { - let parsedHeader = header.map { Header(components: $0) } ?? Header.auto(size: data.count) + let types = CellType.infer(rows: data) + let parsedHeader = header.map { Header(components: $0, types: types) } ?? Header.auto(size: data.count) return ParsedTable(reader: ArrayLineReader(components: data), conf: TableConfig(header: parsedHeader), prereadRows: []) } - static func parse(path: String?, hasHeader: Bool?, headerOverride: Header?, delimeter: String?) throws -> ParsedTable { + static func parse(path: String?, hasHeader: Bool?, headerOverride: Header?, delimeter: String?, userTypes: [CellType]?) throws -> ParsedTable { let file: FileHandle? if let path { @@ -108,11 +94,11 @@ class ParsedTable: Table { file = FileHandle.standardInput } - return try parse(reader: FileLineReader(fileHandle: file!), hasHeader: hasHeader, headerOverride: headerOverride, delimeter: delimeter) + return try parse(reader: FileLineReader(fileHandle: file!), hasHeader: hasHeader, headerOverride: headerOverride, delimeter: delimeter, userTypes: userTypes) } - static func parse(reader: LineReader, hasHeader: Bool?, headerOverride: Header?, delimeter: String?) throws -> ParsedTable { - if let (conf, prereadRows) = try ParsedTable.detectFile(reader:reader, hasHeader:hasHeader, headerOverride: headerOverride, delimeter: delimeter) { + static func parse(reader: LineReader, hasHeader: Bool?, headerOverride: Header? = nil, delimeter: String? = nil, userTypes: [CellType]? = nil) throws -> ParsedTable { + if let (conf, prereadRows) = try ParsedTable.detectFile(reader:reader, hasHeader:hasHeader, headerOverride: headerOverride, delimeter: delimeter, userTypes: userTypes) { return ParsedTable(reader: reader, conf: conf, prereadRows: prereadRows) } else { return ParsedTable.empty() @@ -123,28 +109,41 @@ class ParsedTable: Table { // Returns header (if present), file type, column delimeter and list of pre-read rows // Pre-read rows necessary for standard input where we can't rewind file back // TODO: has header is not yet used - static func detectFile(reader: LineReader, hasHeader: Bool?, headerOverride: Header?, delimeter: String?) throws -> (TableConfig, [String])? { + static func detectFile(reader: LineReader, hasHeader: Bool?, headerOverride: Header?, delimeter: String?, userTypes: [CellType]?) throws -> (TableConfig, [String])? { if let row = reader.readLine() { if row.matches(ParsedTable.ownHeaderPattern) { - if (Global.debug) { print("Detected tool own table format") } + debug("Detected tool own table format") let parsedHeader = try reader.readLine().map { try! Header(data: $0, delimeter: "│", trim: true, hasOuterBorders: true) }.orThrow(RuntimeError("Failed to parse own table header")) - return (TableConfig(header: headerOverride ?? parsedHeader, type: FileType.table, delimeter: "│", trim: true), []) + let dataRows = [reader.readLine(), reader.readLine(), reader.readLine()].compactMap{$0}.filter { !ParsedTable.technicalRow($0) } + let types = userTypes ?? CellType.infer(rows: dataRows.map { try! ParsedTable.readRowComponents($0, type: .cassandraSql, delimeter: "|", trim: true) }) + let header = (headerOverride ?? parsedHeader).withTypes(types) + + return (TableConfig(header: header, type: FileType.table, delimeter: "│", trim: true), dataRows) } else if row.matches(ParsedTable.sqlHeaderPattern) { // SQL table header used in MySQL/MariaDB like '+----+-------+' - if (Global.debug) { print("Detected SQL like table format") } + debug("Detected SQL like table format") + let parsedHeader = try reader.readLine().map { try! Header(data: $0, delimeter: "|", trim: true, hasOuterBorders: true) }.orThrow(RuntimeError("Failed to parse SQL like header")) - return (TableConfig(header: headerOverride ?? parsedHeader, type: FileType.sql, delimeter: "|", trim: true), []) + let dataRows = [reader.readLine(), reader.readLine(), reader.readLine()].compactMap{$0}.filter { !ParsedTable.technicalRow($0) } + let types = userTypes ?? CellType.infer(rows: dataRows.map { try! ParsedTable.readRowComponents($0, type: .cassandraSql, delimeter: "|", trim: true) }) + let header = (headerOverride ?? parsedHeader).withTypes(types) + + return (TableConfig(header: header, type: FileType.sql, delimeter: "|", trim: true), dataRows) } else if row.matches("^([A-Za-z_0-9\\s]+\\|\\s*)+[A-Za-z_0-9\\s]+$") { // Cassandra like header: name | name2 | name3 - if (Global.debug) { print("Detected Cassandra like table format") } - let header = try! Header(data: row, delimeter: "|", trim: true, hasOuterBorders: false) - return (TableConfig(header: headerOverride ?? header, type: FileType.cassandraSql, delimeter: "|", trim: true), []) + debug("Detected Cassandra like table format") + + let dataRows = [reader.readLine(), reader.readLine(), reader.readLine()].compactMap{$0}.filter { !ParsedTable.technicalRow($0) } + let types = userTypes ?? CellType.infer(rows: dataRows.map { try! ParsedTable.readRowComponents($0, type: .cassandraSql, delimeter: "|", trim: true) }) + + let header = try! (headerOverride ?? Header(data: row, delimeter: "|", trim: true, hasOuterBorders: false, types: types)).withTypes(types) + return (TableConfig(header: header, type: FileType.cassandraSql, delimeter: "|", trim: true), dataRows) } else { - if (Global.debug) { print("Detected CSV like table format") } + debug("Detected CSV like table format") let delimeters = delimeter.map{ [$0] } ?? [",", ";", "\t", " ", "|"] // Pre-read up to 2 rows and apply delimeter to the header and rows. @@ -161,10 +160,14 @@ class ParsedTable: Table { debug("Found delimeter '\(d)'") if try! dataRows.allSatisfy({ (try Csv.parseLine($0, delimeter: d).count) == colsCount}) { - let header: Header = try! (hasHeader ?? true) ? Header(data: row, delimeter: d, trim: false, hasOuterBorders: false) : Header.auto(size: 1) + let readHeader: Header = try! (hasHeader ?? true) ? Header(data: row, delimeter: d, trim: false, hasOuterBorders: false) : Header.auto(size: 1) debug("Detected as CSV format with header separated by '\(d)' with \(colsCount) columns") + let cachedRows = (hasHeader ?? true) ? dataRows : ([row] + dataRows) - return (TableConfig(header: headerOverride ?? header, type: FileType.csv, delimeter: d, trim: false), cachedRows) + let types = CellType.infer(rows: cachedRows.map { try! Csv.parseLine($0, delimeter: d) }) + let header = (headerOverride ?? readHeader).withTypes(types) + + return (TableConfig(header: header, type: FileType.csv, delimeter: d, trim: false), cachedRows) } else { debug("Columns count mismatch") } @@ -179,4 +182,27 @@ class ParsedTable: Table { return nil // Empty file } } + + private static func readRowComponents(_ row: String, type: FileType, delimeter: String, trim: Bool, hasOuterBorders: Bool = false) throws -> [String] { + if type == .csv { + return try! Csv.parseLine(row, delimeter: delimeter) + } + + var components = row.components(separatedBy: delimeter) + + if trim { + components = components.map { $0.trimmingCharacters(in: .whitespacesAndNewlines) } + } + + if hasOuterBorders { + components = components.dropFirst().dropLast() + } + + return components + } + + // matches rows that has to be skipped, usually horizontal delimeters + private static func technicalRow(_ str: String?) -> Bool { + return str?.matches(ParsedTable.technicalRowPattern) ?? false + } } \ No newline at end of file diff --git a/Sources/table/TableView.swift b/Sources/table/TableView.swift index 73539fd..dcb65bb 100644 --- a/Sources/table/TableView.swift +++ b/Sources/table/TableView.swift @@ -40,7 +40,7 @@ class NewColumnsTableView: Table { var header: Header { get { - return self.table.header + Header(components: additionalColumns.map { $0.0 }) + return self.table.header + Header(components: additionalColumns.map { $0.0 }, types: additionalColumns.map { _ in CellType.string }) } } @@ -77,7 +77,10 @@ class ColumnsTableView: Table { init(table: any Table, visibleColumns: [String]) { self.table = table self.visibleColumns = visibleColumns - self.header = Header(components: visibleColumns) + let types = visibleColumns.map { name in + table.header.index(ofColumn: name).map {idx in table.header.types[idx]} ?? .string + } + self.header = Header(components: visibleColumns, types: types) } func next() -> Row? { diff --git a/Sources/table/Version.swift b/Sources/table/Version.swift index 90804ff..4f874f2 100644 --- a/Sources/table/Version.swift +++ b/Sources/table/Version.swift @@ -1 +1 @@ -let appVersion = "development (2022)" \ No newline at end of file +let appVersion = "development (2025)" \ No newline at end of file diff --git a/Tests/table-Tests/CellTypesTests.swift b/Tests/table-Tests/CellTypesTests.swift new file mode 100644 index 0000000..2203b9d --- /dev/null +++ b/Tests/table-Tests/CellTypesTests.swift @@ -0,0 +1,29 @@ +import XCTest +@testable import table + +class CellTypesTests: XCTestCase { + + func testParsesTypesCorrectly() throws { + let types = try CellType.fromStringList("date,string,number,boolean") + XCTAssertEqual(types, [.date, .string, .number, .boolean]) + } + + func testInfersTypesCorrectly() throws { + let types = CellType.infer(rows: [ + ["2024-06-01 00:11:00", "Hello", "123", "true"], + ["2024-06-02 01:01:00", "World", "456.78", "false"] + ]) + + XCTAssertEqual(types, [.date, .string, .number, .boolean]) + } + + func testCorrectsInferenceOnLaterColumns() throws { + let types = CellType.infer(rows: [ + ["2024-06-01 00:11:00", "Hello", "123", "true"], + ["2024-06-02 01:01:00", "World", "456.78", "false"], + ["string", "World", "not a number", "unknown"] + ]) + + XCTAssertEqual(types, [.string, .string, .string, .string]) + } +} diff --git a/Tests/table-Tests/FilterTests.swift b/Tests/table-Tests/FilterTests.swift index bd36759..b2e23a2 100644 --- a/Tests/table-Tests/FilterTests.swift +++ b/Tests/table-Tests/FilterTests.swift @@ -2,7 +2,7 @@ import XCTest @testable import table class FilterTests: XCTestCase { - let header = Header(components: ["col1", "col2"]) + let header = Header(components: ["col1", "col2"], types: [.string, .string]) func testComparesNumbersCorrectly() throws { let filter = try Filter.compile(filter: "col1 > 12", header: header) From 7b1bd6af4b8fbfb6efd0fd3daf1ed36da279d69a Mon Sep 17 00:00:00 2001 From: Sergey Khruschak Date: Sun, 29 Jun 2025 15:53:50 +0300 Subject: [PATCH 2/2] Format identification fixed --- Sources/table/CellType.swift | 8 +++++++- Sources/table/Table.swift | 8 ++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/Sources/table/CellType.swift b/Sources/table/CellType.swift index c10bf31..475d549 100644 --- a/Sources/table/CellType.swift +++ b/Sources/table/CellType.swift @@ -39,6 +39,7 @@ enum CellType { } // Infers cell types from the first few rows of data + // TODO: handle more complex cases when the first row is null static func infer(rows: [[String]]) -> [CellType] { let dateFormat = DateFormatter() dateFormat.dateFormat = "yyyy-MM-dd hh:mm:ss" @@ -60,7 +61,12 @@ enum CellType { for row in rows.dropFirst() { for (idx, value) in row.enumerated() { let type = types[idx] - if type == .number && !value.isNumber { + + if value.caseInsensitiveCompare("null") == .orderedSame { + continue // skip null values as they don't affect type inference + } + + if type == .number && !value.isNumber{ types[idx] = .string } else if type == .date && !value.isDate { types[idx] = .string diff --git a/Sources/table/Table.swift b/Sources/table/Table.swift index c39b361..f8e4b2f 100644 --- a/Sources/table/Table.swift +++ b/Sources/table/Table.swift @@ -65,7 +65,7 @@ class ParsedTable: Table { } return try! row.map { row in - let components = try ParsedTable.readRowComponents(row, type: conf.type, delimeter: conf.delimeter, trim: conf.trim, hasOuterBorders: FileType.hasOuterBorders(type: conf.type)) + let components = try ParsedTable.readRowComponents(row, type: conf.type, delimeter: conf.delimeter, trim: conf.trim) return Row( header: conf.header, @@ -130,7 +130,7 @@ class ParsedTable: Table { }.orThrow(RuntimeError("Failed to parse SQL like header")) let dataRows = [reader.readLine(), reader.readLine(), reader.readLine()].compactMap{$0}.filter { !ParsedTable.technicalRow($0) } - let types = userTypes ?? CellType.infer(rows: dataRows.map { try! ParsedTable.readRowComponents($0, type: .cassandraSql, delimeter: "|", trim: true) }) + let types = userTypes ?? CellType.infer(rows: dataRows.map { try! ParsedTable.readRowComponents($0, type: .sql, delimeter: "|", trim: true) }) let header = (headerOverride ?? parsedHeader).withTypes(types) return (TableConfig(header: header, type: FileType.sql, delimeter: "|", trim: true), dataRows) @@ -183,7 +183,7 @@ class ParsedTable: Table { } } - private static func readRowComponents(_ row: String, type: FileType, delimeter: String, trim: Bool, hasOuterBorders: Bool = false) throws -> [String] { + private static func readRowComponents(_ row: String, type: FileType, delimeter: String, trim: Bool) throws -> [String] { if type == .csv { return try! Csv.parseLine(row, delimeter: delimeter) } @@ -194,7 +194,7 @@ class ParsedTable: Table { components = components.map { $0.trimmingCharacters(in: .whitespacesAndNewlines) } } - if hasOuterBorders { + if FileType.hasOuterBorders(type: type) { components = components.dropFirst().dropLast() }