From 4a4af4d6c33ae9e146993a39ee8faf2a01675119 Mon Sep 17 00:00:00 2001 From: Herbert Pimentel Date: Thu, 21 Feb 2019 13:34:33 -0500 Subject: [PATCH 1/2] Add support to ignore lines during the parse --- src/main/java/com/sonalake/utah/Parser.java | 52 +++++++++++-------- .../java/com/sonalake/utah/config/Config.java | 31 +++++++++++ .../com/sonalake/utah/config/Ignorer.java | 38 ++++++++++++++ 3 files changed, 98 insertions(+), 23 deletions(-) create mode 100644 src/main/java/com/sonalake/utah/config/Ignorer.java diff --git a/src/main/java/com/sonalake/utah/Parser.java b/src/main/java/com/sonalake/utah/Parser.java index 269e1f3..d169505 100644 --- a/src/main/java/com/sonalake/utah/Parser.java +++ b/src/main/java/com/sonalake/utah/Parser.java @@ -93,34 +93,40 @@ private String getNextRecord(boolean isSelectingHeader) { boolean wasDelimMatched = false; while (!isRecordLoaded) { String currentLine = reader.readLine(); - if (null == currentLine) { - isReaderFinished = true; - isRecordLoaded = true; + boolean shouldIgnoreLine = config.isIgnorable(currentLine); + + if (shouldIgnoreLine) { + continue; } else { - if (StringUtils.isNotBlank(previousDelim)) { - buffer.append(previousDelim + "\n"); - previousDelim = ""; - } - if (isSelectingHeader && config.matchesHeaderDelim(currentLine)) { + if (null == currentLine) { + isReaderFinished = true; isRecordLoaded = true; - } else if (!isSelectingHeader && config.matchesRecordDelim(currentLine)) { - Delimiter applicableDelim = config.getApplicableDelim(currentLine); - // if the delimiter says we're at the start of the record, - // and this is the first record, we need to treat it differently - boolean isFirstDelimOfInterest = 0 == recordNumber && !wasDelimMatched; - if (applicableDelim.isDelimAtStartOfRecord() && isFirstDelimOfInterest) { - // this is the first record, so we don't stop here - wasDelimMatched = true; - } else { - if (applicableDelim.isRetainDelim()) { - previousDelim = currentLine; - } + } else { + if 
(StringUtils.isNotBlank(previousDelim)) { + buffer.append(previousDelim + "\n"); + previousDelim = ""; + } + if (isSelectingHeader && config.matchesHeaderDelim(currentLine)) { isRecordLoaded = true; + } else if (!isSelectingHeader && config.matchesRecordDelim(currentLine)) { + Delimiter applicableDelim = config.getApplicableDelim(currentLine); + // if the delimiter says we're at the start of the record, + // and this is the first record, we need to treat it differently + boolean isFirstDelimOfInterest = 0 == recordNumber && !wasDelimMatched; + if (applicableDelim.isDelimAtStartOfRecord() && isFirstDelimOfInterest) { + // this is the first record, so we don't stop here + wasDelimMatched = true; + } else { + if (applicableDelim.isRetainDelim()) { + previousDelim = currentLine; + } + isRecordLoaded = true; + } } } - } - if (StringUtils.isNotBlank(currentLine)) { - buffer.append(currentLine + "\n"); + if (StringUtils.isNotBlank(currentLine)) { + buffer.append(currentLine + "\n"); + } } } if (isReaderFinished && buffer.length() == 0) { diff --git a/src/main/java/com/sonalake/utah/config/Config.java b/src/main/java/com/sonalake/utah/config/Config.java index 147706c..7ff5f97 100644 --- a/src/main/java/com/sonalake/utah/config/Config.java +++ b/src/main/java/com/sonalake/utah/config/Config.java @@ -49,6 +49,13 @@ public class Config { @XmlElement(name = "value") protected List values; + /** + * Lines to be ignored + */ + @XmlElement(name = "ignore") + protected List ignores; + + /** * Precompile the patterns, but only do it the once. 
*/ @@ -62,6 +69,12 @@ void compilePatterns() { for (Delimiter delimiter : delimiters) { delimiter.compile(searches); } + + if (null != ignores) { + for (Ignorer ignorer : ignores) { + ignorer.compile(searches); + } + } } /** @@ -149,6 +162,24 @@ public boolean isDelimiterValid() { return true; } + /** + * Validates if the line should be ignored + * + * @return true if the line is ignored during the record build + */ + public boolean isIgnorable(String candidate) { + if (null == candidate || null == ignores || ignores.isEmpty()) { + return false; + } else { + for (Ignorer ignorer : ignores) { + if (ignorer.matches(candidate)) { + return true; + } + } + } + return false; + } + /** * Get the applicable delimiter for the candidate. The first delimiter that matches the text as used. * diff --git a/src/main/java/com/sonalake/utah/config/Ignorer.java b/src/main/java/com/sonalake/utah/config/Ignorer.java new file mode 100644 index 0000000..55a29a7 --- /dev/null +++ b/src/main/java/com/sonalake/utah/config/Ignorer.java @@ -0,0 +1,38 @@ +package com.sonalake.utah.config; + +import javax.xml.bind.annotation.XmlValue; +import java.util.List; +import java.util.regex.Pattern; + +/** + * An ignore pattern - used to identify lines of the input that are to be skipped when the parser assembles + * records. + */ +public class Ignorer { + + /** + * Raw ignore pattern string from the config file + */ + @XmlValue + protected String delimiter; + + /** + * The compiled pattern, this is the one used at runtime + */ + private Pattern compiledPattern; + + public boolean matches(String candidate) { + return compiledPattern.matcher(candidate).matches(); + } + + /** + * Compile the ignore pattern based on the searches + * + * @param searches the searches, processed in this order + */ + void compile(List searches) { + String valueText = SearchHelper.translate(delimiter, searches); + compiledPattern = Pattern.compile(".*?" 
+ valueText + ".*?"); + } + +} From d5cb49d891592536d87778596d15bd2156331f35 Mon Sep 17 00:00:00 2001 From: Herbert Pimentel Date: Fri, 8 Mar 2019 10:59:01 -0500 Subject: [PATCH 2/2] uses jackson instead of jaxb --- .../java/com/sonalake/utah/config/Config.java | 26 +++++++++---------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/src/main/java/com/sonalake/utah/config/Config.java b/src/main/java/com/sonalake/utah/config/Config.java index 16be8e4..e810524 100644 --- a/src/main/java/com/sonalake/utah/config/Config.java +++ b/src/main/java/com/sonalake/utah/config/Config.java @@ -1,12 +1,10 @@ package com.sonalake.utah.config; -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.fasterxml.jackson.dataformat.xml.annotation.JacksonXmlElementWrapper; import com.fasterxml.jackson.dataformat.xml.annotation.JacksonXmlProperty; import com.fasterxml.jackson.dataformat.xml.annotation.JacksonXmlRootElement; import org.apache.commons.lang3.StringUtils; - import java.util.Collections; import java.util.List; import java.util.Map; @@ -26,7 +24,8 @@ public class Config { protected List delimiters; /** - * The list of searches that are to be used by values section to find values using a regex + * The list of searches that are to be used by values section to find values + * using a regex */ @JacksonXmlElementWrapper(localName = "searches") @JacksonXmlProperty(localName = "search") @@ -50,10 +49,10 @@ public class Config { /** * Lines to be ignored */ - @XmlElement(name = "ignore") + @JacksonXmlElementWrapper(useWrapping = false) + @JacksonXmlProperty(localName = "ignore") protected List ignores; - /** * Precompile the patterns, but only do it the once. */ @@ -71,7 +70,7 @@ void compilePatterns() { if (null != ignores) { for (Ignorer ignorer : ignores) { ignorer.compile(searches); - } + } } } @@ -143,7 +142,8 @@ public boolean matchesHeaderDelim(String candidate) { } /** - * Validates if the delimiters are valid. 
Checks all the delimiters to see if they are well-forrmed + * Validates if the delimiters are valid. Checks all the delimiters to see if + * they are well-formed * * @return true if the delimiters are valid */ @@ -172,14 +172,15 @@ public boolean isIgnorable(String candidate) { for (Ignorer ignorer : ignores) { if (ignorer.matches(candidate)) { return true; - } + } } } return false; - } + } /** - * Get the applicable delimiter for the candidate. The first delimiter that matches the text as used. + * Get the applicable delimiter for the candidate. The first delimiter that + * matches the text is used. * * @param candidate the candidate text * @return the applicable delimiter, or null if there are none. @@ -214,10 +215,7 @@ public boolean hasHeaderDelim() { @Override public String toString() { - return String.format( - "CLIConfig: delim [%s], searches: [%s], values: [%s]", - delimiters, searches, values - ); + return String.format("CLIConfig: delim [%s], searches: [%s], values: [%s]", delimiters, searches, values); } }