abstract class CsvFormatDetector extends java.lang.Object implements InputAnalysisProcess
An InputAnalysisProcess
to detect column delimiters, quotes and quote escapes in a CSV input.
Modifier and Type | Field and Description |
---|---|
private char |
comment |
private int |
MAX_ROW_SAMPLES |
private char |
normalizedNewLine |
private char |
suggestedDelimiter |
private int |
whitespaceRangeStart |
Constructor and Description |
---|
CsvFormatDetector(int maxRowSamples,
CsvParserSettings settings,
int whitespaceRangeStart)
Builds a new
CsvFormatDetector |
Modifier and Type | Method and Description |
---|---|
(package private) abstract void |
apply(char delimiter,
char quote,
char quoteEscape)
Applies the discovered CSV format elements to the
CsvParser |
private java.util.Map<java.lang.Character,java.lang.Integer> |
calculateTotals(java.util.List<java.util.Map<java.lang.Character,java.lang.Integer>> symbolsPerRow) |
void |
execute(char[] characters,
int length)
A sequence of characters of the input buffer to be analyzed.
|
private static char |
getChar(java.util.Map<java.lang.Character,java.lang.Integer> map,
java.util.Map<java.lang.Character,java.lang.Integer> totals,
char defaultChar,
boolean min)
Returns the character with the highest or lowest associated number.
|
private static void |
increment(java.util.Map<java.lang.Character,java.lang.Integer> map,
char symbol)
Increments the number associated with a character in a map by 1
|
private static void |
increment(java.util.Map<java.lang.Character,java.lang.Integer> map,
char symbol,
int incrementSize)
Increments the number associated with a character in a map
|
private static boolean |
isSymbol(char ch) |
private static char |
max(java.util.Map<java.lang.Character,java.lang.Integer> map,
java.util.Map<java.lang.Character,java.lang.Integer> totals,
char defaultChar)
Returns the character with the highest associated number.
|
private static char |
min(java.util.Map<java.lang.Character,java.lang.Integer> map,
java.util.Map<java.lang.Character,java.lang.Integer> totals,
char defaultChar)
Returns the character with the lowest associated number.
|
private final int MAX_ROW_SAMPLES
private final char comment
private final char suggestedDelimiter
private final char normalizedNewLine
private final int whitespaceRangeStart
CsvFormatDetector(int maxRowSamples, CsvParserSettings settings, int whitespaceRangeStart)
CsvFormatDetector
maxRowSamples
- the number of row samples to collect before analyzing the statistics
settings
- the configuration provided by the user with potential defaults in case the detection is unable to discover the proper column delimiter or quote character.
whitespaceRangeStart
- starting range of characters considered to be whitespace.
private java.util.Map<java.lang.Character,java.lang.Integer> calculateTotals(java.util.List<java.util.Map<java.lang.Character,java.lang.Integer>> symbolsPerRow)
public void execute(char[] characters, int length)
InputAnalysisProcess
execute
in interface InputAnalysisProcess
characters
- the input buffer
length
- the last character position loaded into the buffer.
private static void increment(java.util.Map<java.lang.Character,java.lang.Integer> map, char symbol)
map
- the map of characters and their numbers
symbol
- the character whose number should be incremented
private static void increment(java.util.Map<java.lang.Character,java.lang.Integer> map, char symbol, int incrementSize)
map
- the map of characters and their numbers
symbol
- the character whose number should be incremented
incrementSize
- the size of the increment
private static char min(java.util.Map<java.lang.Character,java.lang.Integer> map, java.util.Map<java.lang.Character,java.lang.Integer> totals, char defaultChar)
map
- the map of characters and their numbers
defaultChar
- the default character to return in case the map is empty
private static char max(java.util.Map<java.lang.Character,java.lang.Integer> map, java.util.Map<java.lang.Character,java.lang.Integer> totals, char defaultChar)
map
- the map of characters and their numbers
defaultChar
- the default character to return in case the map is empty
private static char getChar(java.util.Map<java.lang.Character,java.lang.Integer> map, java.util.Map<java.lang.Character,java.lang.Integer> totals, char defaultChar, boolean min)
map
- the map of characters and their numbers
defaultChar
- the default character to return in case the map is empty
min
- a flag indicating whether to return the character associated with the lowest number in the map.
If false, then the character associated with the highest number found will be returned.
private static boolean isSymbol(char ch)
abstract void apply(char delimiter, char quote, char quoteEscape)
CsvParser
delimiter
- the discovered delimiter character
quote
- the discovered quote character
quoteEscape
- the discovered quote escape character.