diff options
Diffstat (limited to 'yaml-cpp/src/scanscalar.cpp')
-rwxr-xr-x | yaml-cpp/src/scanscalar.cpp | 214 |
1 files changed, 214 insertions, 0 deletions
diff --git a/yaml-cpp/src/scanscalar.cpp b/yaml-cpp/src/scanscalar.cpp new file mode 100755 index 00000000..064c0867 --- /dev/null +++ b/yaml-cpp/src/scanscalar.cpp @@ -0,0 +1,214 @@ +#include "scanscalar.h" +#include "scanner.h" +#include "exp.h" +#include "yaml-cpp/exceptions.h" +#include "token.h" + +namespace YAML +{ + // ScanScalar + // . This is where the scalar magic happens. + // + // . We do the scanning in three phases: + // 1. Scan until newline + // 2. Eat newline + // 3. Scan leading blanks. + // + // . Depending on the parameters given, we store or stop + // and different places in the above flow. + std::string ScanScalar(Stream& INPUT, ScanScalarParams& params) + { + bool foundNonEmptyLine = false; + bool pastOpeningBreak = (params.fold == FOLD_FLOW); + bool emptyLine = false, moreIndented = false; + int foldedNewlineCount = 0; + bool foldedNewlineStartedMoreIndented = false; + std::size_t lastEscapedChar = std::string::npos; + std::string scalar; + params.leadingSpaces = false; + + while(INPUT) { + // ******************************** + // Phase #1: scan until line ending + + std::size_t lastNonWhitespaceChar = scalar.size(); + bool escapedNewline = false; + while(!params.end.Matches(INPUT) && !Exp::Break().Matches(INPUT)) { + if(!INPUT) + break; + + // document indicator? + if(INPUT.column() == 0 && Exp::DocIndicator().Matches(INPUT)) { + if(params.onDocIndicator == BREAK) + break; + else if(params.onDocIndicator == THROW) + throw ParserException(INPUT.mark(), ErrorMsg::DOC_IN_SCALAR); + } + + foundNonEmptyLine = true; + pastOpeningBreak = true; + + // escaped newline? (only if we're escaping on slash) + if(params.escape == '\\' && Exp::EscBreak().Matches(INPUT)) { + // eat escape character and get out (but preserve trailing whitespace!) + INPUT.get(); + lastNonWhitespaceChar = scalar.size(); + lastEscapedChar = scalar.size(); + escapedNewline = true; + break; + } + + // escape this? + if(INPUT.peek() == params.escape) { + scalar += Exp::Escape(INPUT); + lastNonWhitespaceChar = scalar.size(); + lastEscapedChar = scalar.size(); + continue; + } + + // otherwise, just add the damn character + char ch = INPUT.get(); + scalar += ch; + if(ch != ' ' && ch != '\t') + lastNonWhitespaceChar = scalar.size(); + } + + // eof? if we're looking to eat something, then we throw + if(!INPUT) { + if(params.eatEnd) + throw ParserException(INPUT.mark(), ErrorMsg::EOF_IN_SCALAR); + break; + } + + // doc indicator? + if(params.onDocIndicator == BREAK && INPUT.column() == 0 && Exp::DocIndicator().Matches(INPUT)) + break; + + // are we done via character match? + int n = params.end.Match(INPUT); + if(n >= 0) { + if(params.eatEnd) + INPUT.eat(n); + break; + } + + // do we remove trailing whitespace? + if(params.fold == FOLD_FLOW) + scalar.erase(lastNonWhitespaceChar); + + // ******************************** + // Phase #2: eat line ending + n = Exp::Break().Match(INPUT); + INPUT.eat(n); + + // ******************************** + // Phase #3: scan initial spaces + + // first the required indentation + while(INPUT.peek() == ' ' && (INPUT.column() < params.indent || (params.detectIndent && !foundNonEmptyLine))) + INPUT.eat(1); + + // update indent if we're auto-detecting + if(params.detectIndent && !foundNonEmptyLine) + params.indent = std::max(params.indent, INPUT.column()); + + // and then the rest of the whitespace + while(Exp::Blank().Matches(INPUT)) { + // we check for tabs that masquerade as indentation + if(INPUT.peek() == '\t'&& INPUT.column() < params.indent && params.onTabInIndentation == THROW) + throw ParserException(INPUT.mark(), ErrorMsg::TAB_IN_INDENTATION); + + if(!params.eatLeadingWhitespace) + break; + + INPUT.eat(1); + } + + // was this an empty line? + bool nextEmptyLine = Exp::Break().Matches(INPUT); + bool nextMoreIndented = Exp::Blank().Matches(INPUT); + if(params.fold == FOLD_BLOCK && foldedNewlineCount == 0 && nextEmptyLine) + foldedNewlineStartedMoreIndented = moreIndented; + + // for block scalars, we always start with a newline, so we should ignore it (not fold or keep) + if(pastOpeningBreak) { + switch(params.fold) { + case DONT_FOLD: + scalar += "\n"; + break; + case FOLD_BLOCK: + if(!emptyLine && !nextEmptyLine && !moreIndented && !nextMoreIndented && INPUT.column() >= params.indent) + scalar += " "; + else if(nextEmptyLine) + foldedNewlineCount++; + else + scalar += "\n"; + + if(!nextEmptyLine && foldedNewlineCount > 0) { + scalar += std::string(foldedNewlineCount - 1, '\n'); + if(foldedNewlineStartedMoreIndented || nextMoreIndented | !foundNonEmptyLine) + scalar += "\n"; + foldedNewlineCount = 0; + } + break; + case FOLD_FLOW: + if(nextEmptyLine) + scalar += "\n"; + else if(!emptyLine && !nextEmptyLine && !escapedNewline) + scalar += " "; + break; + } + } + + emptyLine = nextEmptyLine; + moreIndented = nextMoreIndented; + pastOpeningBreak = true; + + // are we done via indentation? + if(!emptyLine && INPUT.column() < params.indent) { + params.leadingSpaces = true; + break; + } + } + + // post-processing + if(params.trimTrailingSpaces) { + std::size_t pos = scalar.find_last_not_of(' '); + if(lastEscapedChar != std::string::npos) { + if(pos < lastEscapedChar || pos == std::string::npos) + pos = lastEscapedChar; + } + if(pos < scalar.size()) + scalar.erase(pos + 1); + } + + switch(params.chomp) { + case CLIP: { + std::size_t pos = scalar.find_last_not_of('\n'); + if(lastEscapedChar != std::string::npos) { + if(pos < lastEscapedChar || pos == std::string::npos) + pos = lastEscapedChar; + } + if(pos == std::string::npos) + scalar.erase(); + else if(pos + 1 < scalar.size()) + scalar.erase(pos + 2); + } break; + case STRIP: { + std::size_t pos = scalar.find_last_not_of('\n'); + if(lastEscapedChar != std::string::npos) { + if(pos < lastEscapedChar || pos == std::string::npos) + pos = lastEscapedChar; + } + if(pos == std::string::npos) + scalar.erase(); + else if(pos < scalar.size()) + scalar.erase(pos + 1); + } break; + default: + break; + } + + return scalar; + } +} |