diff options
Diffstat (limited to 'yaml-cpp/src/scantoken.cpp')
-rwxr-xr-x | yaml-cpp/src/scantoken.cpp | 439 |
1 files changed, 439 insertions, 0 deletions
diff --git a/yaml-cpp/src/scantoken.cpp b/yaml-cpp/src/scantoken.cpp new file mode 100755 index 00000000..06d9cd62 --- /dev/null +++ b/yaml-cpp/src/scantoken.cpp @@ -0,0 +1,439 @@ +#include "scanner.h" +#include "token.h" +#include "yaml-cpp/exceptions.h" +#include "exp.h" +#include "scanscalar.h" +#include "scantag.h" +#include "tag.h" +#include <sstream> + +namespace YAML +{ + /////////////////////////////////////////////////////////////////////// + // Specialization for scanning specific tokens + + // Directive + // . Note: no semantic checking is done here (that's for the parser to do) + void Scanner::ScanDirective() + { + std::string name; + std::vector <std::string> params; + + // pop indents and simple keys + PopAllIndents(); + PopAllSimpleKeys(); + + m_simpleKeyAllowed = false; + m_canBeJSONFlow = false; + + // store pos and eat indicator + Token token(Token::DIRECTIVE, INPUT.mark()); + INPUT.eat(1); + + // read name + while(INPUT && !Exp::BlankOrBreak().Matches(INPUT)) + token.value += INPUT.get(); + + // read parameters + while(1) { + // first get rid of whitespace + while(Exp::Blank().Matches(INPUT)) + INPUT.eat(1); + + // break on newline or comment + if(!INPUT || Exp::Break().Matches(INPUT) || Exp::Comment().Matches(INPUT)) + break; + + // now read parameter + std::string param; + while(INPUT && !Exp::BlankOrBreak().Matches(INPUT)) + param += INPUT.get(); + + token.params.push_back(param); + } + + m_tokens.push(token); + } + + // DocStart + void Scanner::ScanDocStart() + { + PopAllIndents(); + PopAllSimpleKeys(); + m_simpleKeyAllowed = false; + m_canBeJSONFlow = false; + + // eat + Mark mark = INPUT.mark(); + INPUT.eat(3); + m_tokens.push(Token(Token::DOC_START, mark)); + } + + // DocEnd + void Scanner::ScanDocEnd() + { + PopAllIndents(); + PopAllSimpleKeys(); + m_simpleKeyAllowed = false; + m_canBeJSONFlow = false; + + // eat + Mark mark = INPUT.mark(); + INPUT.eat(3); + m_tokens.push(Token(Token::DOC_END, mark)); + } + + // FlowStart + void Scanner::ScanFlowStart() + { + // flows can be simple keys + InsertPotentialSimpleKey(); + m_simpleKeyAllowed = true; + m_canBeJSONFlow = false; + + // eat + Mark mark = INPUT.mark(); + char ch = INPUT.get(); + FLOW_MARKER flowType = (ch == Keys::FlowSeqStart ? FLOW_SEQ : FLOW_MAP); + m_flows.push(flowType); + Token::TYPE type = (flowType == FLOW_SEQ ? Token::FLOW_SEQ_START : Token::FLOW_MAP_START); + m_tokens.push(Token(type, mark)); + } + + // FlowEnd + void Scanner::ScanFlowEnd() + { + if(InBlockContext()) + throw ParserException(INPUT.mark(), ErrorMsg::FLOW_END); + + // we might have a solo entry in the flow context + if(InFlowContext()) { + if(m_flows.top() == FLOW_MAP && VerifySimpleKey()) + m_tokens.push(Token(Token::VALUE, INPUT.mark())); + else if(m_flows.top() == FLOW_SEQ) + InvalidateSimpleKey(); + } + + m_simpleKeyAllowed = false; + m_canBeJSONFlow = true; + + // eat + Mark mark = INPUT.mark(); + char ch = INPUT.get(); + + // check that it matches the start + FLOW_MARKER flowType = (ch == Keys::FlowSeqEnd ? FLOW_SEQ : FLOW_MAP); + if(m_flows.top() != flowType) + throw ParserException(mark, ErrorMsg::FLOW_END); + m_flows.pop(); + + Token::TYPE type = (flowType ? Token::FLOW_SEQ_END : Token::FLOW_MAP_END); + m_tokens.push(Token(type, mark)); + } + + // FlowEntry + void Scanner::ScanFlowEntry() + { + // we might have a solo entry in the flow context + if(InFlowContext()) { + if(m_flows.top() == FLOW_MAP && VerifySimpleKey()) + m_tokens.push(Token(Token::VALUE, INPUT.mark())); + else if(m_flows.top() == FLOW_SEQ) + InvalidateSimpleKey(); + } + + m_simpleKeyAllowed = true; + m_canBeJSONFlow = false; + + // eat + Mark mark = INPUT.mark(); + INPUT.eat(1); + m_tokens.push(Token(Token::FLOW_ENTRY, mark)); + } + + // BlockEntry + void Scanner::ScanBlockEntry() + { + // we better be in the block context! + if(InFlowContext()) + throw ParserException(INPUT.mark(), ErrorMsg::BLOCK_ENTRY); + + // can we put it here? + if(!m_simpleKeyAllowed) + throw ParserException(INPUT.mark(), ErrorMsg::BLOCK_ENTRY); + + PushIndentTo(INPUT.column(), IndentMarker::SEQ); + m_simpleKeyAllowed = true; + m_canBeJSONFlow = false; + + // eat + Mark mark = INPUT.mark(); + INPUT.eat(1); + m_tokens.push(Token(Token::BLOCK_ENTRY, mark)); + } + + // Key + void Scanner::ScanKey() + { + // handle keys diffently in the block context (and manage indents) + if(InBlockContext()) { + if(!m_simpleKeyAllowed) + throw ParserException(INPUT.mark(), ErrorMsg::MAP_KEY); + + PushIndentTo(INPUT.column(), IndentMarker::MAP); + } + + // can only put a simple key here if we're in block context + m_simpleKeyAllowed = InBlockContext(); + + // eat + Mark mark = INPUT.mark(); + INPUT.eat(1); + m_tokens.push(Token(Token::KEY, mark)); + } + + // Value + void Scanner::ScanValue() + { + // and check that simple key + bool isSimpleKey = VerifySimpleKey(); + m_canBeJSONFlow = false; + + if(isSimpleKey) { + // can't follow a simple key with another simple key (dunno why, though - it seems fine) + m_simpleKeyAllowed = false; + } else { + // handle values diffently in the block context (and manage indents) + if(InBlockContext()) { + if(!m_simpleKeyAllowed) + throw ParserException(INPUT.mark(), ErrorMsg::MAP_VALUE); + + PushIndentTo(INPUT.column(), IndentMarker::MAP); + } + + // can only put a simple key here if we're in block context + m_simpleKeyAllowed = InBlockContext(); + } + + // eat + Mark mark = INPUT.mark(); + INPUT.eat(1); + m_tokens.push(Token(Token::VALUE, mark)); + } + + // AnchorOrAlias + void Scanner::ScanAnchorOrAlias() + { + bool alias; + std::string name; + + // insert a potential simple key + InsertPotentialSimpleKey(); + m_simpleKeyAllowed = false; + m_canBeJSONFlow = false; + + // eat the indicator + Mark mark = INPUT.mark(); + char indicator = INPUT.get(); + alias = (indicator == Keys::Alias); + + // now eat the content + while(INPUT && Exp::Anchor().Matches(INPUT)) + name += INPUT.get(); + + // we need to have read SOMETHING! + if(name.empty()) + throw ParserException(INPUT.mark(), alias ? ErrorMsg::ALIAS_NOT_FOUND : ErrorMsg::ANCHOR_NOT_FOUND); + + // and needs to end correctly + if(INPUT && !Exp::AnchorEnd().Matches(INPUT)) + throw ParserException(INPUT.mark(), alias ? ErrorMsg::CHAR_IN_ALIAS : ErrorMsg::CHAR_IN_ANCHOR); + + // and we're done + Token token(alias ? Token::ALIAS : Token::ANCHOR, mark); + token.value = name; + m_tokens.push(token); + } + + // Tag + void Scanner::ScanTag() + { + // insert a potential simple key + InsertPotentialSimpleKey(); + m_simpleKeyAllowed = false; + m_canBeJSONFlow = false; + + Token token(Token::TAG, INPUT.mark()); + + // eat the indicator + INPUT.get(); + + if(INPUT && INPUT.peek() == Keys::VerbatimTagStart){ + std::string tag = ScanVerbatimTag(INPUT); + + token.value = tag; + token.data = Tag::VERBATIM; + } else { + bool canBeHandle; + token.value = ScanTagHandle(INPUT, canBeHandle); + if(!canBeHandle && token.value.empty()) + token.data = Tag::NON_SPECIFIC; + else if(token.value.empty()) + token.data = Tag::SECONDARY_HANDLE; + else + token.data = Tag::PRIMARY_HANDLE; + + // is there a suffix? + if(canBeHandle && INPUT.peek() == Keys::Tag) { + // eat the indicator + INPUT.get(); + token.params.push_back(ScanTagSuffix(INPUT)); + token.data = Tag::NAMED_HANDLE; + } + } + + m_tokens.push(token); + } + + // PlainScalar + void Scanner::ScanPlainScalar() + { + std::string scalar; + + // set up the scanning parameters + ScanScalarParams params; + params.end = (InFlowContext() ? Exp::EndScalarInFlow() : Exp::EndScalar()) || (Exp::BlankOrBreak() + Exp::Comment()); + params.eatEnd = false; + params.indent = (InFlowContext() ? 0 : GetTopIndent() + 1); + params.fold = FOLD_FLOW; + params.eatLeadingWhitespace = true; + params.trimTrailingSpaces = true; + params.chomp = STRIP; + params.onDocIndicator = BREAK; + params.onTabInIndentation = THROW; + + // insert a potential simple key + InsertPotentialSimpleKey(); + + Mark mark = INPUT.mark(); + scalar = ScanScalar(INPUT, params); + + // can have a simple key only if we ended the scalar by starting a new line + m_simpleKeyAllowed = params.leadingSpaces; + m_canBeJSONFlow = false; + + // finally, check and see if we ended on an illegal character + //if(Exp::IllegalCharInScalar.Matches(INPUT)) + // throw ParserException(INPUT.mark(), ErrorMsg::CHAR_IN_SCALAR); + + Token token(Token::PLAIN_SCALAR, mark); + token.value = scalar; + m_tokens.push(token); + } + + // QuotedScalar + void Scanner::ScanQuotedScalar() + { + std::string scalar; + + // peek at single or double quote (don't eat because we need to preserve (for the time being) the input position) + char quote = INPUT.peek(); + bool single = (quote == '\''); + + // setup the scanning parameters + ScanScalarParams params; + params.end = (single ? RegEx(quote) && !Exp::EscSingleQuote() : RegEx(quote)); + params.eatEnd = true; + params.escape = (single ? '\'' : '\\'); + params.indent = 0; + params.fold = FOLD_FLOW; + params.eatLeadingWhitespace = true; + params.trimTrailingSpaces = false; + params.chomp = CLIP; + params.onDocIndicator = THROW; + + // insert a potential simple key + InsertPotentialSimpleKey(); + + Mark mark = INPUT.mark(); + + // now eat that opening quote + INPUT.get(); + + // and scan + scalar = ScanScalar(INPUT, params); + m_simpleKeyAllowed = false; + m_canBeJSONFlow = true; + + Token token(Token::NON_PLAIN_SCALAR, mark); + token.value = scalar; + m_tokens.push(token); + } + + // BlockScalarToken + // . These need a little extra processing beforehand. + // . We need to scan the line where the indicator is (this doesn't count as part of the scalar), + // and then we need to figure out what level of indentation we'll be using. + void Scanner::ScanBlockScalar() + { + std::string scalar; + + ScanScalarParams params; + params.indent = 1; + params.detectIndent = true; + + // eat block indicator ('|' or '>') + Mark mark = INPUT.mark(); + char indicator = INPUT.get(); + params.fold = (indicator == Keys::FoldedScalar ? FOLD_BLOCK : DONT_FOLD); + + // eat chomping/indentation indicators + params.chomp = CLIP; + int n = Exp::Chomp().Match(INPUT); + for(int i=0;i<n;i++) { + char ch = INPUT.get(); + if(ch == '+') + params.chomp = KEEP; + else if(ch == '-') + params.chomp = STRIP; + else if(Exp::Digit().Matches(ch)) { + if(ch == '0') + throw ParserException(INPUT.mark(), ErrorMsg::ZERO_INDENT_IN_BLOCK); + + params.indent = ch - '0'; + params.detectIndent = false; + } + } + + // now eat whitespace + while(Exp::Blank().Matches(INPUT)) + INPUT.eat(1); + + // and comments to the end of the line + if(Exp::Comment().Matches(INPUT)) + while(INPUT && !Exp::Break().Matches(INPUT)) + INPUT.eat(1); + + // if it's not a line break, then we ran into a bad character inline + if(INPUT && !Exp::Break().Matches(INPUT)) + throw ParserException(INPUT.mark(), ErrorMsg::CHAR_IN_BLOCK); + + // set the initial indentation + if(GetTopIndent() >= 0) + params.indent += GetTopIndent(); + + params.eatLeadingWhitespace = false; + params.trimTrailingSpaces = false; + params.onTabInIndentation = THROW; + + scalar = ScanScalar(INPUT, params); + + // simple keys always ok after block scalars (since we're gonna start a new line anyways) + m_simpleKeyAllowed = true; + m_canBeJSONFlow = false; + + Token token(Token::NON_PLAIN_SCALAR, mark); + token.value = scalar; + m_tokens.push(token); + } +} |