summaryrefslogtreecommitdiffstats
path: root/yaml-cpp/src/scantoken.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'yaml-cpp/src/scantoken.cpp')
-rwxr-xr-xyaml-cpp/src/scantoken.cpp439
1 files changed, 439 insertions, 0 deletions
diff --git a/yaml-cpp/src/scantoken.cpp b/yaml-cpp/src/scantoken.cpp
new file mode 100755
index 00000000..06d9cd62
--- /dev/null
+++ b/yaml-cpp/src/scantoken.cpp
@@ -0,0 +1,439 @@
+#include "scanner.h"
+#include "token.h"
+#include "yaml-cpp/exceptions.h"
+#include "exp.h"
+#include "scanscalar.h"
+#include "scantag.h"
+#include "tag.h"
+#include <sstream>
+
+namespace YAML
+{
+ ///////////////////////////////////////////////////////////////////////
+ // Specialization for scanning specific tokens
+
+ // Directive
+ // . Note: no semantic checking is done here (that's for the parser to do)
+ void Scanner::ScanDirective()
+ {
+ std::string name;
+ std::vector <std::string> params;
+
+ // pop indents and simple keys
+ PopAllIndents();
+ PopAllSimpleKeys();
+
+ m_simpleKeyAllowed = false;
+ m_canBeJSONFlow = false;
+
+ // store pos and eat indicator
+ Token token(Token::DIRECTIVE, INPUT.mark());
+ INPUT.eat(1);
+
+ // read name
+ while(INPUT && !Exp::BlankOrBreak().Matches(INPUT))
+ token.value += INPUT.get();
+
+ // read parameters
+ while(1) {
+ // first get rid of whitespace
+ while(Exp::Blank().Matches(INPUT))
+ INPUT.eat(1);
+
+ // break on newline or comment
+ if(!INPUT || Exp::Break().Matches(INPUT) || Exp::Comment().Matches(INPUT))
+ break;
+
+ // now read parameter
+ std::string param;
+ while(INPUT && !Exp::BlankOrBreak().Matches(INPUT))
+ param += INPUT.get();
+
+ token.params.push_back(param);
+ }
+
+ m_tokens.push(token);
+ }
+
+ // DocStart
+ void Scanner::ScanDocStart()
+ {
+ PopAllIndents();
+ PopAllSimpleKeys();
+ m_simpleKeyAllowed = false;
+ m_canBeJSONFlow = false;
+
+ // eat
+ Mark mark = INPUT.mark();
+ INPUT.eat(3);
+ m_tokens.push(Token(Token::DOC_START, mark));
+ }
+
+ // DocEnd
+ void Scanner::ScanDocEnd()
+ {
+ PopAllIndents();
+ PopAllSimpleKeys();
+ m_simpleKeyAllowed = false;
+ m_canBeJSONFlow = false;
+
+ // eat
+ Mark mark = INPUT.mark();
+ INPUT.eat(3);
+ m_tokens.push(Token(Token::DOC_END, mark));
+ }
+
+ // FlowStart
+ void Scanner::ScanFlowStart()
+ {
+ // flows can be simple keys
+ InsertPotentialSimpleKey();
+ m_simpleKeyAllowed = true;
+ m_canBeJSONFlow = false;
+
+ // eat
+ Mark mark = INPUT.mark();
+ char ch = INPUT.get();
+ FLOW_MARKER flowType = (ch == Keys::FlowSeqStart ? FLOW_SEQ : FLOW_MAP);
+ m_flows.push(flowType);
+ Token::TYPE type = (flowType == FLOW_SEQ ? Token::FLOW_SEQ_START : Token::FLOW_MAP_START);
+ m_tokens.push(Token(type, mark));
+ }
+
+ // FlowEnd
+ void Scanner::ScanFlowEnd()
+ {
+ if(InBlockContext())
+ throw ParserException(INPUT.mark(), ErrorMsg::FLOW_END);
+
+ // we might have a solo entry in the flow context
+ if(InFlowContext()) {
+ if(m_flows.top() == FLOW_MAP && VerifySimpleKey())
+ m_tokens.push(Token(Token::VALUE, INPUT.mark()));
+ else if(m_flows.top() == FLOW_SEQ)
+ InvalidateSimpleKey();
+ }
+
+ m_simpleKeyAllowed = false;
+ m_canBeJSONFlow = true;
+
+ // eat
+ Mark mark = INPUT.mark();
+ char ch = INPUT.get();
+
+ // check that it matches the start
+ FLOW_MARKER flowType = (ch == Keys::FlowSeqEnd ? FLOW_SEQ : FLOW_MAP);
+ if(m_flows.top() != flowType)
+ throw ParserException(mark, ErrorMsg::FLOW_END);
+ m_flows.pop();
+
+ Token::TYPE type = (flowType ? Token::FLOW_SEQ_END : Token::FLOW_MAP_END);
+ m_tokens.push(Token(type, mark));
+ }
+
+ // FlowEntry
+ void Scanner::ScanFlowEntry()
+ {
+ // we might have a solo entry in the flow context
+ if(InFlowContext()) {
+ if(m_flows.top() == FLOW_MAP && VerifySimpleKey())
+ m_tokens.push(Token(Token::VALUE, INPUT.mark()));
+ else if(m_flows.top() == FLOW_SEQ)
+ InvalidateSimpleKey();
+ }
+
+ m_simpleKeyAllowed = true;
+ m_canBeJSONFlow = false;
+
+ // eat
+ Mark mark = INPUT.mark();
+ INPUT.eat(1);
+ m_tokens.push(Token(Token::FLOW_ENTRY, mark));
+ }
+
+ // BlockEntry
+ void Scanner::ScanBlockEntry()
+ {
+ // we better be in the block context!
+ if(InFlowContext())
+ throw ParserException(INPUT.mark(), ErrorMsg::BLOCK_ENTRY);
+
+ // can we put it here?
+ if(!m_simpleKeyAllowed)
+ throw ParserException(INPUT.mark(), ErrorMsg::BLOCK_ENTRY);
+
+ PushIndentTo(INPUT.column(), IndentMarker::SEQ);
+ m_simpleKeyAllowed = true;
+ m_canBeJSONFlow = false;
+
+ // eat
+ Mark mark = INPUT.mark();
+ INPUT.eat(1);
+ m_tokens.push(Token(Token::BLOCK_ENTRY, mark));
+ }
+
+ // Key
+ void Scanner::ScanKey()
+ {
+ // handle keys diffently in the block context (and manage indents)
+ if(InBlockContext()) {
+ if(!m_simpleKeyAllowed)
+ throw ParserException(INPUT.mark(), ErrorMsg::MAP_KEY);
+
+ PushIndentTo(INPUT.column(), IndentMarker::MAP);
+ }
+
+ // can only put a simple key here if we're in block context
+ m_simpleKeyAllowed = InBlockContext();
+
+ // eat
+ Mark mark = INPUT.mark();
+ INPUT.eat(1);
+ m_tokens.push(Token(Token::KEY, mark));
+ }
+
+ // Value
+ void Scanner::ScanValue()
+ {
+ // and check that simple key
+ bool isSimpleKey = VerifySimpleKey();
+ m_canBeJSONFlow = false;
+
+ if(isSimpleKey) {
+ // can't follow a simple key with another simple key (dunno why, though - it seems fine)
+ m_simpleKeyAllowed = false;
+ } else {
+ // handle values diffently in the block context (and manage indents)
+ if(InBlockContext()) {
+ if(!m_simpleKeyAllowed)
+ throw ParserException(INPUT.mark(), ErrorMsg::MAP_VALUE);
+
+ PushIndentTo(INPUT.column(), IndentMarker::MAP);
+ }
+
+ // can only put a simple key here if we're in block context
+ m_simpleKeyAllowed = InBlockContext();
+ }
+
+ // eat
+ Mark mark = INPUT.mark();
+ INPUT.eat(1);
+ m_tokens.push(Token(Token::VALUE, mark));
+ }
+
+ // AnchorOrAlias
+ void Scanner::ScanAnchorOrAlias()
+ {
+ bool alias;
+ std::string name;
+
+ // insert a potential simple key
+ InsertPotentialSimpleKey();
+ m_simpleKeyAllowed = false;
+ m_canBeJSONFlow = false;
+
+ // eat the indicator
+ Mark mark = INPUT.mark();
+ char indicator = INPUT.get();
+ alias = (indicator == Keys::Alias);
+
+ // now eat the content
+ while(INPUT && Exp::Anchor().Matches(INPUT))
+ name += INPUT.get();
+
+ // we need to have read SOMETHING!
+ if(name.empty())
+ throw ParserException(INPUT.mark(), alias ? ErrorMsg::ALIAS_NOT_FOUND : ErrorMsg::ANCHOR_NOT_FOUND);
+
+ // and needs to end correctly
+ if(INPUT && !Exp::AnchorEnd().Matches(INPUT))
+ throw ParserException(INPUT.mark(), alias ? ErrorMsg::CHAR_IN_ALIAS : ErrorMsg::CHAR_IN_ANCHOR);
+
+ // and we're done
+ Token token(alias ? Token::ALIAS : Token::ANCHOR, mark);
+ token.value = name;
+ m_tokens.push(token);
+ }
+
+ // Tag
+ void Scanner::ScanTag()
+ {
+ // insert a potential simple key
+ InsertPotentialSimpleKey();
+ m_simpleKeyAllowed = false;
+ m_canBeJSONFlow = false;
+
+ Token token(Token::TAG, INPUT.mark());
+
+ // eat the indicator
+ INPUT.get();
+
+ if(INPUT && INPUT.peek() == Keys::VerbatimTagStart){
+ std::string tag = ScanVerbatimTag(INPUT);
+
+ token.value = tag;
+ token.data = Tag::VERBATIM;
+ } else {
+ bool canBeHandle;
+ token.value = ScanTagHandle(INPUT, canBeHandle);
+ if(!canBeHandle && token.value.empty())
+ token.data = Tag::NON_SPECIFIC;
+ else if(token.value.empty())
+ token.data = Tag::SECONDARY_HANDLE;
+ else
+ token.data = Tag::PRIMARY_HANDLE;
+
+ // is there a suffix?
+ if(canBeHandle && INPUT.peek() == Keys::Tag) {
+ // eat the indicator
+ INPUT.get();
+ token.params.push_back(ScanTagSuffix(INPUT));
+ token.data = Tag::NAMED_HANDLE;
+ }
+ }
+
+ m_tokens.push(token);
+ }
+
+ // PlainScalar
+ void Scanner::ScanPlainScalar()
+ {
+ std::string scalar;
+
+ // set up the scanning parameters
+ ScanScalarParams params;
+ params.end = (InFlowContext() ? Exp::EndScalarInFlow() : Exp::EndScalar()) || (Exp::BlankOrBreak() + Exp::Comment());
+ params.eatEnd = false;
+ params.indent = (InFlowContext() ? 0 : GetTopIndent() + 1);
+ params.fold = FOLD_FLOW;
+ params.eatLeadingWhitespace = true;
+ params.trimTrailingSpaces = true;
+ params.chomp = STRIP;
+ params.onDocIndicator = BREAK;
+ params.onTabInIndentation = THROW;
+
+ // insert a potential simple key
+ InsertPotentialSimpleKey();
+
+ Mark mark = INPUT.mark();
+ scalar = ScanScalar(INPUT, params);
+
+ // can have a simple key only if we ended the scalar by starting a new line
+ m_simpleKeyAllowed = params.leadingSpaces;
+ m_canBeJSONFlow = false;
+
+ // finally, check and see if we ended on an illegal character
+ //if(Exp::IllegalCharInScalar.Matches(INPUT))
+ // throw ParserException(INPUT.mark(), ErrorMsg::CHAR_IN_SCALAR);
+
+ Token token(Token::PLAIN_SCALAR, mark);
+ token.value = scalar;
+ m_tokens.push(token);
+ }
+
+ // QuotedScalar
+ void Scanner::ScanQuotedScalar()
+ {
+ std::string scalar;
+
+ // peek at single or double quote (don't eat because we need to preserve (for the time being) the input position)
+ char quote = INPUT.peek();
+ bool single = (quote == '\'');
+
+ // setup the scanning parameters
+ ScanScalarParams params;
+ params.end = (single ? RegEx(quote) && !Exp::EscSingleQuote() : RegEx(quote));
+ params.eatEnd = true;
+ params.escape = (single ? '\'' : '\\');
+ params.indent = 0;
+ params.fold = FOLD_FLOW;
+ params.eatLeadingWhitespace = true;
+ params.trimTrailingSpaces = false;
+ params.chomp = CLIP;
+ params.onDocIndicator = THROW;
+
+ // insert a potential simple key
+ InsertPotentialSimpleKey();
+
+ Mark mark = INPUT.mark();
+
+ // now eat that opening quote
+ INPUT.get();
+
+ // and scan
+ scalar = ScanScalar(INPUT, params);
+ m_simpleKeyAllowed = false;
+ m_canBeJSONFlow = true;
+
+ Token token(Token::NON_PLAIN_SCALAR, mark);
+ token.value = scalar;
+ m_tokens.push(token);
+ }
+
+ // BlockScalarToken
+ // . These need a little extra processing beforehand.
+ // . We need to scan the line where the indicator is (this doesn't count as part of the scalar),
+ // and then we need to figure out what level of indentation we'll be using.
+ void Scanner::ScanBlockScalar()
+ {
+ std::string scalar;
+
+ ScanScalarParams params;
+ params.indent = 1;
+ params.detectIndent = true;
+
+ // eat block indicator ('|' or '>')
+ Mark mark = INPUT.mark();
+ char indicator = INPUT.get();
+ params.fold = (indicator == Keys::FoldedScalar ? FOLD_BLOCK : DONT_FOLD);
+
+ // eat chomping/indentation indicators
+ params.chomp = CLIP;
+ int n = Exp::Chomp().Match(INPUT);
+ for(int i=0;i<n;i++) {
+ char ch = INPUT.get();
+ if(ch == '+')
+ params.chomp = KEEP;
+ else if(ch == '-')
+ params.chomp = STRIP;
+ else if(Exp::Digit().Matches(ch)) {
+ if(ch == '0')
+ throw ParserException(INPUT.mark(), ErrorMsg::ZERO_INDENT_IN_BLOCK);
+
+ params.indent = ch - '0';
+ params.detectIndent = false;
+ }
+ }
+
+ // now eat whitespace
+ while(Exp::Blank().Matches(INPUT))
+ INPUT.eat(1);
+
+ // and comments to the end of the line
+ if(Exp::Comment().Matches(INPUT))
+ while(INPUT && !Exp::Break().Matches(INPUT))
+ INPUT.eat(1);
+
+ // if it's not a line break, then we ran into a bad character inline
+ if(INPUT && !Exp::Break().Matches(INPUT))
+ throw ParserException(INPUT.mark(), ErrorMsg::CHAR_IN_BLOCK);
+
+ // set the initial indentation
+ if(GetTopIndent() >= 0)
+ params.indent += GetTopIndent();
+
+ params.eatLeadingWhitespace = false;
+ params.trimTrailingSpaces = false;
+ params.onTabInIndentation = THROW;
+
+ scalar = ScanScalar(INPUT, params);
+
+ // simple keys always ok after block scalars (since we're gonna start a new line anyways)
+ m_simpleKeyAllowed = true;
+ m_canBeJSONFlow = false;
+
+ Token token(Token::NON_PLAIN_SCALAR, mark);
+ token.value = scalar;
+ m_tokens.push(token);
+ }
+}