diff options
Diffstat (limited to 'yaml-cpp/src/emitterutils.cpp')
-rwxr-xr-x | yaml-cpp/src/emitterutils.cpp | 378 |
1 files changed, 378 insertions, 0 deletions
diff --git a/yaml-cpp/src/emitterutils.cpp b/yaml-cpp/src/emitterutils.cpp new file mode 100755 index 00000000..3d184d6c --- /dev/null +++ b/yaml-cpp/src/emitterutils.cpp @@ -0,0 +1,378 @@ +#include "emitterutils.h" +#include "exp.h" +#include "indentation.h" +#include "yaml-cpp/binary.h" +#include "yaml-cpp/exceptions.h" +#include "stringsource.h" +#include <sstream> +#include <iomanip> + +namespace YAML +{ + namespace Utils + { + namespace { + enum {REPLACEMENT_CHARACTER = 0xFFFD}; + + bool IsAnchorChar(int ch) { // test for ns-anchor-char + switch (ch) { + case ',': case '[': case ']': case '{': case '}': // c-flow-indicator + case ' ': case '\t': // s-white + case 0xFEFF: // c-byte-order-mark + case 0xA: case 0xD: // b-char + return false; + case 0x85: + return true; + } + + if (ch < 0x20) + return false; + + if (ch < 0x7E) + return true; + + if (ch < 0xA0) + return false; + if (ch >= 0xD800 && ch <= 0xDFFF) + return false; + if ((ch & 0xFFFE) == 0xFFFE) + return false; + if ((ch >= 0xFDD0) && (ch <= 0xFDEF)) + return false; + if (ch > 0x10FFFF) + return false; + + return true; + } + + int Utf8BytesIndicated(char ch) { + int byteVal = static_cast<unsigned char>(ch); + switch (byteVal >> 4) { + case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7: + return 1; + case 12: case 13: + return 2; + case 14: + return 3; + case 15: + return 4; + default: + return -1; + } + } + + bool IsTrailingByte(char ch) { + return (ch & 0xC0) == 0x80; + } + + bool GetNextCodePointAndAdvance(int& codePoint, std::string::const_iterator& first, std::string::const_iterator last) { + if (first == last) + return false; + + int nBytes = Utf8BytesIndicated(*first); + if (nBytes < 1) { + // Bad lead byte + ++first; + codePoint = REPLACEMENT_CHARACTER; + return true; + } + + if (nBytes == 1) { + codePoint = *first++; + return true; + } + + // Gather bits from trailing bytes + codePoint = static_cast<unsigned char>(*first) & ~(0xFF << (7 - nBytes)); + ++first; + --nBytes; + for (; nBytes > 0; ++first, --nBytes) { + if ((first == last) || !IsTrailingByte(*first)) { + codePoint = REPLACEMENT_CHARACTER; + break; + } + codePoint <<= 6; + codePoint |= *first & 0x3F; + } + + // Check for illegal code points + if (codePoint > 0x10FFFF) + codePoint = REPLACEMENT_CHARACTER; + else if (codePoint >= 0xD800 && codePoint <= 0xDFFF) + codePoint = REPLACEMENT_CHARACTER; + else if ((codePoint & 0xFFFE) == 0xFFFE) + codePoint = REPLACEMENT_CHARACTER; + else if (codePoint >= 0xFDD0 && codePoint <= 0xFDEF) + codePoint = REPLACEMENT_CHARACTER; + return true; + } + + void WriteCodePoint(ostream& out, int codePoint) { + if (codePoint < 0 || codePoint > 0x10FFFF) { + codePoint = REPLACEMENT_CHARACTER; + } + if (codePoint < 0x7F) { + out << static_cast<char>(codePoint); + } else if (codePoint < 0x7FF) { + out << static_cast<char>(0xC0 | (codePoint >> 6)) + << static_cast<char>(0x80 | (codePoint & 0x3F)); + } else if (codePoint < 0xFFFF) { + out << static_cast<char>(0xE0 | (codePoint >> 12)) + << static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F)) + << static_cast<char>(0x80 | (codePoint & 0x3F)); + } else { + out << static_cast<char>(0xF0 | (codePoint >> 18)) + << static_cast<char>(0x80 | ((codePoint >> 12) & 0x3F)) + << static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F)) + << static_cast<char>(0x80 | (codePoint & 0x3F)); + } + } + + bool IsValidPlainScalar(const std::string& str, bool inFlow, bool allowOnlyAscii) { + if(str.empty()) + return false; + + // first check the start + const RegEx& start = (inFlow ? Exp::PlainScalarInFlow() : Exp::PlainScalar()); + if(!start.Matches(str)) + return false; + + // and check the end for plain whitespace (which can't be faithfully kept in a plain scalar) + if(!str.empty() && *str.rbegin() == ' ') + return false; + + // then check until something is disallowed + const RegEx& disallowed = (inFlow ? Exp::EndScalarInFlow() : Exp::EndScalar()) + || (Exp::BlankOrBreak() + Exp::Comment()) + || Exp::NotPrintable() + || Exp::Utf8_ByteOrderMark() + || Exp::Break() + || Exp::Tab(); + StringCharSource buffer(str.c_str(), str.size()); + while(buffer) { + if(disallowed.Matches(buffer)) + return false; + if(allowOnlyAscii && (0x7F < static_cast<unsigned char>(buffer[0]))) + return false; + ++buffer; + } + + return true; + } + + void WriteDoubleQuoteEscapeSequence(ostream& out, int codePoint) { + static const char hexDigits[] = "0123456789abcdef"; + + char escSeq[] = "\\U00000000"; + int digits = 8; + if (codePoint < 0xFF) { + escSeq[1] = 'x'; + digits = 2; + } else if (codePoint < 0xFFFF) { + escSeq[1] = 'u'; + digits = 4; + } + + // Write digits into the escape sequence + int i = 2; + for (; digits > 0; --digits, ++i) { + escSeq[i] = hexDigits[(codePoint >> (4 * (digits - 1))) & 0xF]; + } + + escSeq[i] = 0; // terminate with NUL character + out << escSeq; + } + + bool WriteAliasName(ostream& out, const std::string& str) { + int codePoint; + for(std::string::const_iterator i = str.begin(); + GetNextCodePointAndAdvance(codePoint, i, str.end()); + ) + { + if (!IsAnchorChar(codePoint)) + return false; + + WriteCodePoint(out, codePoint); + } + return true; + } + } + + bool WriteString(ostream& out, const std::string& str, bool inFlow, bool escapeNonAscii) + { + if(IsValidPlainScalar(str, inFlow, escapeNonAscii)) { + out << str; + return true; + } else + return WriteDoubleQuotedString(out, str, escapeNonAscii); + } + + bool WriteSingleQuotedString(ostream& out, const std::string& str) + { + out << "'"; + int codePoint; + for(std::string::const_iterator i = str.begin(); + GetNextCodePointAndAdvance(codePoint, i, str.end()); + ) + { + if (codePoint == '\n') + return false; // We can't handle a new line and the attendant indentation yet + + if (codePoint == '\'') + out << "''"; + else + WriteCodePoint(out, codePoint); + } + out << "'"; + return true; + } + + bool WriteDoubleQuotedString(ostream& out, const std::string& str, bool escapeNonAscii) + { + out << "\""; + int codePoint; + for(std::string::const_iterator i = str.begin(); + GetNextCodePointAndAdvance(codePoint, i, str.end()); + ) + { + if (codePoint == '\"') + out << "\\\""; + else if (codePoint == '\\') + out << "\\\\"; + else if (codePoint < 0x20 || (codePoint >= 0x80 && codePoint <= 0xA0)) // Control characters and non-breaking space + WriteDoubleQuoteEscapeSequence(out, codePoint); + else if (codePoint == 0xFEFF) // Byte order marks (ZWNS) should be escaped (YAML 1.2, sec. 5.2) + WriteDoubleQuoteEscapeSequence(out, codePoint); + else if (escapeNonAscii && codePoint > 0x7E) + WriteDoubleQuoteEscapeSequence(out, codePoint); + else + WriteCodePoint(out, codePoint); + } + out << "\""; + return true; + } + + bool WriteLiteralString(ostream& out, const std::string& str, int indent) + { + out << "|\n"; + out << IndentTo(indent); + int codePoint; + for(std::string::const_iterator i = str.begin(); + GetNextCodePointAndAdvance(codePoint, i, str.end()); + ) + { + if (codePoint == '\n') + out << "\n" << IndentTo(indent); + else + WriteCodePoint(out, codePoint); + } + return true; + } + + bool WriteChar(ostream& out, char ch) + { + if(('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z')) + out << ch; + else if((0x20 <= ch && ch <= 0x7e) || ch == ' ') + out << "\"" << ch << "\""; + else if(ch == '\t') + out << "\"\\t\""; + else if(ch == '\n') + out << "\"\\n\""; + else if(ch == '\b') + out << "\"\\b\""; + else { + out << "\""; + WriteDoubleQuoteEscapeSequence(out, ch); + out << "\""; + } + return true; + } + + bool WriteComment(ostream& out, const std::string& str, int postCommentIndent) + { + const unsigned curIndent = out.col(); + out << "#" << Indentation(postCommentIndent); + int codePoint; + for(std::string::const_iterator i = str.begin(); + GetNextCodePointAndAdvance(codePoint, i, str.end()); + ) + { + if(codePoint == '\n') + out << "\n" << IndentTo(curIndent) << "#" << Indentation(postCommentIndent); + else + WriteCodePoint(out, codePoint); + } + return true; + } + + bool WriteAlias(ostream& out, const std::string& str) + { + out << "*"; + return WriteAliasName(out, str); + } + + bool WriteAnchor(ostream& out, const std::string& str) + { + out << "&"; + return WriteAliasName(out, str); + } + + bool WriteTag(ostream& out, const std::string& str, bool verbatim) + { + out << (verbatim ? "!<" : "!"); + StringCharSource buffer(str.c_str(), str.size()); + const RegEx& reValid = verbatim ? Exp::URI() : Exp::Tag(); + while(buffer) { + int n = reValid.Match(buffer); + if(n <= 0) + return false; + + while(--n >= 0) { + out << buffer[0]; + ++buffer; + } + } + if (verbatim) + out << ">"; + return true; + } + + bool WriteTagWithPrefix(ostream& out, const std::string& prefix, const std::string& tag) + { + out << "!"; + StringCharSource prefixBuffer(prefix.c_str(), prefix.size()); + while(prefixBuffer) { + int n = Exp::URI().Match(prefixBuffer); + if(n <= 0) + return false; + + while(--n >= 0) { + out << prefixBuffer[0]; + ++prefixBuffer; + } + } + + out << "!"; + StringCharSource tagBuffer(tag.c_str(), tag.size()); + while(tagBuffer) { + int n = Exp::Tag().Match(tagBuffer); + if(n <= 0) + return false; + + while(--n >= 0) { + out << tagBuffer[0]; + ++tagBuffer; + } + } + return true; + } + + bool WriteBinary(ostream& out, const Binary& binary) + { + WriteDoubleQuotedString(out, EncodeBase64(binary.data(), binary.size()), false); + return true; + } + } +} + |