diff options
Diffstat (limited to 'external_libs/yaml-cpp/src/stream.cpp')
-rw-r--r-- | external_libs/yaml-cpp/src/stream.cpp | 448 |
1 files changed, 448 insertions, 0 deletions
diff --git a/external_libs/yaml-cpp/src/stream.cpp b/external_libs/yaml-cpp/src/stream.cpp new file mode 100644 index 00000000..447b67c1 --- /dev/null +++ b/external_libs/yaml-cpp/src/stream.cpp @@ -0,0 +1,448 @@ +#include "stream.h" +#include <iostream> +#include "exp.h" + +#ifndef YAML_PREFETCH_SIZE +#define YAML_PREFETCH_SIZE 2048 +#endif + +#define S_ARRAY_SIZE( A ) (sizeof(A)/sizeof(*(A))) +#define S_ARRAY_END( A ) ((A) + S_ARRAY_SIZE(A)) + +#define CP_REPLACEMENT_CHARACTER (0xFFFD) + +namespace YAML +{ + enum UtfIntroState { + uis_start, + uis_utfbe_b1, + uis_utf32be_b2, + uis_utf32be_bom3, + uis_utf32be, + uis_utf16be, + uis_utf16be_bom1, + uis_utfle_bom1, + uis_utf16le_bom2, + uis_utf32le_bom3, + uis_utf16le, + uis_utf32le, + uis_utf8_imp, + uis_utf16le_imp, + uis_utf32le_imp3, + uis_utf8_bom1, + uis_utf8_bom2, + uis_utf8, + uis_error + }; + + enum UtfIntroCharType { + uict00, + uictBB, + uictBF, + uictEF, + uictFE, + uictFF, + uictAscii, + uictOther, + uictMax + }; + + static bool s_introFinalState[] = { + false, //uis_start + false, //uis_utfbe_b1 + false, //uis_utf32be_b2 + false, //uis_utf32be_bom3 + true, //uis_utf32be + true, //uis_utf16be + false, //uis_utf16be_bom1 + false, //uis_utfle_bom1 + false, //uis_utf16le_bom2 + false, //uis_utf32le_bom3 + true, //uis_utf16le + true, //uis_utf32le + false, //uis_utf8_imp + false, //uis_utf16le_imp + false, //uis_utf32le_imp3 + false, //uis_utf8_bom1 + false, //uis_utf8_bom2 + true, //uis_utf8 + true, //uis_error + }; + + static UtfIntroState s_introTransitions[][uictMax] = { + // uict00, uictBB, uictBF, uictEF, uictFE, uictFF, uictAscii, uictOther + {uis_utfbe_b1, uis_utf8, uis_utf8, uis_utf8_bom1, uis_utf16be_bom1, uis_utfle_bom1, uis_utf8_imp, uis_utf8}, + {uis_utf32be_b2, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf16be, uis_utf8}, + {uis_utf32be, uis_utf8, uis_utf8, uis_utf8, uis_utf32be_bom3, uis_utf8, uis_utf8, uis_utf8}, + {uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf32be, uis_utf8, uis_utf8}, + {uis_utf32be, uis_utf32be, uis_utf32be, uis_utf32be, uis_utf32be, uis_utf32be, uis_utf32be, uis_utf32be}, + {uis_utf16be, uis_utf16be, uis_utf16be, uis_utf16be, uis_utf16be, uis_utf16be, uis_utf16be, uis_utf16be}, + {uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf16be, uis_utf8, uis_utf8}, + {uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf16le_bom2, uis_utf8, uis_utf8, uis_utf8}, + {uis_utf32le_bom3, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le}, + {uis_utf32le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le}, + {uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le}, + {uis_utf32le, uis_utf32le, uis_utf32le, uis_utf32le, uis_utf32le, uis_utf32le, uis_utf32le, uis_utf32le}, + {uis_utf16le_imp, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8}, + {uis_utf32le_imp3, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le}, + {uis_utf32le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le}, + {uis_utf8, uis_utf8_bom2, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8}, + {uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8}, + {uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8}, + }; + + static char s_introUngetCount[][uictMax] = { + // uict00, uictBB, uictBF, uictEF, uictFE, uictFF, uictAscii, uictOther + {0, 1, 1, 0, 0, 0, 0, 1}, + {0, 2, 2, 2, 2, 2, 2, 2}, + {3, 3, 3, 3, 0, 3, 3, 3}, + {4, 4, 4, 4, 4, 0, 4, 4}, + {1, 1, 1, 1, 1, 1, 1, 1}, + {1, 1, 1, 1, 1, 1, 1, 1}, + {2, 2, 2, 2, 2, 0, 2, 2}, + {2, 2, 2, 2, 0, 2, 2, 2}, + {0, 1, 1, 1, 1, 1, 1, 1}, + {0, 2, 2, 2, 2, 2, 2, 2}, + {1, 1, 1, 1, 1, 1, 1, 1}, + {1, 1, 1, 1, 1, 1, 1, 1}, + {0, 2, 2, 2, 2, 2, 2, 2}, + {0, 3, 3, 3, 3, 3, 3, 3}, + {4, 4, 4, 4, 4, 4, 4, 4}, + {2, 0, 2, 2, 2, 2, 2, 2}, + {3, 3, 0, 3, 3, 3, 3, 3}, + {1, 1, 1, 1, 1, 1, 1, 1}, + }; + + inline UtfIntroCharType IntroCharTypeOf(std::istream::int_type ch) + { + if (std::istream::traits_type::eof() == ch) { + return uictOther; + } + + switch (ch) { + case 0: return uict00; + case 0xBB: return uictBB; + case 0xBF: return uictBF; + case 0xEF: return uictEF; + case 0xFE: return uictFE; + case 0xFF: return uictFF; + } + + if ((ch > 0) && (ch < 0xFF)) { + return uictAscii; + } + + return uictOther; + } + + inline char Utf8Adjust(unsigned long ch, unsigned char lead_bits, unsigned char rshift) + { + const unsigned char header = ((1 << lead_bits) - 1) << (8 - lead_bits); + const unsigned char mask = (0xFF >> (lead_bits + 1)); + return static_cast<char>(static_cast<unsigned char>( + header | ((ch >> rshift) & mask) + )); + } + + inline void QueueUnicodeCodepoint(std::deque<char>& q, unsigned long ch) + { + // We are not allowed to queue the Stream::eof() codepoint, so + // replace it with CP_REPLACEMENT_CHARACTER + if (static_cast<unsigned long>(Stream::eof()) == ch) + { + ch = CP_REPLACEMENT_CHARACTER; + } + + if (ch < 0x80) + { + q.push_back(Utf8Adjust(ch, 0, 0)); + } + else if (ch < 0x800) + { + q.push_back(Utf8Adjust(ch, 2, 6)); + q.push_back(Utf8Adjust(ch, 1, 0)); + } + else if (ch < 0x10000) + { + q.push_back(Utf8Adjust(ch, 3, 12)); + q.push_back(Utf8Adjust(ch, 1, 6)); + q.push_back(Utf8Adjust(ch, 1, 0)); + } + else + { + q.push_back(Utf8Adjust(ch, 4, 18)); + q.push_back(Utf8Adjust(ch, 1, 12)); + q.push_back(Utf8Adjust(ch, 1, 6)); + q.push_back(Utf8Adjust(ch, 1, 0)); + } + } + + Stream::Stream(std::istream& input) + : m_input(input), + m_pPrefetched(new unsigned char[YAML_PREFETCH_SIZE]), + m_nPrefetchedAvailable(0), m_nPrefetchedUsed(0) + { + typedef std::istream::traits_type char_traits; + + if(!input) + return; + + // Determine (or guess) the character-set by reading the BOM, if any. See + // the YAML specification for the determination algorithm. + char_traits::int_type intro[4]; + int nIntroUsed = 0; + UtfIntroState state = uis_start; + for(; !s_introFinalState[state]; ) { + std::istream::int_type ch = input.get(); + intro[nIntroUsed++] = ch; + UtfIntroCharType charType = IntroCharTypeOf(ch); + UtfIntroState newState = s_introTransitions[state][charType]; + int nUngets = s_introUngetCount[state][charType]; + if(nUngets > 0) { + input.clear(); + for(; nUngets > 0; --nUngets) { + if(char_traits::eof() != intro[--nIntroUsed]) + input.putback(char_traits::to_char_type(intro[nIntroUsed])); + } + } + state = newState; + } + + switch (state) { + case uis_utf8: m_charSet = utf8; break; + case uis_utf16le: m_charSet = utf16le; break; + case uis_utf16be: m_charSet = utf16be; break; + case uis_utf32le: m_charSet = utf32le; break; + case uis_utf32be: m_charSet = utf32be; break; + default: m_charSet = utf8; break; + } + + ReadAheadTo(0); + } + + Stream::~Stream() + { + delete[] m_pPrefetched; + } + + char Stream::peek() const + { + if (m_readahead.empty()) + { + return Stream::eof(); + } + + return m_readahead[0]; + } + + Stream::operator bool() const + { + return m_input.good() || (!m_readahead.empty() && m_readahead[0] != Stream::eof()); + } + + // get + // . Extracts a character from the stream and updates our position + char Stream::get() + { + char ch = peek(); + AdvanceCurrent(); + m_mark.column++; + + if(ch == '\n') { + m_mark.column = 0; + m_mark.line++; + } + + return ch; + } + + // get + // . Extracts 'n' characters from the stream and updates our position + std::string Stream::get(int n) + { + std::string ret; + ret.reserve(n); + for(int i=0;i<n;i++) + ret += get(); + return ret; + } + + // eat + // . Eats 'n' characters and updates our position. + void Stream::eat(int n) + { + for(int i=0;i<n;i++) + get(); + } + + void Stream::AdvanceCurrent() + { + if (!m_readahead.empty()) + { + m_readahead.pop_front(); + m_mark.pos++; + } + + ReadAheadTo(0); + } + + bool Stream::_ReadAheadTo(size_t i) const + { + while (m_input.good() && (m_readahead.size() <= i)) + { + switch (m_charSet) + { + case utf8: StreamInUtf8(); break; + case utf16le: StreamInUtf16(); break; + case utf16be: StreamInUtf16(); break; + case utf32le: StreamInUtf32(); break; + case utf32be: StreamInUtf32(); break; + } + } + + // signal end of stream + if(!m_input.good()) + m_readahead.push_back(Stream::eof()); + + return m_readahead.size() > i; + } + + void Stream::StreamInUtf8() const + { + unsigned char b = GetNextByte(); + if (m_input.good()) + { + m_readahead.push_back(b); + } + } + + void Stream::StreamInUtf16() const + { + unsigned long ch = 0; + unsigned char bytes[2]; + int nBigEnd = (m_charSet == utf16be) ? 0 : 1; + + bytes[0] = GetNextByte(); + bytes[1] = GetNextByte(); + if (!m_input.good()) + { + return; + } + ch = (static_cast<unsigned long>(bytes[nBigEnd]) << 8) | + static_cast<unsigned long>(bytes[1 ^ nBigEnd]); + + if (ch >= 0xDC00 && ch < 0xE000) + { + // Trailing (low) surrogate...ugh, wrong order + QueueUnicodeCodepoint(m_readahead, CP_REPLACEMENT_CHARACTER); + return; + } + else if (ch >= 0xD800 && ch < 0xDC00) + { + // ch is a leading (high) surrogate + + // Four byte UTF-8 code point + + // Read the trailing (low) surrogate + for (;;) + { + bytes[0] = GetNextByte(); + bytes[1] = GetNextByte(); + if (!m_input.good()) + { + QueueUnicodeCodepoint(m_readahead, CP_REPLACEMENT_CHARACTER); + return; + } + unsigned long chLow = (static_cast<unsigned long>(bytes[nBigEnd]) << 8) | + static_cast<unsigned long>(bytes[1 ^ nBigEnd]); + if (chLow < 0xDC00 || ch >= 0xE000) + { + // Trouble...not a low surrogate. Dump a REPLACEMENT CHARACTER into the stream. + QueueUnicodeCodepoint(m_readahead, CP_REPLACEMENT_CHARACTER); + + // Deal with the next UTF-16 unit + if (chLow < 0xD800 || ch >= 0xE000) + { + // Easiest case: queue the codepoint and return + QueueUnicodeCodepoint(m_readahead, ch); + return; + } + else + { + // Start the loop over with the new high surrogate + ch = chLow; + continue; + } + } + + // Select the payload bits from the high surrogate + ch &= 0x3FF; + ch <<= 10; + + // Include bits from low surrogate + ch |= (chLow & 0x3FF); + + // Add the surrogacy offset + ch += 0x10000; + } + } + + QueueUnicodeCodepoint(m_readahead, ch); + } + + inline char* ReadBuffer(unsigned char* pBuffer) + { + return reinterpret_cast<char*>(pBuffer); + } + + unsigned char Stream::GetNextByte() const + { + if (m_nPrefetchedUsed >= m_nPrefetchedAvailable) + { + std::streambuf *pBuf = m_input.rdbuf(); + m_nPrefetchedAvailable = (size_t)pBuf->sgetn(ReadBuffer(m_pPrefetched), + YAML_PREFETCH_SIZE); + m_nPrefetchedUsed = 0; + if (!m_nPrefetchedAvailable) + { + m_input.setstate(std::ios_base::eofbit); + } + + if (0 == m_nPrefetchedAvailable) + { + return 0; + } + } + + return m_pPrefetched[m_nPrefetchedUsed++]; + } + + void Stream::StreamInUtf32() const + { + static int indexes[2][4] = { + {3, 2, 1, 0}, + {0, 1, 2, 3} + }; + + unsigned long ch = 0; + unsigned char bytes[4]; + int* pIndexes = (m_charSet == utf32be) ? indexes[1] : indexes[0]; + + bytes[0] = GetNextByte(); + bytes[1] = GetNextByte(); + bytes[2] = GetNextByte(); + bytes[3] = GetNextByte(); + if (!m_input.good()) + { + return; + } + + for (int i = 0; i < 4; ++i) + { + ch <<= 8; + ch |= bytes[pIndexes[i]]; + } + + QueueUnicodeCodepoint(m_readahead, ch); + } +} |