diff options
Diffstat (limited to 'yaml-cpp/src/stream.cpp')
-rwxr-xr-x | yaml-cpp/src/stream.cpp | 448 |
1 files changed, 0 insertions, 448 deletions
diff --git a/yaml-cpp/src/stream.cpp b/yaml-cpp/src/stream.cpp deleted file mode 100755 index 447b67c1..00000000 --- a/yaml-cpp/src/stream.cpp +++ /dev/null @@ -1,448 +0,0 @@ -#include "stream.h" -#include <iostream> -#include "exp.h" - -#ifndef YAML_PREFETCH_SIZE -#define YAML_PREFETCH_SIZE 2048 -#endif - -#define S_ARRAY_SIZE( A ) (sizeof(A)/sizeof(*(A))) -#define S_ARRAY_END( A ) ((A) + S_ARRAY_SIZE(A)) - -#define CP_REPLACEMENT_CHARACTER (0xFFFD) - -namespace YAML -{ - enum UtfIntroState { - uis_start, - uis_utfbe_b1, - uis_utf32be_b2, - uis_utf32be_bom3, - uis_utf32be, - uis_utf16be, - uis_utf16be_bom1, - uis_utfle_bom1, - uis_utf16le_bom2, - uis_utf32le_bom3, - uis_utf16le, - uis_utf32le, - uis_utf8_imp, - uis_utf16le_imp, - uis_utf32le_imp3, - uis_utf8_bom1, - uis_utf8_bom2, - uis_utf8, - uis_error - }; - - enum UtfIntroCharType { - uict00, - uictBB, - uictBF, - uictEF, - uictFE, - uictFF, - uictAscii, - uictOther, - uictMax - }; - - static bool s_introFinalState[] = { - false, //uis_start - false, //uis_utfbe_b1 - false, //uis_utf32be_b2 - false, //uis_utf32be_bom3 - true, //uis_utf32be - true, //uis_utf16be - false, //uis_utf16be_bom1 - false, //uis_utfle_bom1 - false, //uis_utf16le_bom2 - false, //uis_utf32le_bom3 - true, //uis_utf16le - true, //uis_utf32le - false, //uis_utf8_imp - false, //uis_utf16le_imp - false, //uis_utf32le_imp3 - false, //uis_utf8_bom1 - false, //uis_utf8_bom2 - true, //uis_utf8 - true, //uis_error - }; - - static UtfIntroState s_introTransitions[][uictMax] = { - // uict00, uictBB, uictBF, uictEF, uictFE, uictFF, uictAscii, uictOther - {uis_utfbe_b1, uis_utf8, uis_utf8, uis_utf8_bom1, uis_utf16be_bom1, uis_utfle_bom1, uis_utf8_imp, uis_utf8}, - {uis_utf32be_b2, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf16be, uis_utf8}, - {uis_utf32be, uis_utf8, uis_utf8, uis_utf8, uis_utf32be_bom3, uis_utf8, uis_utf8, uis_utf8}, - {uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf32be, uis_utf8, uis_utf8}, - {uis_utf32be, uis_utf32be, uis_utf32be, uis_utf32be, uis_utf32be, uis_utf32be, uis_utf32be, uis_utf32be}, - {uis_utf16be, uis_utf16be, uis_utf16be, uis_utf16be, uis_utf16be, uis_utf16be, uis_utf16be, uis_utf16be}, - {uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf16be, uis_utf8, uis_utf8}, - {uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf16le_bom2, uis_utf8, uis_utf8, uis_utf8}, - {uis_utf32le_bom3, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le}, - {uis_utf32le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le}, - {uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le}, - {uis_utf32le, uis_utf32le, uis_utf32le, uis_utf32le, uis_utf32le, uis_utf32le, uis_utf32le, uis_utf32le}, - {uis_utf16le_imp, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8}, - {uis_utf32le_imp3, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le}, - {uis_utf32le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le}, - {uis_utf8, uis_utf8_bom2, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8}, - {uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8}, - {uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8}, - }; - - static char s_introUngetCount[][uictMax] = { - // uict00, uictBB, uictBF, uictEF, uictFE, uictFF, uictAscii, uictOther - {0, 1, 1, 0, 0, 0, 0, 1}, - {0, 2, 2, 2, 2, 2, 2, 2}, - {3, 3, 3, 3, 0, 3, 3, 3}, - {4, 4, 4, 4, 4, 0, 4, 4}, - {1, 1, 1, 1, 1, 1, 1, 1}, - {1, 1, 1, 1, 1, 1, 1, 1}, - {2, 2, 2, 2, 2, 0, 2, 2}, - {2, 2, 2, 2, 0, 2, 2, 2}, - {0, 1, 1, 1, 1, 1, 1, 1}, - {0, 2, 2, 2, 2, 2, 2, 2}, - {1, 1, 1, 1, 1, 1, 1, 1}, - {1, 1, 1, 1, 1, 1, 1, 1}, - {0, 2, 2, 2, 2, 2, 2, 2}, - {0, 3, 3, 3, 3, 3, 3, 3}, - {4, 4, 4, 4, 4, 4, 4, 4}, - {2, 0, 2, 2, 2, 2, 2, 2}, - {3, 3, 0, 3, 3, 3, 3, 3}, - {1, 1, 1, 1, 1, 1, 1, 1}, - }; - - inline UtfIntroCharType IntroCharTypeOf(std::istream::int_type ch) - { - if (std::istream::traits_type::eof() == ch) { - return uictOther; - } - - switch (ch) { - case 0: return uict00; - case 0xBB: return uictBB; - case 0xBF: return uictBF; - case 0xEF: return uictEF; - case 0xFE: return uictFE; - case 0xFF: return uictFF; - } - - if ((ch > 0) && (ch < 0xFF)) { - return uictAscii; - } - - return uictOther; - } - - inline char Utf8Adjust(unsigned long ch, unsigned char lead_bits, unsigned char rshift) - { - const unsigned char header = ((1 << lead_bits) - 1) << (8 - lead_bits); - const unsigned char mask = (0xFF >> (lead_bits + 1)); - return static_cast<char>(static_cast<unsigned char>( - header | ((ch >> rshift) & mask) - )); - } - - inline void QueueUnicodeCodepoint(std::deque<char>& q, unsigned long ch) - { - // We are not allowed to queue the Stream::eof() codepoint, so - // replace it with CP_REPLACEMENT_CHARACTER - if (static_cast<unsigned long>(Stream::eof()) == ch) - { - ch = CP_REPLACEMENT_CHARACTER; - } - - if (ch < 0x80) - { - q.push_back(Utf8Adjust(ch, 0, 0)); - } - else if (ch < 0x800) - { - q.push_back(Utf8Adjust(ch, 2, 6)); - q.push_back(Utf8Adjust(ch, 1, 0)); - } - else if (ch < 0x10000) - { - q.push_back(Utf8Adjust(ch, 3, 12)); - q.push_back(Utf8Adjust(ch, 1, 6)); - q.push_back(Utf8Adjust(ch, 1, 0)); - } - else - { - q.push_back(Utf8Adjust(ch, 4, 18)); - q.push_back(Utf8Adjust(ch, 1, 12)); - q.push_back(Utf8Adjust(ch, 1, 6)); - q.push_back(Utf8Adjust(ch, 1, 0)); - } - } - - Stream::Stream(std::istream& input) - : m_input(input), - m_pPrefetched(new unsigned char[YAML_PREFETCH_SIZE]), - m_nPrefetchedAvailable(0), m_nPrefetchedUsed(0) - { - typedef std::istream::traits_type char_traits; - - if(!input) - return; - - // Determine (or guess) the character-set by reading the BOM, if any. See - // the YAML specification for the determination algorithm. - char_traits::int_type intro[4]; - int nIntroUsed = 0; - UtfIntroState state = uis_start; - for(; !s_introFinalState[state]; ) { - std::istream::int_type ch = input.get(); - intro[nIntroUsed++] = ch; - UtfIntroCharType charType = IntroCharTypeOf(ch); - UtfIntroState newState = s_introTransitions[state][charType]; - int nUngets = s_introUngetCount[state][charType]; - if(nUngets > 0) { - input.clear(); - for(; nUngets > 0; --nUngets) { - if(char_traits::eof() != intro[--nIntroUsed]) - input.putback(char_traits::to_char_type(intro[nIntroUsed])); - } - } - state = newState; - } - - switch (state) { - case uis_utf8: m_charSet = utf8; break; - case uis_utf16le: m_charSet = utf16le; break; - case uis_utf16be: m_charSet = utf16be; break; - case uis_utf32le: m_charSet = utf32le; break; - case uis_utf32be: m_charSet = utf32be; break; - default: m_charSet = utf8; break; - } - - ReadAheadTo(0); - } - - Stream::~Stream() - { - delete[] m_pPrefetched; - } - - char Stream::peek() const - { - if (m_readahead.empty()) - { - return Stream::eof(); - } - - return m_readahead[0]; - } - - Stream::operator bool() const - { - return m_input.good() || (!m_readahead.empty() && m_readahead[0] != Stream::eof()); - } - - // get - // . Extracts a character from the stream and updates our position - char Stream::get() - { - char ch = peek(); - AdvanceCurrent(); - m_mark.column++; - - if(ch == '\n') { - m_mark.column = 0; - m_mark.line++; - } - - return ch; - } - - // get - // . Extracts 'n' characters from the stream and updates our position - std::string Stream::get(int n) - { - std::string ret; - ret.reserve(n); - for(int i=0;i<n;i++) - ret += get(); - return ret; - } - - // eat - // . Eats 'n' characters and updates our position. - void Stream::eat(int n) - { - for(int i=0;i<n;i++) - get(); - } - - void Stream::AdvanceCurrent() - { - if (!m_readahead.empty()) - { - m_readahead.pop_front(); - m_mark.pos++; - } - - ReadAheadTo(0); - } - - bool Stream::_ReadAheadTo(size_t i) const - { - while (m_input.good() && (m_readahead.size() <= i)) - { - switch (m_charSet) - { - case utf8: StreamInUtf8(); break; - case utf16le: StreamInUtf16(); break; - case utf16be: StreamInUtf16(); break; - case utf32le: StreamInUtf32(); break; - case utf32be: StreamInUtf32(); break; - } - } - - // signal end of stream - if(!m_input.good()) - m_readahead.push_back(Stream::eof()); - - return m_readahead.size() > i; - } - - void Stream::StreamInUtf8() const - { - unsigned char b = GetNextByte(); - if (m_input.good()) - { - m_readahead.push_back(b); - } - } - - void Stream::StreamInUtf16() const - { - unsigned long ch = 0; - unsigned char bytes[2]; - int nBigEnd = (m_charSet == utf16be) ? 0 : 1; - - bytes[0] = GetNextByte(); - bytes[1] = GetNextByte(); - if (!m_input.good()) - { - return; - } - ch = (static_cast<unsigned long>(bytes[nBigEnd]) << 8) | - static_cast<unsigned long>(bytes[1 ^ nBigEnd]); - - if (ch >= 0xDC00 && ch < 0xE000) - { - // Trailing (low) surrogate...ugh, wrong order - QueueUnicodeCodepoint(m_readahead, CP_REPLACEMENT_CHARACTER); - return; - } - else if (ch >= 0xD800 && ch < 0xDC00) - { - // ch is a leading (high) surrogate - - // Four byte UTF-8 code point - - // Read the trailing (low) surrogate - for (;;) - { - bytes[0] = GetNextByte(); - bytes[1] = GetNextByte(); - if (!m_input.good()) - { - QueueUnicodeCodepoint(m_readahead, CP_REPLACEMENT_CHARACTER); - return; - } - unsigned long chLow = (static_cast<unsigned long>(bytes[nBigEnd]) << 8) | - static_cast<unsigned long>(bytes[1 ^ nBigEnd]); - if (chLow < 0xDC00 || ch >= 0xE000) - { - // Trouble...not a low surrogate. Dump a REPLACEMENT CHARACTER into the stream. - QueueUnicodeCodepoint(m_readahead, CP_REPLACEMENT_CHARACTER); - - // Deal with the next UTF-16 unit - if (chLow < 0xD800 || ch >= 0xE000) - { - // Easiest case: queue the codepoint and return - QueueUnicodeCodepoint(m_readahead, ch); - return; - } - else - { - // Start the loop over with the new high surrogate - ch = chLow; - continue; - } - } - - // Select the payload bits from the high surrogate - ch &= 0x3FF; - ch <<= 10; - - // Include bits from low surrogate - ch |= (chLow & 0x3FF); - - // Add the surrogacy offset - ch += 0x10000; - } - } - - QueueUnicodeCodepoint(m_readahead, ch); - } - - inline char* ReadBuffer(unsigned char* pBuffer) - { - return reinterpret_cast<char*>(pBuffer); - } - - unsigned char Stream::GetNextByte() const - { - if (m_nPrefetchedUsed >= m_nPrefetchedAvailable) - { - std::streambuf *pBuf = m_input.rdbuf(); - m_nPrefetchedAvailable = (size_t)pBuf->sgetn(ReadBuffer(m_pPrefetched), - YAML_PREFETCH_SIZE); - m_nPrefetchedUsed = 0; - if (!m_nPrefetchedAvailable) - { - m_input.setstate(std::ios_base::eofbit); - } - - if (0 == m_nPrefetchedAvailable) - { - return 0; - } - } - - return m_pPrefetched[m_nPrefetchedUsed++]; - } - - void Stream::StreamInUtf32() const - { - static int indexes[2][4] = { - {3, 2, 1, 0}, - {0, 1, 2, 3} - }; - - unsigned long ch = 0; - unsigned char bytes[4]; - int* pIndexes = (m_charSet == utf32be) ? indexes[1] : indexes[0]; - - bytes[0] = GetNextByte(); - bytes[1] = GetNextByte(); - bytes[2] = GetNextByte(); - bytes[3] = GetNextByte(); - if (!m_input.good()) - { - return; - } - - for (int i = 0; i < 4; ++i) - { - ch <<= 8; - ch |= bytes[pIndexes[i]]; - } - - QueueUnicodeCodepoint(m_readahead, ch); - } -} |