1 files changed, 448 insertions, 0 deletions
diff --git a/yaml-cpp/src/stream.cpp b/yaml-cpp/src/stream.cpp
new file mode 100755
index 00000000..447b67c1
--- /dev/null
+++ b/yaml-cpp/src/stream.cpp
@@ -0,0 +1,448 @@
+#include "stream.h"
+#include <iostream>
+#include "exp.h"
+
+#ifndef YAML_PREFETCH_SIZE // byte capacity of Stream's prefetch buffer; a build may predefine it to override the default
+#define YAML_PREFETCH_SIZE 2048
+#endif // YAML_PREFETCH_SIZE
+// Element count of a true array, and a pointer one past its last element.
+#define S_ARRAY_SIZE( A ) (sizeof(A)/sizeof(*(A)))
+#define S_ARRAY_END( A ) ((A) + S_ARRAY_SIZE(A))
+// Code point queued in place of malformed UTF-16 input or of a code point equal to Stream::eof().
+#define CP_REPLACEMENT_CHARACTER (0xFFFD)
+
+namespace YAML
+{
+	enum UtfIntroState { // encoding-detection states; values index s_introFinalState, s_introTransitions and s_introUngetCount, so order matters
+		uis_start, // nothing read yet
+		uis_utfbe_b1, // read 00
+		uis_utf32be_b2, // read 00 00
+		uis_utf32be_bom3, // read 00 00 FE
+		uis_utf32be, // final: constructor selects utf32be
+		uis_utf16be, // final: constructor selects utf16be
+		uis_utf16be_bom1, // read FE
+		uis_utfle_bom1, // read FF
+		uis_utf16le_bom2, // read FF FE
+		uis_utf32le_bom3, // read FF FE 00
+		uis_utf16le, // final: constructor selects utf16le
+		uis_utf32le, // final: constructor selects utf32le
+		uis_utf8_imp, // read one uictAscii byte, no BOM
+		uis_utf16le_imp, // read a uictAscii byte followed by 00
+		uis_utf32le_imp3, // read a uictAscii byte followed by 00 00
+		uis_utf8_bom1, // read EF
+		uis_utf8_bom2, // read EF BB
+		uis_utf8, // final: constructor selects utf8
+		uis_error // final; has no row in the transition/unget tables and no transition leads to it
+	};
+
+	enum UtfIntroCharType { // byte classes produced by IntroCharTypeOf(); values are column indexes of the detection tables
+		uict00, // value 0x00
+		uictBB, // value 0xBB
+		uictBF, // value 0xBF
+		uictEF, // value 0xEF
+		uictFE, // value 0xFE
+		uictFF, // value 0xFF
+		uictAscii, // any other value strictly between 0x00 and 0xFF (the check is not limited to 7-bit ASCII)
+		uictOther, // end of input and anything not covered above
+		uictMax // number of classes; used as the column count of the tables
+	};
+
+	static bool s_introFinalState[] = { // indexed by UtfIntroState; true ends the detection loop in Stream::Stream
+		false, //uis_start
+		false, //uis_utfbe_b1
+		false, //uis_utf32be_b2
+		false, //uis_utf32be_bom3
+		true,  //uis_utf32be
+		true,  //uis_utf16be
+		false, //uis_utf16be_bom1
+		false, //uis_utfle_bom1
+		false, //uis_utf16le_bom2
+		false, //uis_utf32le_bom3
+		true,  //uis_utf16le
+		true,  //uis_utf32le
+		false, //uis_utf8_imp
+		false, //uis_utf16le_imp
+		false, //uis_utf32le_imp3
+		false, //uis_utf8_bom1
+		false, //uis_utf8_bom2
+		true,  //uis_utf8
+		true,  //uis_error
+	};
+
+	static UtfIntroState s_introTransitions[][uictMax] = { // next state = [current state][class of the byte just read]; one row per state up to uis_utf8
+		// uict00,           uictBB,           uictBF,           uictEF,           uictFE,           uictFF,           uictAscii,        uictOther
+		  {uis_utfbe_b1,     uis_utf8,         uis_utf8,         uis_utf8_bom1,    uis_utf16be_bom1, uis_utfle_bom1,   uis_utf8_imp,     uis_utf8}, // uis_start
+		  {uis_utf32be_b2,   uis_utf8,         uis_utf8,         uis_utf8,         uis_utf8,         uis_utf8,         uis_utf16be,      uis_utf8}, // uis_utfbe_b1
+		  {uis_utf32be,      uis_utf8,         uis_utf8,         uis_utf8,         uis_utf32be_bom3, uis_utf8,         uis_utf8,         uis_utf8}, // uis_utf32be_b2
+		  {uis_utf8,         uis_utf8,         uis_utf8,         uis_utf8,         uis_utf8,         uis_utf32be,      uis_utf8,         uis_utf8}, // uis_utf32be_bom3
+		  {uis_utf32be,      uis_utf32be,      uis_utf32be,      uis_utf32be,      uis_utf32be,      uis_utf32be,      uis_utf32be,      uis_utf32be}, // uis_utf32be
+		  {uis_utf16be,      uis_utf16be,      uis_utf16be,      uis_utf16be,      uis_utf16be,      uis_utf16be,      uis_utf16be,      uis_utf16be}, // uis_utf16be
+		  {uis_utf8,         uis_utf8,         uis_utf8,         uis_utf8,         uis_utf8,         uis_utf16be,      uis_utf8,         uis_utf8}, // uis_utf16be_bom1
+		  {uis_utf8,         uis_utf8,         uis_utf8,         uis_utf8,         uis_utf16le_bom2, uis_utf8,         uis_utf8,         uis_utf8}, // uis_utfle_bom1
+		  {uis_utf32le_bom3, uis_utf16le,      uis_utf16le,      uis_utf16le,      uis_utf16le,      uis_utf16le,      uis_utf16le,      uis_utf16le}, // uis_utf16le_bom2
+		  {uis_utf32le,      uis_utf16le,      uis_utf16le,      uis_utf16le,      uis_utf16le,      uis_utf16le,      uis_utf16le,      uis_utf16le}, // uis_utf32le_bom3
+		  {uis_utf16le,      uis_utf16le,      uis_utf16le,      uis_utf16le,      uis_utf16le,      uis_utf16le,      uis_utf16le,      uis_utf16le}, // uis_utf16le
+		  {uis_utf32le,      uis_utf32le,      uis_utf32le,      uis_utf32le,      uis_utf32le,      uis_utf32le,      uis_utf32le,      uis_utf32le}, // uis_utf32le
+		  {uis_utf16le_imp,  uis_utf8,         uis_utf8,         uis_utf8,         uis_utf8,         uis_utf8,         uis_utf8,         uis_utf8}, // uis_utf8_imp
+		  {uis_utf32le_imp3, uis_utf16le,      uis_utf16le,      uis_utf16le,      uis_utf16le,      uis_utf16le,      uis_utf16le,      uis_utf16le}, // uis_utf16le_imp
+		  {uis_utf32le,      uis_utf16le,      uis_utf16le,      uis_utf16le,      uis_utf16le,      uis_utf16le,      uis_utf16le,      uis_utf16le}, // uis_utf32le_imp3
+		  {uis_utf8,         uis_utf8_bom2,    uis_utf8,         uis_utf8,         uis_utf8,         uis_utf8,         uis_utf8,         uis_utf8}, // uis_utf8_bom1
+		  {uis_utf8,         uis_utf8,         uis_utf8,         uis_utf8,         uis_utf8,         uis_utf8,         uis_utf8,         uis_utf8}, // uis_utf8_bom2
+		  {uis_utf8,         uis_utf8,         uis_utf8,         uis_utf8,         uis_utf8,         uis_utf8,         uis_utf8,         uis_utf8}, // uis_utf8
+	};
+
+	static char s_introUngetCount[][uictMax] = { // [state][byte class] -> how many of the bytes read so far (current one included) Stream::Stream puts back; 0 leaves them consumed
+		// uict00, uictBB, uictBF, uictEF, uictFE, uictFF, uictAscii, uictOther
+		  {0,      1,      1,      0,      0,      0,      0,         1}, // uis_start
+		  {0,      2,      2,      2,      2,      2,      2,         2}, // uis_utfbe_b1
+		  {3,      3,      3,      3,      0,      3,      3,         3}, // uis_utf32be_b2
+		  {4,      4,      4,      4,      4,      0,      4,         4}, // uis_utf32be_bom3
+		  {1,      1,      1,      1,      1,      1,      1,         1}, // uis_utf32be
+		  {1,      1,      1,      1,      1,      1,      1,         1}, // uis_utf16be
+		  {2,      2,      2,      2,      2,      0,      2,         2}, // uis_utf16be_bom1
+		  {2,      2,      2,      2,      0,      2,      2,         2}, // uis_utfle_bom1
+		  {0,      1,      1,      1,      1,      1,      1,         1}, // uis_utf16le_bom2
+		  {0,      2,      2,      2,      2,      2,      2,         2}, // uis_utf32le_bom3
+		  {1,      1,      1,      1,      1,      1,      1,         1}, // uis_utf16le
+		  {1,      1,      1,      1,      1,      1,      1,         1}, // uis_utf32le
+		  {0,      2,      2,      2,      2,      2,      2,         2}, // uis_utf8_imp
+		  {0,      3,      3,      3,      3,      3,      3,         3}, // uis_utf16le_imp
+		  {4,      4,      4,      4,      4,      4,      4,         4}, // uis_utf32le_imp3
+		  {2,      0,      2,      2,      2,      2,      2,         2}, // uis_utf8_bom1
+		  {3,      3,      0,      3,      3,      3,      3,         3}, // uis_utf8_bom2
+		  {1,      1,      1,      1,      1,      1,      1,         1}, // uis_utf8
+	};
+
+	// Maps a value obtained from std::istream::get() to the byte class that
+	// selects a column in the encoding-detection tables.
+	inline UtfIntroCharType IntroCharTypeOf(std::istream::int_type ch)
+	{
+		// End of input carries no encoding hint.
+		if (ch == std::istream::traits_type::eof())
+			return uictOther;
+
+		// Values that get a class of their own.
+		if (ch == 0x00) return uict00;
+		if (ch == 0xBB) return uictBB;
+		if (ch == 0xBF) return uictBF;
+		if (ch == 0xEF) return uictEF;
+		if (ch == 0xFE) return uictFE;
+		if (ch == 0xFF) return uictFF;
+
+		// Remaining values strictly between 0x00 and 0xFF count as "Ascii"
+		// (the test is on the range only, not on the high bit).
+		const bool inByteRange = (0 < ch) && (ch < 0xFF);
+		return inByteRange ? uictAscii : uictOther;
+	}
+
+	// Builds one UTF-8 byte: 'lead_bits' one-bits on top, then a zero bit, then payload bits taken from ch >> rshift.
+	inline char Utf8Adjust(unsigned long ch, unsigned char lead_bits, unsigned char rshift)
+	{
+		const unsigned char payloadMask = (0xFF >> (lead_bits + 1));
+		const unsigned char prefix = ((1 << lead_bits) - 1) << (8 - lead_bits);
+		const unsigned long payload = (ch >> rshift) & payloadMask;
+		return static_cast<char>(static_cast<unsigned char>(prefix | payload));
+	}
+
+	// Appends the UTF-8 encoding of code point 'ch' to 'q'.
+	//
+	// Stream::eof() doubles as the end-of-stream marker inside the read-ahead
+	// queue, so a code point with that same value must never be queued; it is
+	// swapped for CP_REPLACEMENT_CHARACTER first.  The encoded length is picked
+	// from the (possibly replaced) value: 1 byte below 0x80, 2 below 0x800,
+	// 3 below 0x10000 and 4 for everything else.
+	inline void QueueUnicodeCodepoint(std::deque<char>& q, unsigned long ch)
+	{
+		const unsigned long eofValue = static_cast<unsigned long>(Stream::eof());
+		const unsigned long cp = (eofValue == ch) ? CP_REPLACEMENT_CHARACTER : ch;
+
+		if (cp < 0x80) {
+			q.push_back(Utf8Adjust(cp, 0, 0));
+			return;
+		}
+		if (cp < 0x800) {
+			q.push_back(Utf8Adjust(cp, 2, 6));
+			q.push_back(Utf8Adjust(cp, 1, 0));
+			return;
+		}
+		if (cp < 0x10000) {
+			q.push_back(Utf8Adjust(cp, 3, 12));
+			q.push_back(Utf8Adjust(cp, 1, 6));
+			q.push_back(Utf8Adjust(cp, 1, 0));
+			return;
+		}
+		q.push_back(Utf8Adjust(cp, 4, 18));
+		q.push_back(Utf8Adjust(cp, 1, 12));
+		q.push_back(Utf8Adjust(cp, 1, 6));
+		q.push_back(Utf8Adjust(cp, 1, 0));
+	}
+
+	// Binds the stream to 'input' and determines (or guesses) its character set
+	// from the first bytes, at most four; see the YAML specification for the
+	// algorithm.  Bytes that are content rather than BOM are put back.
+	Stream::Stream(std::istream& input)
+		: m_input(input),
+		m_pPrefetched(new unsigned char[YAML_PREFETCH_SIZE]),
+		m_nPrefetchedAvailable(0), m_nPrefetchedUsed(0)
+	{
+		typedef std::istream::traits_type char_traits;
+
+		// Defined on every path, including the early return: _ReadAheadTo()
+		// switches on m_charSet.
+		m_charSet = utf8;
+		if(!input)
+			return;
+
+		char_traits::int_type intro[4];
+		int nIntroUsed = 0;
+		UtfIntroState state = uis_start;
+		while(!s_introFinalState[state]) {
+			const std::istream::int_type ch = input.get();
+			intro[nIntroUsed++] = ch;
+			const UtfIntroCharType charType = IntroCharTypeOf(ch);
+			int nUngets = s_introUngetCount[state][charType];
+			state = s_introTransitions[state][charType];
+			if(nUngets > 0)
+				input.clear(); // get() may have hit end of input; reset the state before putback
+			for(; nUngets > 0; --nUngets) {
+				if(char_traits::eof() != intro[--nIntroUsed]) // an EOF result is not a byte
+					input.putback(char_traits::to_char_type(intro[nIntroUsed]));
+			}
+		}
+
+		switch (state) {
+		case uis_utf16le: m_charSet = utf16le; break;
+		case uis_utf16be: m_charSet = utf16be; break;
+		case uis_utf32le: m_charSet = utf32le; break;
+		case uis_utf32be: m_charSet = utf32be; break;
+		default: break; // uis_utf8 (and any other state) keeps the utf8 default
+		}
+		ReadAheadTo(0);
+	}
+
+	Stream::~Stream()
+	{
+		delete[] m_pPrefetched; // frees the prefetch buffer allocated with new[] in the constructor
+	}
+
+	// Returns the character at the front of the read-ahead queue without
+	// removing it.  When the queue is empty, Stream::eof() is returned
+	// instead.
+	char Stream::peek() const
+	{
+		if (!m_readahead.empty())
+			return m_readahead[0];
+		return Stream::eof();
+	}
+	
+	Stream::operator bool() const // true while the input is good() or a non-EOF character is queued
+	{
+		return m_input.good() || (!m_readahead.empty() && Stream::eof() != m_readahead.front());
+	}
+
+	// get
+	// . Consumes and returns the next character.  A newline moves m_mark to
+	//   column 0 of the next line; any other character advances the column.
+	char Stream::get()
+	{
+		const char next = peek();
+		AdvanceCurrent();
+		if(next == '\n') {
+			m_mark.column = 0;
+			++m_mark.line;
+		} else {
+			++m_mark.column;
+		}
+		return next;
+	}
+
+	// get
+	// . Extracts 'n' characters from the stream and updates our position.
+	//   For n <= 0 nothing is read; reserve() is skipped then, as a negative count would become a huge size.
+	std::string Stream::get(int n)
+	{
+		std::string ret;
+		if(n > 0) ret.reserve(static_cast<std::string::size_type>(n));
+		for(int i=0;i<n;i++) ret += get();
+		return ret;
+	}
+
+	// eat
+	// . Discards 'n' characters by calling get() for each, so our position is updated.
+	void Stream::eat(int n)
+	{
+		for(int remaining = n; remaining > 0; --remaining)
+			get();
+	}
+
+	// Removes the front character of the read-ahead queue (counting it in m_mark.pos)
+	// and then requests one character of read-ahead again through ReadAheadTo(0).
+	void Stream::AdvanceCurrent()
+	{
+		if (!m_readahead.empty()) {
+			m_readahead.pop_front();
+			++m_mark.pos;
+		}
+		ReadAheadTo(0);
+	}
+
+	// Decodes input until the read-ahead queue holds more than 'i' characters
+	// or the input stops being good().  If the input is not good() afterwards,
+	// a Stream::eof() marker is appended.  Returns whether index 'i' is queued.
+	bool Stream::_ReadAheadTo(size_t i) const
+	{
+		for (;;) {
+			if (!m_input.good() || m_readahead.size() > i)
+				break;
+			// Hand over to the decoder for the detected character set.
+			if (m_charSet == utf8)
+				StreamInUtf8();
+			else if (m_charSet == utf16le || m_charSet == utf16be)
+				StreamInUtf16();
+			else if (m_charSet == utf32le || m_charSet == utf32be)
+				StreamInUtf32();
+		}
+		if (!m_input.good())
+			m_readahead.push_back(Stream::eof()); // signal end of stream
+		return i < m_readahead.size();
+	}
+
+	// Pulls one raw byte and queues it unchanged; nothing is queued if that read hit the end of input.
+	void Stream::StreamInUtf8() const
+	{
+		const unsigned char rawByte = GetNextByte();
+		if (!m_input.good())
+			return;
+		m_readahead.push_back(rawByte);
+	}
+
+	// Decodes the next UTF-16 code unit(s) from the input, byte order taken
+	// from m_charSet, and queues the resulting code point as UTF-8.
+	//
+	// Malformed input:
+	// . a low surrogate without a preceding high surrogate, a high surrogate
+	//   not followed by a low surrogate, and a high surrogate cut off by the
+	//   end of input each queue one CP_REPLACEMENT_CHARACTER;
+	// . the unit found after an unpaired high surrogate is not lost: it is
+	//   queued itself, or opens a new pair if it is a high surrogate.
+	// Nothing is queued when the input ends before the first unit is complete.
+	void Stream::StreamInUtf16() const
+	{
+		unsigned char bytes[2];
+
+		// Index of the most significant byte within each two-byte unit
+		const int nBigEnd = (m_charSet == utf16be) ? 0 : 1;
+
+		bytes[0] = GetNextByte();
+		bytes[1] = GetNextByte();
+		if (!m_input.good())
+		{
+			return;
+		}
+		unsigned long ch = (static_cast<unsigned long>(bytes[nBigEnd]) << 8) |
+			static_cast<unsigned long>(bytes[1 ^ nBigEnd]);
+
+		if (ch >= 0xDC00 && ch < 0xE000)
+		{
+			// Trailing (low) surrogate...ugh, wrong order
+			QueueUnicodeCodepoint(m_readahead, CP_REPLACEMENT_CHARACTER);
+			return;
+		}
+
+		// While ch is a leading (high) surrogate, look for its trailing (low)
+		// partner.  The loop ends with ch holding a code point that is safe to
+		// queue: a combined pair or an ordinary (non-surrogate) unit.
+		while (ch >= 0xD800 && ch < 0xDC00)
+		{
+			bytes[0] = GetNextByte();
+			bytes[1] = GetNextByte();
+			if (!m_input.good())
+			{
+				// Input ended in the middle of a pair
+				QueueUnicodeCodepoint(m_readahead, CP_REPLACEMENT_CHARACTER);
+				return;
+			}
+			const unsigned long chLow = (static_cast<unsigned long>(bytes[nBigEnd]) << 8) |
+				static_cast<unsigned long>(bytes[1 ^ nBigEnd]);
+
+			if (chLow >= 0xDC00 && chLow < 0xE000)
+			{
+				// Select the payload bits from the high surrogate
+				ch &= 0x3FF;
+				ch <<= 10;
+
+				// Include bits from low surrogate
+				ch |= (chLow & 0x3FF);
+
+				// Add the surrogacy offset
+				ch += 0x10000;
+				// Pair complete (ch is now >= 0x10000); stop reading
+				break;
+			}
+
+			// Trouble...not a low surrogate.  Dump a REPLACEMENT CHARACTER into the stream
+			// for the unpaired high surrogate, then deal with the unit just read:
+			// a high surrogate restarts the loop, anything else is queued below.
+			QueueUnicodeCodepoint(m_readahead, CP_REPLACEMENT_CHARACTER);
+			ch = chLow;
+		}
+
+		QueueUnicodeCodepoint(m_readahead, ch);
+	}
+
+	inline char* ReadBuffer(unsigned char* pBuffer)
+	{	// Same address viewed as char*, the pointer type passed to sgetn() in GetNextByte().
+		return static_cast<char*>(static_cast<void*>(pBuffer));
+	}
+
+	// Returns the next raw input byte.  Bytes are served from m_pPrefetched,
+	// which is refilled with up to YAML_PREFETCH_SIZE bytes straight from the
+	// stream buffer of m_input once all prefetched bytes have been handed out.
+	// If a refill yields nothing, eofbit is set on m_input and 0 is returned;
+	// callers check m_input.good() to tell that apart from a genuine 0 byte.
+	unsigned char Stream::GetNextByte() const
+	{
+		if (m_nPrefetchedUsed < m_nPrefetchedAvailable)
+			return m_pPrefetched[m_nPrefetchedUsed++];
+
+		// Buffer exhausted: refill it.
+		char* const pDest = ReadBuffer(m_pPrefetched);
+		m_nPrefetchedAvailable = static_cast<size_t>(m_input.rdbuf()->sgetn(pDest, YAML_PREFETCH_SIZE));
+		m_nPrefetchedUsed = 0;
+		if (0 == m_nPrefetchedAvailable)
+		{
+			m_input.setstate(std::ios_base::eofbit);
+			return 0;
+		}
+		return m_pPrefetched[m_nPrefetchedUsed++];
+	}
+
+	// Reads four bytes, assembles them into one code point using the byte
+	// order implied by m_charSet (most significant byte first for utf32be,
+	// last otherwise) and queues it as UTF-8.  Nothing is queued if the
+	// input ran out while the four bytes were being read.
+	void Stream::StreamInUtf32() const
+	{
+		unsigned char bytes[4];
+		for (int i = 0; i < 4; ++i)
+		{
+			bytes[i] = GetNextByte();
+		}
+
+		if (!m_input.good())
+		{
+			return;
+		}
+
+		const bool bigEndian = (m_charSet == utf32be);
+		unsigned long ch = 0;
+		for (int i = 0; i < 4; ++i)
+		{
+			// Visit the bytes from most significant to least significant.
+			const int src = bigEndian ? i : (3 - i);
+			ch = (ch << 8) | bytes[src];
+		}
+
+		QueueUnicodeCodepoint(m_readahead, ch);
+	}
+}