diff options
Diffstat (limited to 'src/cajun/reader.cpp')
| -rw-r--r-- | src/cajun/reader.cpp | 534 |
1 files changed, 534 insertions, 0 deletions
diff --git a/src/cajun/reader.cpp b/src/cajun/reader.cpp new file mode 100644 index 0000000..f625367 --- /dev/null +++ b/src/cajun/reader.cpp @@ -0,0 +1,534 @@ +/****************************************************************************** + +Copyright (c) 2009-2010, Terry Caton +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the projecct nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +******************************************************************************/ + +#include <cassert> +#include <set> +#include <sstream> +#include "reader.h" + +/* + +TODO: +* better documentation +* unicode character decoding + +*/ + +namespace json +{ + +inline std::istream& operator >> (std::istream& istr, UnknownElement& elementRoot) { + Reader::Read(elementRoot, istr); + return istr; +} + +inline Reader::Location::Location() : + m_nLine(0), + m_nLineOffset(0), + m_nDocOffset(0) +{} + + +////////////////////// +// Reader::InputStream + +class Reader::InputStream // would be cool if we could inherit from std::istream & override "get" +{ +public: + InputStream(std::istream& iStr) : + m_iStr(iStr) {} + + // protect access to the input stream, so we can keeep track of document/line offsets + char Get(); // big, define outside + char Peek() { + assert(m_iStr.eof() == false); // enforce reading of only valid stream data + return m_iStr.peek(); + } + + bool EOS() { + m_iStr.peek(); // apparently eof flag isn't set until a character read is attempted. whatever. + return m_iStr.eof(); + } + + const Location& GetLocation() const { return m_Location; } + +private: + std::istream& m_iStr; + Location m_Location; +}; + + +inline char Reader::InputStream::Get() +{ + assert(m_iStr.eof() == false); // enforce reading of only valid stream data + char c = m_iStr.get(); + + ++m_Location.m_nDocOffset; + if (c == '\n') { + ++m_Location.m_nLine; + m_Location.m_nLineOffset = 0; + } + else { + ++m_Location.m_nLineOffset; + } + + return c; +} + + + +////////////////////// +// Reader::TokenStream + +class Reader::TokenStream +{ +public: + TokenStream(const Tokens& tokens); + + const Token& Peek(); + const Token& Get(); + + bool EOS() const; + +private: + const Tokens& m_Tokens; + Tokens::const_iterator m_itCurrent; +}; + + +inline Reader::TokenStream::TokenStream(const Tokens& tokens) : + m_Tokens(tokens), + m_itCurrent(tokens.begin()) +{} + +inline const Reader::Token& Reader::TokenStream::Peek() { + if (EOS()) + { + const Token& lastToken = *m_Tokens.rbegin(); + std::string sMessage = "Unexpected end of token stream"; + throw ParseException(sMessage, lastToken.locBegin, lastToken.locEnd); // nowhere to point to + } + return *(m_itCurrent); +} + +inline const Reader::Token& Reader::TokenStream::Get() { + const Token& token = Peek(); + ++m_itCurrent; + return token; +} + +inline bool Reader::TokenStream::EOS() const { + return m_itCurrent == m_Tokens.end(); +} + +/////////////////// +// Reader (finally) + + +inline void Reader::Read(Object& object, std::istream& istr) { Read_i(object, istr); } +inline void Reader::Read(Array& array, std::istream& istr) { Read_i(array, istr); } +inline void Reader::Read(String& string, std::istream& istr) { Read_i(string, istr); } +inline void Reader::Read(Number& number, std::istream& istr) { Read_i(number, istr); } +inline void Reader::Read(Boolean& boolean, std::istream& istr) { Read_i(boolean, istr); } +inline void Reader::Read(Null& null, std::istream& istr) { Read_i(null, istr); } +inline void Reader::Read(UnknownElement& unknown, std::istream& istr) { Read_i(unknown, istr); } + + +template <typename ElementTypeT> +void Reader::Read_i(ElementTypeT& element, std::istream& istr) +{ + Reader reader; + + Tokens tokens; + InputStream inputStream(istr); + reader.Scan(tokens, inputStream); + + TokenStream tokenStream(tokens); + reader.Parse(element, tokenStream); + + if (tokenStream.EOS() == false) + { + const Token& token = tokenStream.Peek(); + std::string sMessage = std::string("Expected End of token stream; found ") + token.sValue; + throw ParseException(sMessage, token.locBegin, token.locEnd); + } +} + + +inline void Reader::Scan(Tokens& tokens, InputStream& inputStream) +{ + while (EatWhiteSpace(inputStream), // ignore any leading white space... + inputStream.EOS() == false) // ...before checking for EOS + { + // if all goes well, we'll create a token each pass + Token token; + token.locBegin = inputStream.GetLocation(); + + // gives us null-terminated string + char sChar = inputStream.Peek(); + switch (sChar) + { + case '{': + token.sValue = MatchExpectedString(inputStream, "{"); + token.nType = Token::TOKEN_OBJECT_BEGIN; + break; + + case '}': + token.sValue = MatchExpectedString(inputStream, "}"); + token.nType = Token::TOKEN_OBJECT_END; + break; + + case '[': + token.sValue = MatchExpectedString(inputStream, "["); + token.nType = Token::TOKEN_ARRAY_BEGIN; + break; + + case ']': + token.sValue = MatchExpectedString(inputStream, "]"); + token.nType = Token::TOKEN_ARRAY_END; + break; + + case ',': + token.sValue = MatchExpectedString(inputStream, ","); + token.nType = Token::TOKEN_NEXT_ELEMENT; + break; + + case ':': + token.sValue = MatchExpectedString(inputStream, ":"); + token.nType = Token::TOKEN_MEMBER_ASSIGN; + break; + + case '"': + token.sValue = MatchString(inputStream); + token.nType = Token::TOKEN_STRING; + break; + + case '-': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + token.sValue = MatchNumber(inputStream); + token.nType = Token::TOKEN_NUMBER; + break; + + case 't': + token.sValue = MatchExpectedString(inputStream, "true"); + token.nType = Token::TOKEN_BOOLEAN; + break; + + case 'f': + token.sValue = MatchExpectedString(inputStream, "false"); + token.nType = Token::TOKEN_BOOLEAN; + break; + + case 'n': + token.sValue = MatchExpectedString(inputStream, "null"); + token.nType = Token::TOKEN_NULL; + break; + + default: + { + std::string sErrorMessage = std::string("Unexpected character in stream: ") + sChar; + throw ScanException(sErrorMessage, inputStream.GetLocation()); + } + } + + token.locEnd = inputStream.GetLocation(); + tokens.push_back(token); + } +} + + +inline void Reader::EatWhiteSpace(InputStream& inputStream) +{ + while (inputStream.EOS() == false && + ::isspace(inputStream.Peek())) + inputStream.Get(); +} + +inline std::string Reader::MatchExpectedString(InputStream& inputStream, const std::string& sExpected) +{ + std::string::const_iterator it(sExpected.begin()), + itEnd(sExpected.end()); + for ( ; it != itEnd; ++it) { + if (inputStream.EOS() || // did we reach the end before finding what we're looking for... + inputStream.Get() != *it) // ...or did we find something different? + { + std::string sMessage = std::string("Expected string: ") + sExpected; + throw ScanException(sMessage, inputStream.GetLocation()); + } + } + + // all's well if we made it here + return sExpected; +} + + +inline std::string Reader::MatchString(InputStream& inputStream) +{ + MatchExpectedString(inputStream, "\""); + + std::string string; + while (inputStream.EOS() == false && + inputStream.Peek() != '"') + { + char c = inputStream.Get(); + + // escape? + if (c == '\\' && + inputStream.EOS() == false) // shouldn't have reached the end yet + { + c = inputStream.Get(); + switch (c) { + case '/': string.push_back('/'); break; + case '"': string.push_back('"'); break; + case '\\': string.push_back('\\'); break; + case 'b': string.push_back('\b'); break; + case 'f': string.push_back('\f'); break; + case 'n': string.push_back('\n'); break; + case 'r': string.push_back('\r'); break; + case 't': string.push_back('\t'); break; + //case 'u': string.push_back('\u'); break; // TODO: what do we do with this? + default: { + std::string sMessage = std::string("Unrecognized escape sequence found in string: \\") + c; + throw ScanException(sMessage, inputStream.GetLocation()); + } + } + } + else { + string.push_back(c); + } + } + + // eat the last '"' that we just peeked + MatchExpectedString(inputStream, "\""); + + // all's well if we made it here + return string; +} + + +inline std::string Reader::MatchNumber(InputStream& inputStream) +{ + const char sNumericChars[] = "0123456789.eE-+"; + std::set<char> numericChars; + numericChars.insert(sNumericChars, sNumericChars + sizeof(sNumericChars)); + + std::string sNumber; + while (inputStream.EOS() == false && + numericChars.find(inputStream.Peek()) != numericChars.end()) + { + sNumber.push_back(inputStream.Get()); + } + + return sNumber; +} + + +inline void Reader::Parse(UnknownElement& element, Reader::TokenStream& tokenStream) +{ + const Token& token = tokenStream.Peek(); + switch (token.nType) { + case Token::TOKEN_OBJECT_BEGIN: + { + // implicit non-const cast will perform conversion for us (if necessary) + Object& object = element; + Parse(object, tokenStream); + break; + } + + case Token::TOKEN_ARRAY_BEGIN: + { + Array& array = element; + Parse(array, tokenStream); + break; + } + + case Token::TOKEN_STRING: + { + String& string = element; + Parse(string, tokenStream); + break; + } + + case Token::TOKEN_NUMBER: + { + Number& number = element; + Parse(number, tokenStream); + break; + } + + case Token::TOKEN_BOOLEAN: + { + Boolean& boolean = element; + Parse(boolean, tokenStream); + break; + } + + case Token::TOKEN_NULL: + { + Null& null = element; + Parse(null, tokenStream); + break; + } + + default: + { + std::string sMessage = std::string("Unexpected token: ") + token.sValue; + throw ParseException(sMessage, token.locBegin, token.locEnd); + } + } +} + + +inline void Reader::Parse(Object& object, Reader::TokenStream& tokenStream) +{ + MatchExpectedToken(Token::TOKEN_OBJECT_BEGIN, tokenStream); + + bool bContinue = (tokenStream.EOS() == false && + tokenStream.Peek().nType != Token::TOKEN_OBJECT_END); + while (bContinue) + { + Object::Member member; + + // first the member name. save the token in case we have to throw an exception + const Token& tokenName = tokenStream.Peek(); + member.name = MatchExpectedToken(Token::TOKEN_STRING, tokenStream); + + // ...then the key/value separator... + MatchExpectedToken(Token::TOKEN_MEMBER_ASSIGN, tokenStream); + + // ...then the value itself (can be anything). + Parse(member.element, tokenStream); + + // try adding it to the object (this could throw) + try + { + object.Insert(member); + } + catch (Exception&) + { + // must be a duplicate name + std::string sMessage = std::string("Duplicate object member token: ") + member.name; + throw ParseException(sMessage, tokenName.locBegin, tokenName.locEnd); + } + + bContinue = (tokenStream.EOS() == false && + tokenStream.Peek().nType == Token::TOKEN_NEXT_ELEMENT); + if (bContinue) + MatchExpectedToken(Token::TOKEN_NEXT_ELEMENT, tokenStream); + } + + MatchExpectedToken(Token::TOKEN_OBJECT_END, tokenStream); +} + + +inline void Reader::Parse(Array& array, Reader::TokenStream& tokenStream) +{ + MatchExpectedToken(Token::TOKEN_ARRAY_BEGIN, tokenStream); + + bool bContinue = (tokenStream.EOS() == false && + tokenStream.Peek().nType != Token::TOKEN_ARRAY_END); + while (bContinue) + { + // ...what's next? could be anything + Array::iterator itElement = array.Insert(UnknownElement()); + UnknownElement& element = *itElement; + Parse(element, tokenStream); + + bContinue = (tokenStream.EOS() == false && + tokenStream.Peek().nType == Token::TOKEN_NEXT_ELEMENT); + if (bContinue) + MatchExpectedToken(Token::TOKEN_NEXT_ELEMENT, tokenStream); + } + + MatchExpectedToken(Token::TOKEN_ARRAY_END, tokenStream); +} + + +inline void Reader::Parse(String& string, Reader::TokenStream& tokenStream) +{ + string = MatchExpectedToken(Token::TOKEN_STRING, tokenStream); +} + + +inline void Reader::Parse(Number& number, Reader::TokenStream& tokenStream) +{ + const Token& currentToken = tokenStream.Peek(); // might need this later for throwing exception + const std::string& sValue = MatchExpectedToken(Token::TOKEN_NUMBER, tokenStream); + + std::istringstream iStr(sValue); + double dValue; + iStr >> dValue; + + // did we consume all characters in the token? + if (iStr.eof() == false) + { + char c = iStr.peek(); + std::string sMessage = std::string("Unexpected character in NUMBER token: ") + c; + throw ParseException(sMessage, currentToken.locBegin, currentToken.locEnd); + } + + number = dValue; +} + + +inline void Reader::Parse(Boolean& boolean, Reader::TokenStream& tokenStream) +{ + const std::string& sValue = MatchExpectedToken(Token::TOKEN_BOOLEAN, tokenStream); + boolean = (sValue == "true" ? true : false); +} + + +inline void Reader::Parse(Null&, Reader::TokenStream& tokenStream) +{ + MatchExpectedToken(Token::TOKEN_NULL, tokenStream); +} + + +inline const std::string& Reader::MatchExpectedToken(Token::Type nExpected, Reader::TokenStream& tokenStream) +{ + const Token& token = tokenStream.Get(); + if (token.nType != nExpected) + { + std::string sMessage = std::string("Unexpected token: ") + token.sValue; + throw ParseException(sMessage, token.locBegin, token.locEnd); + } + + return token.sValue; +} + +} // End namespace |
