Added support for reading UTF-8 encoded xml files

This commit is contained in:
Robert Osfield 2018-09-04 12:19:14 +01:00
parent ae3133522d
commit afe5644b9f
2 changed files with 66 additions and 22 deletions

View File

@ -105,9 +105,9 @@ class OSGDB_EXPORT XmlNode : public osg::Referenced
/// Return the index of the next unread byte in the buffer.
size_type currentPosition() const
{
    return _currentPos;
}
int get() { if (_currentPos<_buffer.size()) return _buffer[_currentPos++]; else return -1; }
/// Consume one byte from the buffer.
/// Returns the byte as a non-negative value (converted via unsigned char),
/// or -1 once the read position has reached the end of the buffer.
int get()
{
    if (_currentPos>=_buffer.size()) return -1;

    const unsigned char byte = static_cast<unsigned char>(_buffer[_currentPos]);
    ++_currentPos;
    return byte;
}
int operator [] (size_type i) const { if ((_currentPos+i)<_buffer.size()) return _buffer[_currentPos+i]; else return -1; }
/// Peek at the byte i positions ahead of the read position without consuming it.
/// Returns the byte as a non-negative value (converted via unsigned char),
/// or -1 when that position lies at or beyond the end of the buffer.
int operator [] (size_type i) const
{
    const size_type pos = _currentPos+i;
    if (pos>=_buffer.size()) return -1;
    return static_cast<unsigned char>(_buffer[pos]);
}
/// Advance the read position by one byte; does nothing once the end of the buffer is reached.
void operator ++ ()
{
    if (_currentPos>=_buffer.size()) return;
    ++_currentPos;
}
@ -129,12 +129,56 @@ class OSGDB_EXPORT XmlNode : public osg::Referenced
/// Test whether the buffer contents starting at the read position equal str.
/// Always returns false when the read position is at the end of the buffer.
bool match(const std::string& str)
{
    if (_currentPos>=_buffer.size()) return false;
    return _buffer.compare(_currentPos, str.size(), str)==0;
}
/// Character encoding used when copying characters out of the buffer.
enum Encoding
{
    ENCODING_ASCII = 0, ///< every character is a single byte
    ENCODING_UTF8 = 1   ///< characters may span several bytes
};
/// Select the encoding consulted by copyCharacterToString().
void setEncoding(Encoding encoding)
{
    _encoding = encoding;
}
/// Return the encoding currently in effect.
Encoding getEncoding() const
{
    return _encoding;
}
/** Append the character at the read position to str and advance past it.
  *
  * With ENCODING_UTF8 the lead byte decides how many bytes belong to the
  * character: below 0x80 one byte, below 0xe0 two, below 0xf0 three,
  * below 0xf8 four, otherwise five. With any other encoding exactly one
  * byte is copied.
  *
  * A sequence that is cut short by the end of the buffer is copied as far
  * as it goes; the read position never moves beyond the end of the buffer.
  * Does nothing when the read position is already at the end.
  *
  * The trailing bytes are not validated as UTF-8 continuation bytes.
  */
inline void copyCharacterToString(std::string& str)
{
    if (_currentPos>=_buffer.size()) return;

    // Number of bytes expected after the lead byte; stays 0 for single-byte encodings.
    unsigned int numTrailingBytes = 0;
    if (_encoding==ENCODING_UTF8)
    {
        // Inspect the lead byte as an unsigned value so bytes >= 0x80 compare correctly.
        const int leadByte = static_cast<unsigned char>(_buffer[_currentPos]);
        if (leadByte>=0xf8) numTrailingBytes = 4;
        else if (leadByte>=0xf0) numTrailingBytes = 3;
        else if (leadByte>=0xe0) numTrailingBytes = 2;
        else if (leadByte>=0x80) numTrailingBytes = 1;
    }

    str.push_back(_buffer[_currentPos]);
    ++_currentPos;

    // Check the bounds before every trailing byte, so a sequence truncated by
    // the end of the buffer neither reads nor advances past the end.
    for(; numTrailingBytes>0 && _currentPos<_buffer.size(); --numTrailingBytes)
    {
        str.push_back(_buffer[_currentPos]);
        ++_currentPos;
    }
}
private:
size_type _currentPos;
size_type _currentPos;
std::ifstream _fin;
std::string _buffer;
std::ifstream _fin;
std::string _buffer;
Encoding _encoding;
};

View File

@ -101,13 +101,15 @@ void XmlNode::ControlMap::setUpControlMappings()
}
XmlNode::Input::Input():
_currentPos(0)
_currentPos(0),
_encoding(ENCODING_ASCII)
{
}
XmlNode::Input::Input(const Input&):
XmlNode::Input::Input(const Input& rhs):
ControlMap(),
_currentPos(0)
_currentPos(0),
_encoding(rhs._encoding)
{
}
@ -251,6 +253,11 @@ bool XmlNode::read(Input& input)
commentNode->contents = input.substr(0, end);
if (end!=std::string::npos)
{
if (commentNode->contents.find("encoding=\"UTF-8\"")!=std::string::npos)
{
input.setEncoding(Input::ENCODING_UTF8);
}
OSG_INFO<<"Valid information record ["<<commentNode->contents<<"]"<<std::endl;
input += (end+2);
}
@ -273,8 +280,7 @@ bool XmlNode::read(Input& input)
int c = 0;
while ((c=input[0])>=0 && c!=' ' && c!='\n' && c!='\r' && c!='>' && c!='/')
{
childNode->name.push_back(c);
++input;
input.copyCharacterToString(childNode->name);
}
while ((c=input[0])>=0 && c!='>' && c!='/')
@ -295,8 +301,7 @@ bool XmlNode::read(Input& input)
readAndReplaceControl(option, input);
else
{
option.push_back(c);
++input;
input.copyCharacterToString(option);
}
}
option.push_back(input[0]);
@ -306,8 +311,7 @@ bool XmlNode::read(Input& input)
{
while((c=input[0])>=0 && c!='>' && c!='/' && c!='"' && c!='\'' && c!='=' && c!=' ' && c!='\n' && c!='\r')
{
option.push_back(c);
++input;
input.copyCharacterToString(option);
}
}
@ -327,8 +331,7 @@ bool XmlNode::read(Input& input)
readAndReplaceControl(value, input);
else
{
value.push_back(c);
++input;
input.copyCharacterToString(value);
}
}
++input;
@ -342,8 +345,7 @@ bool XmlNode::read(Input& input)
readAndReplaceControl(value, input);
else
{
value.push_back(c);
++input;
input.copyCharacterToString(value);
}
}
++input;
@ -353,8 +355,7 @@ bool XmlNode::read(Input& input)
++input;
while((c=input[0])>=0 && c!=' ' && c!='\n' && c!='\r' && c!='"' && c!='\'' && c!='>')
{
value.push_back(c);
++input;
input.copyCharacterToString(value);
}
}
}
@ -414,8 +415,7 @@ bool XmlNode::read(Input& input)
}
else
{
contents.push_back( c );
++input;
input.copyCharacterToString(contents);
}
}