Added osgText::String implementation to better handle decoding of encoded text.

2003-03-05 21:05:37 +00:00 · 2003-03-05 21:05:37 +00:00 · 63df52d408
commit 63df52d408
parent f9d8f3fa9d
7 changed files with 433 additions and 70 deletions
--- a/NEWS.txt
+++ b/NEWS.txt
@ -3,6 +3,16 @@
 OSG News (most significant items from ChangeLog)
 ================================================

+
+    Improved thread safety when working with multipipe systems.
+
+    New OpenProducer based examples.
+
+    New MD2 plugin
+
+    New osgText implementation.
+
+
 24th January 2003 - OpenSceneGraph-0.9.3.tar.gz

    >>> adds support navigational light points, multi-threaded dynamic
--- a/VisualStudio/osgText/osgText.dsp
+++ b/VisualStudio/osgText/osgText.dsp
@ -105,6 +105,10 @@ SOURCE=..\..\src\osgText\Text.cpp
 # End Source File
 # Begin Source File

+SOURCE=..\..\src\osgText\String.cpp
+# End Source File
+# Begin Source File
+
 SOURCE=..\..\src\osgText\Version.cpp
 # End Source File
 # End Group
@ -121,6 +125,10 @@ SOURCE=..\..\include\osgText\Font
 # End Source File
 # Begin Source File

+SOURCE=..\..\include\osgText\String
+# End Source File
+# Begin Source File
+
 SOURCE=..\..\include\osgText\Text
 # End Source File
 # Begin Source File
--- a/include/osgText/String
+++ b/include/osgText/String
@ -0,0 +1,75 @@
+/* -*-c++-*- OpenSceneGraph - Copyright (C) 1998-2003 Robert Osfield 
+ *
+ * This library is open source and may be redistributed and/or modified under  
+ * the terms of the OpenSceneGraph Public License (OSGPL) version 0.0 or 
+ * (at your option) any later version.  The full license is in LICENSE file
+ * included with this distribution, and on the openscenegraph.org website.
+ * 
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
+ * OpenSceneGraph Public License for more details.
+*/
+
+#ifndef OSGTEXT_STRING
+#define OSGTEXT_STRING 1
+
+#include <osg/Referenced>
+#include <osgText/Export>
+
+#include <vector>
+#include <set>
+#include <string>
+
+namespace osgText {
+
+class Text;
+
+/** A string of character codes, held as a std::vector<unsigned int> with one
+  * element per character. The set() methods fill the vector from narrow,
+  * wide or Unicode encoded sources. */
+class OSGTEXT_EXPORT String : public osg::Referenced, public std::vector<unsigned int>
+{
+public:
+
+    String() {}
+    String(const String& str);
+    virtual ~String() {} // public temporarily while osgText is still in flux.
+
+    String& operator = (const String& str);
+
+    /** Replace the contents with the characters of str, one element per
+      * char, with no decoding applied.*/
+    void set(const std::string& str);
+
+    /** Replace the contents with the character codes of a null terminated
+      * wchar_t string, one element per wchar_t.*/
+    void set(const wchar_t* text);
+
+    /**
+     * Types of string encodings supported
+     */
+    enum Encoding
+    {
+        ENCODING_UNDEFINED,                 /// not using Unicode
+        ENCODING_ASCII = ENCODING_UNDEFINED,/// unsigned char ASCII
+        ENCODING_UTF8,                      /// 8-bit unicode transformation format
+        ENCODING_UTF16,                     /// 16-bit signature
+        ENCODING_UTF16_BE,                  /// 16-bit big-endian
+        ENCODING_UTF16_LE,                  /// 16-bit little-endian
+        ENCODING_UTF32,                     /// 32-bit signature
+        ENCODING_UTF32_BE,                  /// 32-bit big-endian
+        ENCODING_UTF32_LE,                  /// 32-bit little-endian
+        ENCODING_SIGNATURE                  /// detect encoding from signature
+    };
+
+    /** Replace the contents with the characters decoded from a Unicode
+      * encoded std::string. The encoding parameter specifies which Unicode
+      * encoding is used in the std::string. For ENCODING_SIGNATURE,
+      * ENCODING_UTF16 and ENCODING_UTF32 the signature at the start of text
+      * is inspected to choose the concrete encoding; when no signature is
+      * recognised the text is handled as ENCODING_ASCII. Characters that
+      * decode to zero are not stored. */
+    void set(const std::string& text,Encoding encoding);
+
+    /** Returns a UTF8 encoded version of this osgText::String.*/
+    std::string createUTF8EncodedString() const;
+
+protected:
+
+};
+
+}
+
+
+#endif
--- a/include/osgText/Text
+++ b/include/osgText/Text
@ -18,6 +18,7 @@
 #include <osg/Quat>

 #include <osgText/Font>
+#include <osgText/String>

 namespace osgText {

@ -56,44 +57,23 @@ public:
    unsigned int getFontHeight() const { return _fontWidth; }
    
    
-    /** TextString is a general purpose vector of char codes (unsigned int's)
-      * which is used internally by Text to represent strings.*/
-    typedef std::vector<unsigned int> TextString;
-
-    /** Set the text using a TextString.*/
-    void setText(const TextString& text);
+    /** Set the text using an osgText::String.*/
+    void setText(const String& text);

    /** Set the text using a std::string, 
      * which is converted to an internal TextString.*/
    void setText(const std::string& text);

-    /**
-     * Types of string encodings supported
-     */
-    enum Encoding
-    {
-        ENCODING_UNDEFINED,                 /// not using Unicode
-        ENCODING_ASCII = ENCODING_UNDEFINED,/// unsigned char ASCII
-        ENCODING_UTF8,                      /// 8-bit unicode transformation format
-        ENCODING_UTF16,                     /// 16-bit signature
-        ENCODING_UTF16_BE,                  /// 16-bit big-endian
-        ENCODING_UTF16_LE,                  /// 16-bit little-endian
-        ENCODING_UTF32,                     /// 32-bit signature
-        ENCODING_UTF32_BE,                  /// 32-bit big-endian
-        ENCODING_UTF32_LE,                  /// 32-bit little-endian
-        ENCODING_SIGNATURE                  /// detect encoding from signature
-    };
-
    /** Set the text using a Unicode encoded std::string, which is converted to an internal TextString.
      * The encoding parameter specificies which Unicode encodeding is used in the std::string. */
-    void setText(const std::string& text,Encoding encoding);
+    void setText(const std::string& text,String::Encoding encoding);

    /** Set the text using a wchar_t string, 
      * which is converted to an internal TextString.*/
    void setText(const wchar_t* text);
    
    /** Get the const text string.*/
-    const TextString& getText() const { return _text; }
+    const String& getText() const { return _text; }


    /** Set the rendered character size in object coordinates.*/
@ -197,20 +177,20 @@ protected:

    
    // members which have public access.
-    osg::ref_ptr<Font>  _font;
-    unsigned int        _fontWidth;
-    unsigned int        _fontHeight;
-    float               _characterHeight;
-    float               _characterAspectRatio;
+    osg::ref_ptr<Font>      _font;
+    unsigned int            _fontWidth;
+    unsigned int            _fontHeight;
+    float                   _characterHeight;
+    float                   _characterAspectRatio;

-    TextString          _text;
-    osg::Vec3           _position;
-    AlignmentType       _alignment;
-    AxisAlignment       _axisAlignment;
-    osg::Quat           _rotation;
-    Layout              _layout;
-    osg::Vec4           _color;
-    unsigned int        _drawMode;
+    String                  _text;
+    osg::Vec3               _position;
+    AlignmentType           _alignment;
+    AxisAlignment           _axisAlignment;
+    osg::Quat               _rotation;
+    Layout                  _layout;
+    osg::Vec4               _color;
+    unsigned int            _drawMode;

    // internal structures, variable and methods used for rendering of characters.
    struct GlyphQuads
--- a/src/osgText/Makefile
+++ b/src/osgText/Makefile
@ -6,6 +6,7 @@ CXXFILES = \
    DefaultFont.cpp\
    Font.cpp\
    Text.cpp\
+    String.cpp\
    Version.cpp\

 DEF     += -DOSGTEXT_LIBRARY
--- a/src/osgText/String.cpp
+++ b/src/osgText/String.cpp
@ -0,0 +1,314 @@
+#include <osgText/String>
+
+#include <osg/Math>
+#include <osg/Notify>
+
+using namespace osgText;
+
+////////////////////////////////////////////////////////////////////////
+//
+// helper class to make it safer to query std::strings for their encoding.
+//
+/** Bounds-checked forward cursor over the bytes of a std::string.
+  * Reading at or beyond the end of the string yields 0 instead of touching
+  * memory outside the string, and advancing never moves the cursor past the
+  * end, so callers can peek at and consume multi-byte sequences without first
+  * checking how many bytes are left.
+  * Only a reference to the string is stored: the string must outlive the
+  * iterator. */
+struct look_ahead_iterator
+{
+    look_ahead_iterator(const std::string& string):
+        _string(string),
+        _index(0),
+        _nullCharacter(0) {}
+
+    /** True while at least one byte remains unread.*/
+    bool valid() const { return _index<_string.length(); }
+
+    /** Advance by one byte; does nothing once the end has been reached.*/
+    look_ahead_iterator& operator ++ ()
+    {
+        if (_index<_string.length()) ++_index;
+        return *this;
+    }
+
+    /** Advance by one byte and return a copy of the cursor as it was before
+      * the advance.*/
+    look_ahead_iterator operator ++ (int)
+    {
+        look_ahead_iterator tmp(*this);
+        if (_index<_string.length()) ++_index;
+        return tmp;
+    }
+
+    /** Advance by offset bytes, stopping at the end of the string.
+      * offset is expected to be non-negative: it is added using unsigned
+      * arithmetic, so a negative value that would step before the start of
+      * the string wraps around and leaves the cursor at the end. */
+    look_ahead_iterator& operator += (int offset)
+    {
+        if (_index<_string.length())
+        {
+            // Clamp by hand: osg::minimum(_index+offset,_string.length()) mixes
+            // unsigned int with std::string::size_type, which cannot be resolved
+            // against a single-type minimum(T,T) wherever the two types differ
+            // (e.g. LP64 platforms).
+            const unsigned int target = _index+offset;
+            const std::string::size_type length = _string.length();
+            _index = (target<length) ? target : (unsigned int)length;
+        }
+        return *this;
+    }
+
+    /** The byte under the cursor, or 0 when the cursor is at the end.*/
+    unsigned char operator * () const
+    {
+        if (_index<_string.length()) return _string[_index];
+        else return _nullCharacter;
+    }
+
+    /** The byte offset positions ahead of the cursor, or 0 when that
+      * position lies beyond the end of the string. The cursor does not move.*/
+    unsigned char operator [] (unsigned int offset) const
+    {
+
+        if (_index+offset<_string.length()) return _string[_index+offset];
+        else return _nullCharacter;
+    }
+
+
+    const std::string&      _string;        // string being read, not owned
+    unsigned int            _index;         // position of the next unread byte
+    unsigned char           _nullCharacter; // value returned for out of range reads
+};
+
+/** Inspect the bytes at the cursor for a Unicode signature (byte order mark).
+  * When one is recognised the cursor is moved past it and the matching
+  * encoding is returned; otherwise the cursor is left where it was and
+  * String::ENCODING_ASCII is returned.
+  * overrideEncoding is only consulted to resolve the FF FE 00 00 ambiguity:
+  * passing String::ENCODING_UTF16 makes that sequence read as a 16-bit
+  * little-endian signature followed by a null character rather than as a
+  * 32-bit little-endian signature. */
+String::Encoding findEncoding(look_ahead_iterator& charString,String::Encoding overrideEncoding)
+{
+    // Look-ahead reads never run off the end of the string (they yield 0),
+    // so all four candidate signature bytes can be fetched up front.
+    const unsigned char byte0 = charString[0];
+    const unsigned char byte1 = charString[1];
+    const unsigned char byte2 = charString[2];
+    const unsigned char byte3 = charString[3];
+
+    // 8-bit signature = EF BB BF
+    if (byte0==0xEF && byte1==0xBB && byte2==0xBF)
+    {
+        charString+=3;
+        return String::ENCODING_UTF8;
+    }
+
+    // 16-bit big-endian signature = FE FF
+    if (byte0==0xFE && byte1==0xFF)
+    {
+        charString+=2;
+        return String::ENCODING_UTF16_BE;
+    }
+
+    // little-endian: 16-bit signature = FF FE, 32-bit signature = FF FE 00 00
+    if (byte0==0xFF && byte1==0xFE)
+    {
+        // NOTE: an empty 16-bit string (signature followed by a null) is
+        // indistinguishable from the start of a 32-bit signature, so the
+        // caller's explicit request for UTF16 settles the matter.
+        const bool looksLike32Bit = (byte2==0) && (byte3==0);
+        if (looksLike32Bit && (overrideEncoding != String::ENCODING_UTF16))
+        {
+            charString+=4;
+            return String::ENCODING_UTF32_LE;
+        }
+
+        charString+=2;
+        return String::ENCODING_UTF16_LE;
+    }
+
+    // 32-bit big-endian signature = 00 00 FE FF
+    if (byte0==0x00 && byte1==0x00 && byte2==0xFE && byte3==0xFF)
+    {
+        charString+=4;
+        return String::ENCODING_UTF32_BE;
+    }
+
+    return String::ENCODING_ASCII;
+}
+
+
+/** Decode one character from charString using the given encoding and move
+  * the cursor past the bytes that were consumed.
+  *
+  * Returns the decoded character code. 0 is returned when the bytes at the
+  * cursor do not form a valid character for the encoding, when the value
+  * lies outside the Unicode range (UTF-32), or when encoding is not one of
+  * the concrete encodings handled below (ENCODING_UTF16, ENCODING_UTF32 and
+  * ENCODING_SIGNATURE must be resolved with findEncoding() first); in the
+  * last case an error is also reported through osg::notify.
+  * Bytes requested beyond the end of the string read as 0. */
+unsigned int getNextCharacter(look_ahead_iterator& charString,String::Encoding encoding)
+{
+    // For more info on unicode encodings see:
+    // http://www-106.ibm.com/developerworks/unicode/library/u-encode.html
+    switch(encoding)
+    {
+        case String::ENCODING_ASCII:
+        {
+            return *charString++;
+        }
+        case String::ENCODING_UTF8:
+        {
+            int char0 = *charString++;
+            if (char0 < 0x80) // 1-byte character
+            {
+                return char0;
+            }
+            if ((char0<0xc0) || (char0>=0xf8))
+            {
+                // A stray continuation byte (10xxxxxx) or a byte that cannot
+                // start a UTF-8 sequence: skip only this byte so that decoding
+                // can pick up again at the next one.
+                break;
+            }
+            int char1 = *charString++;
+            if (char0<0xe0) // 2-byte character
+            {
+                return ((char0&0x1f)<<6) | (char1&0x3f);
+            }
+            int char2 = *charString++;
+            if (char0<0xf0) // 3-byte character
+            {
+                return ((char0&0xf)<<12) | ((char1&0x3f)<<6) | (char2&0x3f);
+            }
+            int char3 = *charString++;
+            // 4-byte character (lead bytes >= 0xf8 were rejected above)
+            return ((char0&0x7)<<18) | ((char1&0x3f)<<12) | ((char2&0x3f)<<6) | (char3&0x3f);
+        }
+        case String::ENCODING_UTF16_BE:
+        {
+            int char0 = *charString++;
+            int char1 = *charString++;
+            if ((char0<=0xD7) || (char0>=0xE0)) // simple character
+            {
+                return (char0<<8) | char1;
+            }
+            else if ((char0>=0xD8)&&(char0<=0xDB)) //using planes (this should get called very rarely)
+            {
+                int char2 = *charString++;
+                int char3 = *charString++;
+                int highSurrogate = (char0<<8) | char1;
+                int lowSurrogate = (char2<<8) | char3;
+                if ((char2>=0xDC)&&(char2<=0xDF)) //only for the valid range of low surrogate
+                {
+                    // This covers the range of all 17 unicode planes.
+                    // High surrogates start at 0xD800, low surrogates at 0xDC00.
+                    return ((highSurrogate-0xD800)*0x400) + (lowSurrogate-0xDC00) + 0x10000;
+                }
+            }
+            break;
+        }
+        case String::ENCODING_UTF16_LE:
+        {
+            // same as the big-endian case with the bytes of each 16-bit unit swapped
+            int char1 = *charString++;
+            int char0 = *charString++;
+            if ((char0<=0xD7) || (char0>=0xE0)) // simple character
+            {
+                return (char0<<8) | char1;
+            }
+            else if ((char0>=0xD8)&&(char0<=0xDB)) //using planes (this should get called very rarely)
+            {
+                int char3 = *charString++;
+                int char2 = *charString++;
+                int highSurrogate = (char0<<8) | char1;
+                int lowSurrogate = (char2<<8) | char3;
+                if ((char2>=0xDC)&&(char2<=0xDF)) //only for the valid range of low surrogate
+                {
+                    // This covers the range of all 17 unicode planes.
+                    // High surrogates start at 0xD800, low surrogates at 0xDC00.
+                    return ((highSurrogate-0xD800)*0x400) + (lowSurrogate-0xDC00) + 0x10000;
+                }
+            }
+            break;
+        }
+        case String::ENCODING_UTF32_BE:
+        {
+            // Assemble as unsigned: with a signed int a first byte >= 0x80
+            // overflows into a negative value that would pass the range check.
+            unsigned int character = ((((unsigned int)charString[0])<<24) | (((unsigned int)charString[1])<<16) |
+                                      (((unsigned int)charString[2])<<8) | charString[3]);
+            charString+=4;
+            if (character<0x110000)
+            {
+                // Character is constrained to the range set by the unicode standard
+                return character;
+            }
+            break;
+        }
+        case String::ENCODING_UTF32_LE:
+        {
+            // Assemble as unsigned, see ENCODING_UTF32_BE above.
+            unsigned int character = ((((unsigned int)charString[3])<<24) | (((unsigned int)charString[2])<<16) |
+                                      (((unsigned int)charString[1])<<8) | charString[0]);
+            charString+=4;
+            if (character<0x110000)
+            {
+                // Character is constrained to the range set by the unicode standard
+                return character;
+            }
+            break;
+        }
+        default:
+        {
+            // Should not reach this point unless the encoding is unhandled
+            // ENCODING_UTF16, ENCODING_UTF32 and ENCODING_SIGNATURE should never enter this method
+            osg::notify(osg::FATAL)<<"Error: Invalid string encoding"<<std::endl;
+            break;
+        }
+    }
+    return 0;
+}
+
+
+////////////////////////////////////////////////////////////////////////////
+//
+// String implementation.
+//
+
+/** Copy constructor: copies the character codes held by str. The
+  * osg::Referenced base is default constructed rather than copied from str. */
+String::String(const String& str):
+    Referenced(),
+    std::vector<unsigned int>(str)
+{
+}
+
+/** Replace the character codes held by this String with a copy of those in
+  * str. Only the vector contents are assigned; the osg::Referenced base
+  * sub-object is left untouched. Self assignment is a no-op.
+  * Returns a reference to this String. */
+String& String::operator = (const String& str)
+{
+    if (&str==this) return *this;
+
+    // vector::assign() replaces the contents in a single step and, unlike
+    // std::copy()/std::back_inserter(), needs no header beyond <vector>.
+    assign(str.begin(),str.end());
+
+    return *this;
+}
+
+/** Replace the contents with the characters of text, one element per char.
+  * No decoding is applied: each byte becomes a character code in the range
+  * 0..255. */
+void String::set(const std::string& text)
+{
+    clear();
+    reserve(text.size());
+    for(std::string::const_iterator itr=text.begin();
+        itr!=text.end();
+        ++itr)
+    {
+        // Convert through unsigned char: where plain char is signed, bytes
+        // >= 0x80 would otherwise be sign extended into huge character codes.
+        push_back((unsigned char)*itr);
+    }
+}
+
+/** Replace the contents with the character codes of a null terminated
+  * wchar_t string, one element per wchar_t (the terminator is not stored).
+  * A null pointer is treated as an empty string, as it was by the
+  * Text::setText(const wchar_t*) implementation this method replaces. */
+void String::set(const wchar_t* text)
+{
+    clear();
+
+    // nothing to read from a null pointer; leave the string empty.
+    if (!text) return;
+
+    while(*text)
+    {
+        push_back(*text++);
+    }
+}
+
+/** Replace the contents with the characters decoded from text.
+  * ENCODING_SIGNATURE, ENCODING_UTF16 and ENCODING_UTF32 are resolved to a
+  * concrete encoding by inspecting the signature at the start of text; any
+  * other value is used as given. Characters that decode to zero are not
+  * stored. */
+void String::set(const std::string& text,Encoding encoding)
+{
+    clear();
+
+    look_ahead_iterator cursor(text);
+
+    // The "work it out from the data" encodings are replaced by whatever the
+    // signature says; findEncoding() also steps the cursor past the signature.
+    switch(encoding)
+    {
+        case ENCODING_SIGNATURE:
+        case ENCODING_UTF16:
+        case ENCODING_UTF32:
+            encoding = findEncoding(cursor,encoding);
+            break;
+        default:
+            break;
+    }
+
+    for(;cursor.valid();)
+    {
+        const unsigned int character = getNextCharacter(cursor,encoding);
+        if (character!=0) push_back(character);
+    }
+}
+
+/** Build a UTF-8 encoded std::string from the character codes held by this
+  * String. Each code is written as a 1, 2, 3 or 4 byte sequence according to
+  * its magnitude. Codes above 0x10FFFF, which UTF-8 cannot represent, are
+  * written as U+FFFD (the Unicode replacement character). */
+std::string String::createUTF8EncodedString() const
+{
+    std::string utf8string;
+    for(const_iterator itr=begin();
+        itr!=end();
+        ++itr)
+    {
+        unsigned int currentChar = *itr;
+        if (currentChar > 0x10FFFF)
+        {
+            // outside the Unicode range: substitute the replacement character
+            currentChar = 0xFFFD;
+        }
+
+        if (currentChar < 0x80)
+        {
+            utf8string+=(char)currentChar;
+        }
+        else if (currentChar < 0x800)
+        {
+            utf8string+=(char)(0xc0 | (currentChar>>6));
+            utf8string+=(char)(0x80 | (currentChar & 0x3f));
+        }
+        else if (currentChar < 0x10000)
+        {
+            utf8string+=(char)(0xe0 | (currentChar>>12));
+            utf8string+=(char)(0x80 | ((currentChar>>6) & 0x3f));
+            utf8string+=(char)(0x80 | (currentChar & 0x3f));
+        }
+        else
+        {
+            // supplementary planes need four bytes; squeezing them into the
+            // three byte form would corrupt the lead byte.
+            utf8string+=(char)(0xf0 | (currentChar>>18));
+            utf8string+=(char)(0x80 | ((currentChar>>12) & 0x3f));
+            utf8string+=(char)(0x80 | ((currentChar>>6) & 0x3f));
+            utf8string+=(char)(0x80 | (currentChar & 0x3f));
+        }
+    }
+    return utf8string;
+}
--- a/src/osgText/Text.cpp
+++ b/src/osgText/Text.cpp
@ -95,7 +95,7 @@ void Text::setCharacterSize(float height,float aspectRatio)
 }


-void Text::setText(const TextString& text)
+void Text::setText(const String& text)
 {
    _text = text;
    computeGlyphRepresentation();
@ -103,45 +103,20 @@ void Text::setText(const TextString& text)

 void Text::setText(const std::string& text)
 {
-    _text.clear();
-    std::copy(text.begin(),text.end(),std::back_inserter(_text));
+    _text.set(text);
    computeGlyphRepresentation();
 }

-void Text::setText(const std::string& text,Encoding encoding)
+void Text::setText(const std::string& text,String::Encoding encoding)
 {
-    _text.clear();
-
-    if (text.empty()) return;
-    
-    
-    std::cerr << "Text::setText(const std::string& text,Encoding encoding) not implemented yet."<<std::endl;
-
-    //std::string::const_iterator itr = text.begin();
-
-    if ((encoding == ENCODING_SIGNATURE) || 
-        (encoding == ENCODING_UTF16) || 
-        (encoding == ENCODING_UTF32))
-    {
-//        encoding = findEncoding(text,pos);
-    }
-    
-    
+    _text.set(text,encoding);
+    computeGlyphRepresentation();
 }
    

 void Text::setText(const wchar_t* text)
 {
-    _text.clear();
-    if (text)
-    {
-        // find the end of wchar_t string
-        const wchar_t* endOfText = text;
-        while (*endOfText) ++endOfText;
-        
-        // pass it to the _text field.
-        std::copy(text,endOfText,std::back_inserter(_text));
-    }
+    _text.set(text);
    computeGlyphRepresentation();
 }

@ -249,7 +224,7 @@ void Text::computeGlyphRepresentation()
    float hr = _characterHeight/(float)activefont->getHeight();
    float wr = hr/_characterAspectRatio;

-    for(TextString::iterator itr=_text.begin();
+    for(String::iterator itr=_text.begin();
        itr!=_text.end();
        ++itr)
    {