dlib/docs/docs/parsing.xml
2012-11-17 23:58:22 -05:00

628 lines
24 KiB
XML

<?xml version="1.0" encoding="ISO-8859-1"?>
<?xml-stylesheet type="text/xsl" href="stylesheet.xsl"?>
<doc>
<title>Parsing</title>
<!-- ************************************************************************* -->
<body>
<br/><br/>
<p>
This page documents the objects and functions that in some way deal with parsing or otherwise
manipulating text.
Everything here follows the same conventions as the rest of the library.
</p>
</body>
<!-- ************************************************************************* -->
<menu width="150">
<top>
<section>
<name>Objects</name>
<item>cmd_line_parser</item>
<item>config_reader</item>
<item>cpp_pretty_printer</item>
<item>cpp_tokenizer</item>
<item>tokenizer</item>
<item>xml_parser</item>
<item>base64</item>
<item>unichar</item>
<item>ustring</item>
<item>basic_utf8_ifstream</item>
</section>
<section>
<name>Global Functions</name>
<item>string_cast</item>
<item>string_assign</item>
<item>cast_to_string</item>
<item>pad_int_with_zeros</item>
<item>cast_to_wstring</item>
<item>wrap_string</item>
<item>narrow</item>
<item>trim</item>
<item>ltrim</item>
<item>rtrim</item>
<item>pad</item>
<item>lpad</item>
<item>rpad</item>
<item>left_substr</item>
<item>right_substr</item>
<item>split</item>
<item>tolower</item>
<item>toupper</item>
<item>convert_utf8_to_utf32</item>
<item>is_combining_char</item>
<item>strings_equal_ignore_case</item>
</section>
</top>
</menu>
<!-- ************************************************************************* -->
<!-- ************************************************************************* -->
<!-- ************************************************************************* -->
<components>
<!-- ************************************************************************* -->
<component>
<name>toupper</name>
<file>dlib/string.h</file>
<spec_file link="true">dlib/string/string_abstract.h</spec_file>
<description>
This is a function to convert a string to all uppercase.
</description>
</component>
<!-- ************************************************************************* -->
<component>
<name>tolower</name>
<file>dlib/string.h</file>
<spec_file link="true">dlib/string/string_abstract.h</spec_file>
<description>
This is a function to convert a string to all lowercase.
</description>
</component>
<!-- ************************************************************************* -->
<component>
<name>split</name>
<file>dlib/string.h</file>
<spec_file link="true">dlib/string/string_abstract.h</spec_file>
<description>
Breaks a string into a sequence of substrings delimited
by a user specified set of characters.
</description>
</component>
<!-- ************************************************************************* -->
<component>
<name>right_substr</name>
<file>dlib/string.h</file>
<spec_file link="true">dlib/string/string_abstract.h</spec_file>
<description>
This is a function to return the part of a string to the right of a user supplied delimiter.
</description>
</component>
<!-- ************************************************************************* -->
<component>
<name>left_substr</name>
<file>dlib/string.h</file>
<spec_file link="true">dlib/string/string_abstract.h</spec_file>
<description>
This is a function to return the part of a string to the left of a user supplied delimiter.
</description>
</component>
<!-- ************************************************************************* -->
<component>
<name>rpad</name>
<file>dlib/string.h</file>
<spec_file link="true">dlib/string/string_abstract.h</spec_file>
<description>
This is a function to pad whitespace (or user specified characters) onto the rightmost end of a string.
</description>
</component>
<!-- ************************************************************************* -->
<component>
<name>lpad</name>
<file>dlib/string.h</file>
<spec_file link="true">dlib/string/string_abstract.h</spec_file>
<description>
This is a function to pad whitespace (or user specified characters) onto the leftmost end of a string.
</description>
</component>
<!-- ************************************************************************* -->
<component>
<name>pad</name>
<file>dlib/string.h</file>
<spec_file link="true">dlib/string/string_abstract.h</spec_file>
<description>
This is a function to pad whitespace (or user specified characters) onto the ends of a string.
</description>
</component>
<!-- ************************************************************************* -->
<component>
<name>rtrim</name>
<file>dlib/string.h</file>
<spec_file link="true">dlib/string/string_abstract.h</spec_file>
<description>
This is a function to remove the whitespace (or user specified characters) from the rightmost end of a string.
</description>
</component>
<!-- ************************************************************************* -->
<component>
<name>ltrim</name>
<file>dlib/string.h</file>
<spec_file link="true">dlib/string/string_abstract.h</spec_file>
<description>
This is a function to remove the whitespace (or user specified characters) from the leftmost end of a string.
</description>
</component>
<!-- ************************************************************************* -->
<component>
<name>trim</name>
<file>dlib/string.h</file>
<spec_file link="true">dlib/string/string_abstract.h</spec_file>
<description>
This is a function to remove the whitespace (or user specified characters) from the ends of a string.
</description>
</component>
<!-- ************************************************************************* -->
<component>
<name>narrow</name>
<file>dlib/string.h</file>
<spec_file link="true">dlib/string/string_abstract.h</spec_file>
<description>
This is a function for converting a string of type std::string or std::wstring
to a plain std::string.
</description>
</component>
<!-- ************************************************************************* -->
<component>
<name>wrap_string</name>
<file>dlib/string.h</file>
<spec_file link="true">dlib/string/string_abstract.h</spec_file>
<description>
wrap_string is a function that takes a string and breaks it into a number of
lines of a given length. You can use this to make a string
fit nicely into a command prompt window for example.
</description>
</component>
<!-- ************************************************************************* -->
<component>
<name>strings_equal_ignore_case</name>
<file>dlib/string.h</file>
<spec_file link="true">dlib/string/string_abstract.h</spec_file>
<description>
This is a pair of functions to do a case insensitive comparison between strings.
</description>
</component>
<!-- ************************************************************************* -->
<component>
<name>cast_to_wstring</name>
<file>dlib/string.h</file>
<spec_file link="true">dlib/string/string_abstract.h</spec_file>
<description>
cast_to_wstring is a templated function which makes it easy to convert arbitrary objects to
std::wstring strings. The types supported are any types that can be written to std::wostream via
operator&lt;&lt;.
</description>
</component>
<!-- ************************************************************************* -->
<component>
<name>cast_to_string</name>
<file>dlib/string.h</file>
<spec_file link="true">dlib/string/string_abstract.h</spec_file>
<description>
cast_to_string is a templated function which makes it easy to convert arbitrary objects to
std::string strings. The types supported are any types that can be written to std::ostream via
operator&lt;&lt;.
</description>
</component>
<!-- ************************************************************************* -->
<component>
<name>pad_int_with_zeros</name>
<file>dlib/string.h</file>
<spec_file link="true">dlib/string/string_abstract.h</spec_file>
<description>
Converts an integer into a string and pads it with leading zeros.
</description>
</component>
<!-- ************************************************************************* -->
<component>
<name>string_cast</name>
<file>dlib/string.h</file>
<spec_file link="true">dlib/string/string_abstract.h</spec_file>
<description>
string_cast is a templated function which makes it easy to convert strings to
other types. The types supported are any types that can be read by the basic_istream operator&gt;&gt;. It
also supports casting between wstring, string, and ustring objects.
</description>
</component>
<!-- ************************************************************************* -->
<component>
<name>string_assign</name>
<file>dlib/string.h</file>
<spec_file link="true">dlib/string/string_abstract.h</spec_file>
<description>
string_assign is an object which makes it easy to convert strings to
other types. The types supported are any types that can be read by the basic_istream operator&gt;&gt;. It
also supports casting between wstring, string, and ustring objects. Since
string_assign is a simple stateless object there is a global instance of it
called dlib::sa.
</description>
<examples>
<example>config_reader_ex.cpp.html</example>
</examples>
</component>
<!-- ************************************************************************* -->
<component>
<name>unichar</name>
<file>dlib/unicode.h</file>
<spec_file link="true">dlib/unicode/unicode_abstract.h</spec_file>
<description>
This is a typedef for an unsigned 32-bit integer which we use to store
Unicode values.
</description>
</component>
<!-- ************************************************************************* -->
<component>
<name>basic_utf8_ifstream</name>
<file>dlib/unicode.h</file>
<spec_file link="true">dlib/unicode/unicode_abstract.h</spec_file>
<description>
This object represents an input file stream much like the
normal std::ifstream except that it knows how to read UTF-8
data. So when you read characters out of this stream it will
automatically convert them from the UTF-8 multibyte encoding
into a fixed width wide character encoding.
<p>
There are also two typedefs of this object. The first is utf8_wifstream, which
reads into wchar_t wide characters. The second is utf8_uifstream,
which reads into unichar characters instead of wchar_t.
</p>
</description>
</component>
<!-- ************************************************************************* -->
<component>
<name>ustring</name>
<file>dlib/unicode.h</file>
<spec_file link="true">dlib/unicode/unicode_abstract.h</spec_file>
<description>
This is a typedef for a std::basic_string&lt;unichar&gt;. That is, it is a typedef
for a string object that stores unichar Unicode characters.
</description>
</component>
<!-- ************************************************************************* -->
<component>
<name>is_combining_char</name>
<file>dlib/unicode.h</file>
<spec_file link="true">dlib/unicode/unicode_abstract.h</spec_file>
<description>
This is a global function that can tell you if a character is a Unicode
combining character or not.
</description>
</component>
<!-- ************************************************************************* -->
<component>
<name>convert_utf8_to_utf32</name>
<file>dlib/unicode.h</file>
<spec_file link="true">dlib/unicode/unicode_abstract.h</spec_file>
<description>
This is a global function that can convert UTF-8 strings into strings
of 32-bit unichar characters.
</description>
</component>
<!-- ************************************************************************* -->
<component>
<name>base64</name>
<file>dlib/base64.h</file>
<spec_file>dlib/base64/base64_kernel_abstract.h</spec_file>
<description>
This object allows you to encode and decode data to and from
the Base64 Content-Transfer-Encoding defined in section 6.8 of
RFC 2045.
</description>
<examples>
<example>file_to_code_ex.cpp.html</example>
</examples>
</component>
<!-- ************************************************************************* -->
<component checked="true">
<name>cmd_line_parser</name>
<file>dlib/cmd_line_parser.h</file>
<spec_file>dlib/cmd_line_parser/cmd_line_parser_kernel_abstract.h</spec_file>
<description>
This object allows you to easily parse a command line. Note that the
documentation for the <a href="dlib/interfaces/cmd_line_parser_option.h.html">cmd_line_parser_option</a>
(the object returned by the parser's .option() function) is in a separate file.
<p>
Note also that there are standard typedefs for the ASCII and wide character versions of the
cmd_line_parser template. These are the <tt>command_line_parser</tt> and <tt>wcommand_line_parser</tt>
types respectively.
</p>
</description>
<examples>
<example>compress_stream_ex.cpp.html</example>
<example>train_object_detector.cpp.html</example>
</examples>
<extensions>
<extension>
<name>get_option</name>
<spec_file>dlib/cmd_line_parser/get_option_abstract.h</spec_file>
<description>This extension provides a convenience function for accessing the
options to a command line argument or a <a href="#config_reader">config_reader</a>. It
is automatically #included when using the command line parser or config reader.
</description>
</extension>
</extensions>
</component>
<!-- ************************************************************************* -->
<component>
<name>config_reader</name>
<file>dlib/config_reader.h</file>
<spec_file>dlib/config_reader/config_reader_kernel_abstract.h</spec_file>
<description>
This object represents something which is intended to be used to read
text configuration files.
</description>
<examples>
<example>config_reader_ex.cpp.html</example>
</examples>
<extensions>
<extension>
<name>config_reader_thread_safe</name>
<spec_file>dlib/config_reader/config_reader_thread_safe_abstract.h</spec_file>
<description>
This object extends a normal config_reader by simply wrapping all
its member functions inside mutex locks to make it safe to use
in a threaded program.
</description>
</extension>
</extensions>
</component>
<!-- ************************************************************************* -->
<component>
<name>cpp_pretty_printer</name>
<file>dlib/cpp_pretty_printer.h</file>
<spec_file>dlib/cpp_pretty_printer/cpp_pretty_printer_kernel_abstract.h</spec_file>
<description>
This object represents an HTML pretty printer for C++ source code.
</description>
<implementations>
<implementation>
<name>cpp_pretty_printer_kernel_1</name>
<file>dlib/cpp_pretty_printer/cpp_pretty_printer_kernel_1.h</file>
<description>
This is implemented by using the <a href="#cpp_tokenizer">cpp_tokenizer</a> object.
This is the pretty printer I use on all the source in this library. It applies a color scheme, turns
include directives such as #include "file.h" into links to file.h.html and puts HTML anchor points
on function and class declarations. It also looks for comments starting with /*!A and puts an anchor
before the comment using the word following the A as the name of the anchor.
</description>
<typedefs>
<typedef>
<name>kernel_1a</name>
<description>is a typedef for cpp_pretty_printer_kernel_1</description>
</typedef>
</typedefs>
</implementation>
<implementation>
<name>cpp_pretty_printer_kernel_2</name>
<file>dlib/cpp_pretty_printer/cpp_pretty_printer_kernel_2.h</file>
<description>
This is implemented by using the <a href="#cpp_tokenizer">cpp_tokenizer</a> object.
It applies a black and white color scheme suitable
for printing on a black and white printer. It also places the document title
prominently at the top of the pretty printed source file.
</description>
<typedefs>
<typedef>
<name>kernel_2a</name>
<description>is a typedef for cpp_pretty_printer_kernel_2</description>
</typedef>
</typedefs>
</implementation>
</implementations>
</component>
<!-- ************************************************************************* -->
<component checked="true">
<name>cpp_tokenizer</name>
<file>dlib/cpp_tokenizer.h</file>
<spec_file>dlib/cpp_tokenizer/cpp_tokenizer_kernel_abstract.h</spec_file>
<description>
This object represents a simple tokenizer for C++ source code.
</description>
<implementations>
<implementation>
<name>cpp_tokenizer_kernel_1</name>
<file>dlib/cpp_tokenizer/cpp_tokenizer_kernel_1.h</file>
<description>
This is implemented by using the <a href="#tokenizer">tokenizer</a> object in the obvious way.
</description>
<typedefs>
<typedef>
<name>kernel_1a</name>
<description>is a typedef for cpp_tokenizer_kernel_1</description>
</typedef>
</typedefs>
</implementation>
</implementations>
</component>
<!-- ************************************************************************* -->
<component checked="true">
<name>tokenizer</name>
<file>dlib/tokenizer.h</file>
<spec_file>dlib/tokenizer/tokenizer_kernel_abstract.h</spec_file>
<description>
This object represents a simple tokenizer for textual data.
</description>
<implementations>
<implementation>
<name>tokenizer_kernel_1</name>
<file>dlib/tokenizer/tokenizer_kernel_1.h</file>
<description>
This is implemented in the obvious way.
</description>
<typedefs>
<typedef>
<name>kernel_1a</name>
<description>is a typedef for tokenizer_kernel_1</description>
</typedef>
</typedefs>
</implementation>
</implementations>
</component>
<!-- ************************************************************************* -->
<component>
<name>xml_parser</name>
<file>dlib/xml_parser.h</file>
<spec_file>dlib/xml_parser/xml_parser_kernel_abstract.h</spec_file>
<description>
This object represents a simple SAX style event driven XML parser.
It takes its input from an input stream object and sends events to all
registered document_handler and error_handler objects.
<br/><br/>
The xml_parser object also uses the interface classes
<a href="dlib/xml_parser/xml_parser_kernel_interfaces.h.html#document_handler">document_handler</a>
and
<a href="dlib/xml_parser/xml_parser_kernel_interfaces.h.html#error_handler">error_handler</a>.
Subclasses of these classes are passed to the xml_parser which generates events while it's
parsing and sends them to the appropriate handler.
</description>
<examples>
<example>xml_parser_ex.cpp.html</example>
</examples>
</component>
<!-- ************************************************************************* -->
</components>
<!-- ************************************************************************* -->
</doc>