mirror of
https://github.com/davisking/dlib.git
synced 2024-11-01 10:14:53 +08:00
628 lines
24 KiB
XML
628 lines
24 KiB
XML
<?xml version="1.0" encoding="ISO-8859-1"?>
|
|
<?xml-stylesheet type="text/xsl" href="stylesheet.xsl"?>
|
|
|
|
<doc>
|
|
<title>Parsing</title>
|
|
|
|
<!-- ************************************************************************* -->
|
|
|
|
<body>
|
|
<br/><br/>
|
|
|
|
<p>
|
|
This page documents the objects and functions that in some way deal with parsing or otherwise
|
|
manipulating text.
|
|
Everything here follows the same conventions as the rest of the library.
|
|
</p>
|
|
|
|
|
|
|
|
</body>
|
|
|
|
<!-- ************************************************************************* -->
|
|
|
|
<menu width="150">
|
|
<top>
|
|
<section>
|
|
<name>Objects</name>
|
|
<item>cmd_line_parser</item>
|
|
<item>config_reader</item>
|
|
<item>cpp_pretty_printer</item>
|
|
<item>cpp_tokenizer</item>
|
|
<item>tokenizer</item>
|
|
<item>xml_parser</item>
|
|
<item>base64</item>
|
|
<item>unichar</item>
|
|
<item>ustring</item>
|
|
<item>basic_utf8_ifstream</item>
|
|
|
|
</section>
|
|
|
|
<section>
|
|
<name>Global Functions</name>
|
|
<item>string_cast</item>
|
|
<item>string_assign</item>
|
|
<item>cast_to_string</item>
|
|
<item>pad_int_with_zeros</item>
|
|
<item>cast_to_wstring</item>
|
|
<item>wrap_string</item>
|
|
<item>narrow</item>
|
|
<item>trim</item>
|
|
<item>ltrim</item>
|
|
<item>rtrim</item>
|
|
<item>pad</item>
|
|
<item>lpad</item>
|
|
<item>rpad</item>
|
|
<item>left_substr</item>
|
|
<item>right_substr</item>
|
|
<item>split</item>
|
|
<item>tolower</item>
|
|
<item>toupper</item>
|
|
<item>convert_utf8_to_utf32</item>
|
|
<item>is_combining_char</item>
|
|
<item>strings_equal_ignore_case</item>
|
|
</section>
|
|
</top>
|
|
</menu>
|
|
|
|
<!-- ************************************************************************* -->
|
|
<!-- ************************************************************************* -->
|
|
<!-- ************************************************************************* -->
|
|
|
|
<components>
|
|
|
|
|
|
|
|
<!-- ************************************************************************* -->
|
|
|
|
<component>
|
|
<name>toupper</name>
|
|
<file>dlib/string.h</file>
|
|
<spec_file link="true">dlib/string/string_abstract.h</spec_file>
|
|
<description>
|
|
This is a function to convert a string to all uppercase.
|
|
</description>
|
|
|
|
</component>
|
|
|
|
<!-- ************************************************************************* -->
|
|
|
|
<component>
|
|
<name>tolower</name>
|
|
<file>dlib/string.h</file>
|
|
<spec_file link="true">dlib/string/string_abstract.h</spec_file>
|
|
<description>
|
|
This is a function to convert a string to all lowercase.
|
|
</description>
|
|
|
|
</component>
|
|
|
|
|
|
<!-- ************************************************************************* -->
|
|
|
|
<component>
|
|
<name>split</name>
|
|
<file>dlib/string.h</file>
|
|
<spec_file link="true">dlib/string/string_abstract.h</spec_file>
|
|
<description>
|
|
Breaks a string into a sequence of substrings delimited
|
|
by a user specified set of characters.
|
|
</description>
|
|
|
|
</component>
|
|
|
|
<!-- ************************************************************************* -->
|
|
<component>
|
|
<name>right_substr</name>
|
|
<file>dlib/string.h</file>
|
|
<spec_file link="true">dlib/string/string_abstract.h</spec_file>
|
|
<description>
|
|
This is a function to return the part of a string to the right of a user supplied delimiter.
|
|
</description>
|
|
|
|
</component>
|
|
|
|
<!-- ************************************************************************* -->
|
|
<component>
|
|
<name>left_substr</name>
|
|
<file>dlib/string.h</file>
|
|
<spec_file link="true">dlib/string/string_abstract.h</spec_file>
|
|
<description>
|
|
This is a function to return the part of a string to the left of a user supplied delimiter.
|
|
</description>
|
|
|
|
</component>
|
|
|
|
<!-- ************************************************************************* -->
|
|
|
|
<component>
|
|
<name>rpad</name>
|
|
<file>dlib/string.h</file>
|
|
<spec_file link="true">dlib/string/string_abstract.h</spec_file>
|
|
<description>
|
|
This is a function to pad whitespace (or user specified characters) onto the right most end of a string.
|
|
</description>
|
|
|
|
</component>
|
|
|
|
<!-- ************************************************************************* -->
|
|
|
|
<component>
|
|
<name>lpad</name>
|
|
<file>dlib/string.h</file>
|
|
<spec_file link="true">dlib/string/string_abstract.h</spec_file>
|
|
<description>
|
|
This is a function to pad whitespace (or user specified characters) onto the left most end of a string.
|
|
</description>
|
|
|
|
</component>
|
|
|
|
<!-- ************************************************************************* -->
|
|
|
|
<component>
|
|
<name>pad</name>
|
|
<file>dlib/string.h</file>
|
|
<spec_file link="true">dlib/string/string_abstract.h</spec_file>
|
|
<description>
|
|
This is a function to pad whitespace (or user specified characters) onto the ends of a string.
|
|
</description>
|
|
|
|
</component>
|
|
|
|
<!-- ************************************************************************* -->
|
|
|
|
<component>
|
|
<name>rtrim</name>
|
|
<file>dlib/string.h</file>
|
|
<spec_file link="true">dlib/string/string_abstract.h</spec_file>
|
|
<description>
|
|
This is a function to remove the whitespace (or user specified characters) from the right most end of a string.
|
|
</description>
|
|
|
|
</component>
|
|
|
|
<!-- ************************************************************************* -->
|
|
|
|
<component>
|
|
<name>ltrim</name>
|
|
<file>dlib/string.h</file>
|
|
<spec_file link="true">dlib/string/string_abstract.h</spec_file>
|
|
<description>
|
|
This is a function to remove the whitespace (or user specified characters) from the left most end of a string.
|
|
</description>
|
|
|
|
</component>
|
|
|
|
<!-- ************************************************************************* -->
|
|
|
|
<component>
|
|
<name>trim</name>
|
|
<file>dlib/string.h</file>
|
|
<spec_file link="true">dlib/string/string_abstract.h</spec_file>
|
|
<description>
|
|
This is a function to remove the whitespace (or user specified characters) from the ends of a string.
|
|
</description>
|
|
|
|
</component>
|
|
|
|
<!-- ************************************************************************* -->
|
|
|
|
<component>
|
|
<name>narrow</name>
|
|
<file>dlib/string.h</file>
|
|
<spec_file link="true">dlib/string/string_abstract.h</spec_file>
|
|
<description>
|
|
This is a function for converting a string of type std::string or std::wstring
|
|
to a plain std::string.
|
|
</description>
|
|
|
|
</component>
|
|
|
|
<!-- ************************************************************************* -->
|
|
|
|
<component>
|
|
<name>wrap_string</name>
|
|
<file>dlib/string.h</file>
|
|
<spec_file link="true">dlib/string/string_abstract.h</spec_file>
|
|
<description>
|
|
wrap_string is a function that takes a string and breaks it into a number of
|
|
lines of a given length. You can use this to make a string
|
|
fit nicely into a command prompt window for example.
|
|
</description>
|
|
|
|
</component>
|
|
|
|
<!-- ************************************************************************* -->
|
|
|
|
<component>
|
|
<name>strings_equal_ignore_case</name>
|
|
<file>dlib/string.h</file>
|
|
<spec_file link="true">dlib/string/string_abstract.h</spec_file>
|
|
<description>
|
|
This is a pair of functions to do a case insensitive comparison between strings.
|
|
</description>
|
|
|
|
</component>
|
|
|
|
<!-- ************************************************************************* -->
|
|
|
|
<component>
|
|
<name>cast_to_wstring</name>
|
|
<file>dlib/string.h</file>
|
|
<spec_file link="true">dlib/string/string_abstract.h</spec_file>
|
|
<description>
|
|
cast_to_wstring is a templated function which makes it easy to convert arbitrary objects to
|
|
std::wstring strings. The types supported are any types that can be written to std::wostream via
|
|
operator&lt;&lt;.
|
|
</description>
|
|
|
|
</component>
|
|
|
|
<!-- ************************************************************************* -->
|
|
|
|
<component>
|
|
<name>cast_to_string</name>
|
|
<file>dlib/string.h</file>
|
|
<spec_file link="true">dlib/string/string_abstract.h</spec_file>
|
|
<description>
|
|
cast_to_string is a templated function which makes it easy to convert arbitrary objects to
|
|
std::string strings. The types supported are any types that can be written to std::ostream via
|
|
operator&lt;&lt;.
|
|
</description>
|
|
|
|
</component>
|
|
|
|
<!-- ************************************************************************* -->
|
|
|
|
<component>
|
|
<name>pad_int_with_zeros</name>
|
|
<file>dlib/string.h</file>
|
|
<spec_file link="true">dlib/string/string_abstract.h</spec_file>
|
|
<description>
|
|
Converts an integer into a string and pads it with leading zeros.
|
|
</description>
|
|
|
|
</component>
|
|
|
|
<!-- ************************************************************************* -->
|
|
|
|
<component>
|
|
<name>string_cast</name>
|
|
<file>dlib/string.h</file>
|
|
<spec_file link="true">dlib/string/string_abstract.h</spec_file>
|
|
<description>
|
|
string_cast is a templated function which makes it easy to convert strings to
|
|
other types. The types supported are any types that can be read by the basic_istream operator>>. It
|
|
also supports casting between wstring, string, and ustring objects.
|
|
</description>
|
|
</component>
|
|
|
|
<!-- ************************************************************************* -->
|
|
|
|
<component>
|
|
<name>string_assign</name>
|
|
<file>dlib/string.h</file>
|
|
<spec_file link="true">dlib/string/string_abstract.h</spec_file>
|
|
<description>
|
|
string_assign is an object which makes it easy to convert strings to
|
|
other types. The types supported are any types that can be read by the basic_istream operator>>. It
|
|
also supports casting between wstring, string, and ustring objects. Since
|
|
string_assign is a simple stateless object there is a global instance of it
|
|
called dlib::sa.
|
|
</description>
|
|
<examples>
|
|
<example>config_reader_ex.cpp.html</example>
|
|
</examples>
|
|
|
|
</component>
|
|
|
|
<!-- ************************************************************************* -->
|
|
|
|
<component>
|
|
<name>unichar</name>
|
|
<file>dlib/unicode.h</file>
|
|
<spec_file link="true">dlib/unicode/unicode_abstract.h</spec_file>
|
|
<description>
|
|
This is a typedef for an unsigned 32bit integer which we use to store
|
|
Unicode values.
|
|
</description>
|
|
|
|
</component>
|
|
|
|
<!-- ************************************************************************* -->
|
|
|
|
<component>
|
|
<name>basic_utf8_ifstream</name>
|
|
<file>dlib/unicode.h</file>
|
|
<spec_file link="true">dlib/unicode/unicode_abstract.h</spec_file>
|
|
<description>
|
|
This object represents an input file stream much like the
|
|
normal std::ifstream except that it knows how to read UTF-8
|
|
data. So when you read characters out of this stream it will
|
|
automatically convert them from the UTF-8 multibyte encoding
|
|
into a fixed width wide character encoding.
|
|
|
|
<p>
|
|
There are also two typedefs of this object. The first is utf8_wifstream which is a
|
|
typedef for wchar_t as the wide character to read into. The second is utf8_uifstream
|
|
which uses unichar instead of wchar_t.
|
|
</p>
|
|
</description>
|
|
|
|
</component>
|
|
|
|
|
|
<!-- ************************************************************************* -->
|
|
|
|
<component>
|
|
<name>ustring</name>
|
|
<file>dlib/unicode.h</file>
|
|
<spec_file link="true">dlib/unicode/unicode_abstract.h</spec_file>
|
|
<description>
|
|
This is a typedef for a std::basic_string&lt;unichar&gt;. That is, it is a typedef
|
|
for a string object that stores unichar Unicode characters.
|
|
</description>
|
|
|
|
</component>
|
|
|
|
<!-- ************************************************************************* -->
|
|
|
|
<component>
|
|
<name>is_combining_char</name>
|
|
<file>dlib/unicode.h</file>
|
|
<spec_file link="true">dlib/unicode/unicode_abstract.h</spec_file>
|
|
<description>
|
|
This is a global function that can tell you if a character is a Unicode
|
|
combining character or not.
|
|
</description>
|
|
|
|
</component>
|
|
|
|
<!-- ************************************************************************* -->
|
|
|
|
<component>
|
|
<name>convert_utf8_to_utf32</name>
|
|
<file>dlib/unicode.h</file>
|
|
<spec_file link="true">dlib/unicode/unicode_abstract.h</spec_file>
|
|
<description>
|
|
This is a global function that can convert UTF-8 strings into strings
|
|
of 32bit unichar characters.
|
|
</description>
|
|
|
|
</component>
|
|
|
|
<!-- ************************************************************************* -->
|
|
|
|
<component>
|
|
<name>base64</name>
|
|
<file>dlib/base64.h</file>
|
|
<spec_file>dlib/base64/base64_kernel_abstract.h</spec_file>
|
|
<description>
|
|
This object allows you to encode and decode data to and from
|
|
the Base64 Content-Transfer-Encoding defined in section 6.8 of
|
|
rfc2045.
|
|
</description>
|
|
|
|
<examples>
|
|
<example>file_to_code_ex.cpp.html</example>
|
|
</examples>
|
|
</component>
|
|
|
|
<!-- ************************************************************************* -->
|
|
|
|
<component checked="true">
|
|
<name>cmd_line_parser</name>
|
|
<file>dlib/cmd_line_parser.h</file>
|
|
<spec_file>dlib/cmd_line_parser/cmd_line_parser_kernel_abstract.h</spec_file>
|
|
<description>
|
|
This object allows you to easily parse a command line. Note that the
|
|
documentation for the <a href="dlib/interfaces/cmd_line_parser_option.h.html">cmd_line_parser_option</a>
|
|
(the object returned by the parser's .option() function) is in a separate file.
|
|
<p>
|
|
Note also that there are standard typedefs for the ASCII and wide character versions of the
|
|
cmd_line_parser template. These are the <tt>command_line_parser</tt> and <tt>wcommand_line_parser</tt>
|
|
types respectively.
|
|
</p>
|
|
</description>
|
|
|
|
<examples>
|
|
<example>compress_stream_ex.cpp.html</example>
|
|
<example>train_object_detector.cpp.html</example>
|
|
</examples>
|
|
|
|
<extensions>
|
|
<extension>
|
|
<name>get_option</name>
|
|
<spec_file>dlib/cmd_line_parser/get_option_abstract.h</spec_file>
|
|
<description>This extension provides a convenience function for accessing the
|
|
options to a command line argument or a <a href="#config_reader">config_reader</a>. It
|
|
is automatically #included when using the command line parser or config reader.
|
|
</description>
|
|
</extension>
|
|
</extensions>
|
|
|
|
|
|
</component>
|
|
|
|
<!-- ************************************************************************* -->
|
|
|
|
<component>
|
|
<name>config_reader</name>
|
|
<file>dlib/config_reader.h</file>
|
|
<spec_file>dlib/config_reader/config_reader_kernel_abstract.h</spec_file>
|
|
<description>
|
|
This object represents something which is intended to be used to read
|
|
text configuration files.
|
|
</description>
|
|
|
|
<examples>
|
|
<example>config_reader_ex.cpp.html</example>
|
|
</examples>
|
|
|
|
<extensions>
|
|
<extension>
|
|
<name>config_reader_thread_safe</name>
|
|
<spec_file>dlib/config_reader/config_reader_thread_safe_abstract.h</spec_file>
|
|
<description>
|
|
This object extends a normal config_reader by simply wrapping all
|
|
its member functions inside mutex locks to make it safe to use
|
|
in a threaded program.
|
|
</description>
|
|
</extension>
|
|
</extensions>
|
|
|
|
|
|
</component>
|
|
|
|
<!-- ************************************************************************* -->
|
|
|
|
<component>
|
|
<name>cpp_pretty_printer</name>
|
|
<file>dlib/cpp_pretty_printer.h</file>
|
|
<spec_file>dlib/cpp_pretty_printer/cpp_pretty_printer_kernel_abstract.h</spec_file>
|
|
<description>
|
|
This object represents an HTML pretty printer for C++ source code.
|
|
</description>
|
|
|
|
<implementations>
|
|
<implementation>
|
|
<name>cpp_pretty_printer_kernel_1</name>
|
|
<file>dlib/cpp_pretty_printer/cpp_pretty_printer_kernel_1.h</file>
|
|
<description>
|
|
This is implemented by using the <a href="#cpp_tokenizer">cpp_tokenizer</a> object.
|
|
This is the pretty printer I use on all the source in this library. It applies a color scheme, turns
|
|
include directives such as #include "file.h" into links to file.h.html and puts HTML anchor points
|
|
on function and class declarations. It also looks for comments starting with /*!A and puts an anchor
|
|
before the comment using the word following the A as the name of the anchor.
|
|
</description>
|
|
|
|
<typedefs>
|
|
<typedef>
|
|
<name>kernel_1a</name>
|
|
<description>is a typedef for cpp_pretty_printer_kernel_1</description>
|
|
</typedef>
|
|
</typedefs>
|
|
|
|
</implementation>
|
|
<implementation>
|
|
<name>cpp_pretty_printer_kernel_2</name>
|
|
<file>dlib/cpp_pretty_printer/cpp_pretty_printer_kernel_2.h</file>
|
|
<description>
|
|
This is implemented by using the <a href="#cpp_tokenizer">cpp_tokenizer</a> object.
|
|
It applies a black and white color scheme suitable
|
|
for printing on a black and white printer. It also places the document title
|
|
prominently at the top of the pretty printed source file.
|
|
</description>
|
|
|
|
<typedefs>
|
|
<typedef>
|
|
<name>kernel_2a</name>
|
|
<description>is a typedef for cpp_pretty_printer_kernel_2</description>
|
|
</typedef>
|
|
</typedefs>
|
|
|
|
</implementation>
|
|
|
|
</implementations>
|
|
|
|
</component>
|
|
|
|
<!-- ************************************************************************* -->
|
|
|
|
<component checked="true">
|
|
<name>cpp_tokenizer</name>
|
|
<file>dlib/cpp_tokenizer.h</file>
|
|
<spec_file>dlib/cpp_tokenizer/cpp_tokenizer_kernel_abstract.h</spec_file>
|
|
<description>
|
|
This object represents a simple tokenizer for C++ source code.
|
|
</description>
|
|
|
|
<implementations>
|
|
<implementation>
|
|
<name>cpp_tokenizer_kernel_1</name>
|
|
<file>dlib/cpp_tokenizer/cpp_tokenizer_kernel_1.h</file>
|
|
<description>
|
|
This is implemented by using the <a href="#tokenizer">tokenizer</a> object in the obvious way.
|
|
</description>
|
|
|
|
<typedefs>
|
|
<typedef>
|
|
<name>kernel_1a</name>
|
|
<description>is a typedef for cpp_tokenizer_kernel_1</description>
|
|
</typedef>
|
|
</typedefs>
|
|
|
|
</implementation>
|
|
|
|
</implementations>
|
|
|
|
</component>
|
|
|
|
<!-- ************************************************************************* -->
|
|
|
|
<component checked="true">
|
|
<name>tokenizer</name>
|
|
<file>dlib/tokenizer.h</file>
|
|
<spec_file>dlib/tokenizer/tokenizer_kernel_abstract.h</spec_file>
|
|
<description>
|
|
This object represents a simple tokenizer for textual data.
|
|
</description>
|
|
|
|
<implementations>
|
|
<implementation>
|
|
<name>tokenizer_kernel_1</name>
|
|
<file>dlib/tokenizer/tokenizer_kernel_1.h</file>
|
|
<description>
|
|
This is implemented in the obvious way.
|
|
</description>
|
|
|
|
<typedefs>
|
|
<typedef>
|
|
<name>kernel_1a</name>
|
|
<description>is a typedef for tokenizer_kernel_1</description>
|
|
</typedef>
|
|
</typedefs>
|
|
|
|
</implementation>
|
|
|
|
</implementations>
|
|
|
|
</component>
|
|
|
|
<!-- ************************************************************************* -->
|
|
|
|
<component>
|
|
<name>xml_parser</name>
|
|
<file>dlib/xml_parser.h</file>
|
|
<spec_file>dlib/xml_parser/xml_parser_kernel_abstract.h</spec_file>
|
|
<description>
|
|
|
|
This object represents a simple SAX style event driven XML parser.
|
|
It takes its input from an input stream object and sends events to all
|
|
registered document_handler and error_handler objects.
|
|
<br/><br/>
|
|
|
|
The xml_parser object also uses the interface classes
|
|
<a href="dlib/xml_parser/xml_parser_kernel_interfaces.h.html#document_handler">document_handler</a>
|
|
and
|
|
<a href="dlib/xml_parser/xml_parser_kernel_interfaces.h.html#error_handler">error_handler</a>.
|
|
Subclasses of these classes are passed to the xml_parser which generates events while it's
|
|
parsing and sends them to the appropriate handler.
|
|
|
|
</description>
|
|
|
|
<examples>
|
|
<example>xml_parser_ex.cpp.html</example>
|
|
</examples>
|
|
</component>
|
|
|
|
|
|
<!-- ************************************************************************* -->
|
|
|
|
</components>
|
|
|
|
<!-- ************************************************************************* -->
|
|
|
|
|
|
</doc>
|