#include "to_xml.h"
#include "dlib/dir_nav.h"
#include <vector>
#include <sstream>
#include <iostream>
#include <fstream>
#include <stack>
#include "dlib/cpp_tokenizer.h"
#include "dlib/string.h"
using namespace dlib;
using namespace std;
// ----------------------------------------------------------------------------------------
// Tokenizer type used by the scanning code in this file.  Its token-kind constants
// are referred to as tok_type::KEYWORD, tok_type::IDENTIFIER, tok_type::OTHER, etc.
typedef cpp_tokenizer::kernel_1a_c tok_type;
// ----------------------------------------------------------------------------------------
class file_filter
const string& filter
// pick out the filter strings
istringstream sin(filter);
string temp;
sin >> temp;
while (sin)
endings.push_back("." + temp);
sin >> temp;
bool operator() ( const file& f) const
// check if any of the endings match
for (unsigned long i = 0; i < endings.size(); ++i)
// if the ending is bigger than f's name then it obviously doesn't match
if (endings[i].size() >
// now check if the actual characters that make up the end of the file name
// matches what is in endings[i].
if ( std::equal(endings[i].begin(), endings[i].end(),[i].size()))
return true;
return false;
std::vector<string> endings;
// ----------------------------------------------------------------------------------------
void obtain_list_of_files (
const cmd_line_parser<char>::check_1a_c& parser,
const std::string& filter,
const unsigned long search_depth,
std::vector<std::pair<string,string> >& files
for (unsigned long i = 0; i < parser.option("i").count(); ++i)
const directory dir(parser.option("i").argument(0,i));
const std::vector<file>& temp = get_files_in_directory_tree(dir, file_filter(filter), search_depth);
// figure out how many characters need to be removed from the path of each file
const string parent = dir.get_parent().full_name();
unsigned long strip = parent.size();
if (parent.size() > 0 && parent[parent.size()-1] != '\\' && parent[parent.size()-1] != '/')
strip += 1;
for (unsigned long i = 0; i < temp.size(); ++i)
files.push_back(make_pair(temp[i].full_name().substr(strip), temp[i].full_name()));
for (unsigned long i = 0; i < parser.number_of_arguments(); ++i)
files.push_back(make_pair(parser[i], parser[i]));
std::sort(files.begin(), files.end());
// ----------------------------------------------------------------------------------------
// Token-level records.  In each of these a declaration is kept as a sequence of
// (token kind, token text) pairs rather than as formatted text.

struct tok_function_record
{
    std::vector<std::pair<int,std::string> > declaration;
    std::string scope;
    std::string file;
    std::string comment;
};

struct tok_method_record
{
    std::vector<std::pair<int,std::string> > declaration;
    std::string comment;
};

struct tok_variable_record
{
    std::vector<std::pair<int,std::string> > declaration;
};

struct tok_typedef_record
{
    std::vector<std::pair<int,std::string> > declaration;
};

struct tok_class_record
{
    std::vector<std::pair<int,std::string> > declaration;
    std::string name;
    std::string scope;
    std::string file;
    std::string comment;

    // members, split by access region
    std::vector<tok_method_record> public_methods;
    std::vector<tok_method_record> protected_methods;
    std::vector<tok_variable_record> public_variables;
    std::vector<tok_typedef_record> public_typedefs;
    std::vector<tok_variable_record> protected_variables;
    std::vector<tok_typedef_record> protected_typedefs;

    // classes declared inside this one, split by access region
    std::vector<tok_class_record> public_inner_classes;
    std::vector<tok_class_record> protected_inner_classes;
};
// ----------------------------------------------------------------------------------------
// Text-level records.  These mirror the tok_*_record structs but hold each
// declaration as a single formatted string.

struct function_record
{
    std::string name;
    std::string scope;
    std::string declaration;
    std::string file;
    std::string comment;
};

struct method_record
{
    std::string name;
    std::string declaration;
    std::string comment;
};

struct variable_record
{
    std::string declaration;
};

struct typedef_record
{
    std::string declaration;
};

struct class_record
{
    std::string name;
    std::string scope;
    std::string declaration;
    std::string file;
    std::string comment;

    // members, split by access region
    std::vector<method_record> public_methods;
    std::vector<variable_record> public_variables;
    std::vector<typedef_record> public_typedefs;

    std::vector<method_record> protected_methods;
    std::vector<variable_record> protected_variables;
    std::vector<typedef_record> protected_typedefs;

    // classes declared inside this one, split by access region
    std::vector<class_record> public_inner_classes;
    std::vector<class_record> protected_inner_classes;
};
// ----------------------------------------------------------------------------------------
unsigned long count_newlines (
    const std::string& str
)
/*!
    ensures
        - returns the number of '\n' characters inside str
!*/
{
    unsigned long count = 0;
    for (unsigned long i = 0; i < str.size(); ++i)
    {
        if (str[i] == '\n')
            ++count;
    }
    return count;
}
// ----------------------------------------------------------------------------------------
bool contains_unescaped_newline (
    const std::string& str
)
/*!
    ensures
        - returns true if str contains a '\n' character that isn't preceded by a '\'
!*/
{
    if (str.size() == 0)
        return false;

    // the first character has nothing in front of it, so it can't be escaped
    if (str[0] == '\n')
        return true;

    for (unsigned long i = 1; i < str.size(); ++i)
    {
        if (str[i] == '\n' && str[i-1] != '\\')
            return true;
    }

    return false;
}
// ----------------------------------------------------------------------------------------
bool is_formal_comment (
const string& str
if (str.size() < 6)
return false;
if (str[0] == '/' &&
str[1] == '*' &&
str[2] == '!' &&
str[3] != 'P' &&
str[3] != 'p' &&
str[str.size()-3] == '!' &&
str[str.size()-2] == '*' &&
str[str.size()-1] == '/' )
return true;
return false;
// ----------------------------------------------------------------------------------------
std::string make_scope_string (
    const std::vector<std::string>& namespaces,
    unsigned long exclude_last_num_scopes = 0
)
/*!
    ensures
        - returns the non-empty entries of namespaces joined with "::", in order,
          leaving out the last exclude_last_num_scopes entries.
        - returns "" if nothing is left to join.
!*/
{
    std::string temp;
    for (unsigned long i = 0; i + exclude_last_num_scopes < namespaces.size(); ++i)
    {
        // empty names contribute nothing to the scope string
        if (namespaces[i].size() == 0)
            continue;

        if (temp.size() == 0)
            temp = namespaces[i];
        else
            temp += "::" + namespaces[i];
    }
    return temp;
}
// ----------------------------------------------------------------------------------------
bool looks_like_function_declaration (
const std::vector<std::pair<int,string> >& declaration
// Check if declaration contains IDENTIFIER ( ) somewhere in it.
bool seen_first_part = false;
bool seen_operator = false;
int local_paren_count = 0;
for (unsigned long i = 1; i < declaration.size(); ++i)
if (declaration[i].first == tok_type::KEYWORD &&
declaration[i].second == "operator")
seen_operator = true;
if (declaration[i].first == tok_type::OTHER &&
declaration[i].second == "(" &&
(declaration[i-1].first == tok_type::IDENTIFIER || seen_operator))
seen_first_part = true;
if (declaration[i].first == tok_type::OTHER)
if ( declaration[i].second == "(")
else if ( declaration[i].second == ")")
if (seen_first_part && local_paren_count == 0)
return true;
return false;
// ----------------------------------------------------------------------------------------
// The access region currently in effect inside a class or struct body.
enum scope_type
{
    public_scope,
    protected_scope,
    private_scope
};
// process_file: token-driven scan of one source file.
//
// Pulls tokens from a tok_type tokenizer until END_OF_FILE and handles each one
// according to its kind (KEYWORD, COMMENT, IDENTIFIER, OTHER, WHITE_SPACE).  A set
// of "recently seen" flags, together with the namespaces, scope_access, scopes and
// class_stack containers, tracks where in the source the scan currently is.  When a
// formal comment (see is_formal_comment) arrives while last_full_declaration is
// non-empty, a tok_class_record, tok_method_record or tok_function_record is filled
// in from last_full_declaration and the comment text.
//
// Parameters:
//   fin       - input stream for the file.  NOTE(review): the statement that
//               attaches the tokenizer to fin is not visible in this copy; confirm.
//   file      - copied into the file field of the records created here.
//   functions - output; see the two description lines below the signature.
//   classes   - output; see the two description lines below the signature.
//
// NOTE(review): this copy of the function is damaged.  Several expressions have
// lost an operand (for example "if ( == 0)" and "{ = public_scope;"), and the
// braces, switch statements, else arms and single-token statements are missing, so
// it cannot compile as shown.  Restore it from the upstream source before relying
// on it.
void process_file (
istream& fin,
const string& file,
std::vector<tok_function_record>& functions,
std::vector<tok_class_record>& classes
- scans the given file for global functions and appends any found into functions.
- scans the given file for global classes and appends any found into classes.
tok_type tok;
bool recently_seen_struct_keyword = false;
// true if we have seen the struct keyword and
// we have not seen any identifiers or { characters
string last_struct_name;
// the name of the last struct we have seen
bool recently_seen_class_keyword = false;
// true if we have seen the class keyword and
// we have not seen any identifiers or { characters
string last_class_name;
// the name of the last class we have seen
bool recently_seen_namespace_keyword = false;
// true if we have seen the namespace keyword and
// we have not seen any identifiers or { characters
string last_namespace_name;
// the name of the last namespace we have seen
bool recently_seen_pound_define = false;
// true if we have seen a #define and haven't seen an unescaped newline
bool recently_seen_preprocessor = false;
// true if we have seen a preprocessor statement and haven't seen an unescaped newline
bool recently_seen_typedef = false;
// true if we have seen a typedef keyword and haven't seen a ;
bool recently_seen_paren_0 = false;
// true if we have seen paren_count transition to zero but haven't yet seen a ; or { or
// a new line if recently_seen_pound_define is true.
bool recently_seen_slots = false;
// true if we have seen the identifier "slots" at a zero scope but haven't seen any
// other identifiers or the ';' or ':' characters.
bool recently_seen_closing_bracket = false;
// true if we have seen a } and haven't yet seen an IDENTIFIER or ;
bool recently_seen_new_scope = false;
// true if we have seen the keywords class, namespace, struct, or extern and
// we have not seen the characters {, ), or ; since then
bool at_top_of_new_scope = false;
// true if we have seen the { that started a new scope but haven't seen anything yet but WHITE_SPACE
std::vector<string> namespaces;
// a stack to hold the names of the scopes we have entered. This is the classes, structs, and namespaces we enter.
namespaces.push_back(""); // this is the global namespace
std::stack<scope_type> scope_access;
// If the stack isn't empty then we are inside a class or struct and the top value
// in the stack tells if we are in a public, protected, or private region.
std::stack<unsigned long> scopes; // a stack to hold current and old scope counts
// the top of the stack counts the number of new scopes (i.e. unmatched { } we have entered
// since we were at a scope where functions can be defined.
// We also maintain the invariant that scopes.size() == namespaces.size()
std::stack<tok_class_record> class_stack;
// This is a stack where == the incomplete class record for the class declaration we are
// currently in.
unsigned long paren_count = 0;
// this is the number of ( we have seen minus the number of ) we have
// seen.
std::vector<std::pair<int,string> > token_accum;
// Used to accumulate tokens for function and class declarations
std::vector<std::pair<int,string> > last_full_declaration;
// Once we determine that token_accum has a full declaration in it we copy it into last_full_declaration.
int type;
string token;
// Main loop: one token is handled per pass, and the next one is fetched by the
// get_token call at the end of this function.
tok.get_token(type, token);
while (type != tok_type::END_OF_FILE)
// Keywords: note preprocessor directives and scope-introducing keywords, and
// handle public/protected/private when followed by a ':'.
case tok_type::KEYWORD: // ------------------------------------------
if (token[0] == '#')
recently_seen_preprocessor = true;
if (token == "class")
recently_seen_class_keyword = true;
recently_seen_new_scope = true;
else if (token == "struct")
recently_seen_struct_keyword = true;
recently_seen_new_scope = true;
else if (token == "namespace")
recently_seen_namespace_keyword = true;
recently_seen_new_scope = true;
else if (token == "extern")
recently_seen_new_scope = true;
else if (token == "#define")
recently_seen_pound_define = true;
else if (token == "typedef")
recently_seen_typedef = true;
else if (recently_seen_pound_define == false)
// eat white space
int temp_type;
string temp_token;
if (tok.peek_type() == tok_type::WHITE_SPACE)
tok.get_token(temp_type, temp_token);
const bool next_is_colon = (tok.peek_type() == tok_type::OTHER && tok.peek_token() == ":");
if (next_is_colon)
// eat the colon
tok.get_token(temp_type, temp_token);
if (scope_access.size() > 0 && token == "public")
{ = public_scope;
else if (scope_access.size() > 0 && token == "protected")
{ = protected_scope;
else if (scope_access.size() > 0 && token == "private")
{ = private_scope;
at_top_of_new_scope = false;
// Comments: a formal comment that follows a completed declaration is what
// causes a class, method or function record to be created.
case tok_type::COMMENT: // ------------------------------------------
if ( == 0 && last_full_declaration.size() > 0 && is_formal_comment(token) &&
paren_count == 0)
// if we are inside a class or struct
if (scope_access.size() > 0)
// if we are looking at a comment at the top of a class
if (at_top_of_new_scope)
// push an entry for this class into the class_stack
tok_class_record temp;
temp.declaration = last_full_declaration;
temp.file = file; = namespaces.back();
temp.scope = make_scope_string(namespaces,1);
temp.comment = token;
else if ( == public_scope || == protected_scope)
// This should be a member function.
// Only do anything if the class that contains this member function is
// in the class_stack.
if (class_stack.size() > 0 && == namespaces.back() &&
tok_method_record temp;
// Check if there is an initialization list inside the declaration and if there is
// then find out where the starting : is located so we can avoid including it in
// the output.
unsigned long pos = last_full_declaration.size();
long temp_paren_count = 0;
for (unsigned long i = 0; i < last_full_declaration.size(); ++i)
if (last_full_declaration[i].first == tok_type::OTHER)
if (last_full_declaration[i].second == "(")
else if (last_full_declaration[i].second == ")")
else if (temp_paren_count == 0 && last_full_declaration[i].second == ":")
// if this is a :: then ignore it
if (i > 0 && last_full_declaration[i-1].second == ":")
else if (i+1 < last_full_declaration.size() && last_full_declaration[i+1].second == ":")
pos = i;
temp.declaration.assign(last_full_declaration.begin(), last_full_declaration.begin()+pos);
temp.comment = token;
if ( == public_scope);
// we should be looking at a global declaration of some kind.
if (looks_like_function_declaration(last_full_declaration))
tok_function_record temp;
// make sure we never include anything beyond the first closing )
// if we are looking at a #defined function
unsigned long pos = last_full_declaration.size();
if (last_full_declaration[0].second == "#define")
long temp_paren_count = 0;
for (unsigned long i = 0; i < last_full_declaration.size(); ++i)
if (last_full_declaration[i].first == tok_type::OTHER)
if (last_full_declaration[i].second == "(")
else if (last_full_declaration[i].second == ")")
if (temp_paren_count == 0)
pos = i+1;
temp.declaration.assign(last_full_declaration.begin(), last_full_declaration.begin()+pos);
temp.file = file;
temp.scope = make_scope_string(namespaces);
temp.comment = token;
at_top_of_new_scope = false;
// Identifiers: remember the name that follows class/struct/namespace and clear
// the corresponding "recently seen" flags.
case tok_type::IDENTIFIER: // ------------------------------------------
if (recently_seen_class_keyword)
last_class_name = token;
else if (recently_seen_struct_keyword)
last_struct_name = token;
else if (recently_seen_namespace_keyword)
last_namespace_name = token;
if ( == 0 && token == "slots")
recently_seen_slots = true;
recently_seen_slots = false;
recently_seen_class_keyword = false;
recently_seen_struct_keyword = false;
recently_seen_namespace_keyword = false;
recently_seen_closing_bracket = false;
at_top_of_new_scope = false;
// Other tokens: the case labels below are single punctuation characters.
case tok_type::OTHER: // ------------------------------------------
case '{':
// if we are entering a new scope
if (recently_seen_new_scope)
at_top_of_new_scope = true;
// if we are entering a class
if (last_class_name.size() > 0)
else if (last_struct_name.size() > 0)
else if (last_namespace_name.size() > 0)
{ += 1;
recently_seen_new_scope = false;
recently_seen_class_keyword = false;
recently_seen_struct_keyword = false;
recently_seen_namespace_keyword = false;
recently_seen_paren_0 = false;
// a { at function scope is an end of a potential declaration
if ( == 0)
// put token_accum into last_full_declaration
case '}':
if ( > 0)
{ -= 1;
else if (scopes.size() > 1)
if (scope_access.size() > 0)
// If the scope we are leaving is the top class on the class_stack
// then we need to either pop it into its containing class or put it
// into the classes output vector.
if (class_stack.size() > 0 && namespaces.back() ==
// If this class is a inner_class of another then push it into the
// public_inner_classes or protected_inner_classes field of its containing class.
if (class_stack.size() > 1)
tok_class_record temp =;
if (scope_access.size() > 0)
if ( == public_scope);
else if ( == protected_scope);
else if (class_stack.size() > 0)
recently_seen_closing_bracket = true;
at_top_of_new_scope = false;
case ';':
// a ; at function scope is an end of a potential declaration
if ( == 0)
// put token_accum into last_full_declaration
// if we are inside the public area of a class and this ; might be the end
// of a typedef or variable declaration
if ( == 0 && scope_access.size() > 0 &&
( == public_scope || == protected_scope) &&
recently_seen_closing_bracket == false)
if (recently_seen_typedef)
// This should be a typedef inside the public area of a class or struct:
// Only do anything if the class that contains this typedef is in the class_stack.
if (class_stack.size() > 0 && == namespaces.back())
tok_typedef_record temp;
temp.declaration = last_full_declaration;
if ( == public_scope);
else if (recently_seen_paren_0 == false && recently_seen_new_scope == false)
// This should be some kind of public variable declaration inside a class or struct:
// Only do anything if the class that contains this member variable is in the class_stack.
if (class_stack.size() > 0 && == namespaces.back())
tok_variable_record temp;
temp.declaration = last_full_declaration;
if ( == public_scope);
recently_seen_new_scope = false;
recently_seen_typedef = false;
recently_seen_paren_0 = false;
recently_seen_closing_bracket = false;
recently_seen_slots = false;
at_top_of_new_scope = false;
case ':':
if (recently_seen_slots)
recently_seen_slots = false;
case '(':
at_top_of_new_scope = false;
case ')':
if (paren_count == 0)
recently_seen_paren_0 = true;
if ( == 0)
last_full_declaration = token_accum;
recently_seen_new_scope = false;
at_top_of_new_scope = false;
at_top_of_new_scope = false;
// White space: an unescaped newline ends a pending #define / preprocessor line.
case tok_type::WHITE_SPACE: // ------------------------------------------
if (recently_seen_pound_define)
if (contains_unescaped_newline(token))
recently_seen_pound_define = false;
recently_seen_paren_0 = false;
recently_seen_preprocessor = false;
// this is an end of a potential declaration
if (recently_seen_preprocessor)
if (contains_unescaped_newline(token))
recently_seen_preprocessor = false;
default: // ------------------------------------------
at_top_of_new_scope = false;
// fetch the next token for the following pass of the loop
tok.get_token(type, token);
// ----------------------------------------------------------------------------------------
string get_function_name (
const std::vector<std::pair<int,string> >& declaration
string name;
bool contains_operator = false;
unsigned long operator_pos = 0;
for (unsigned long i = 0; i < declaration.size(); ++i)
if (declaration[i].first == tok_type::KEYWORD &&
declaration[i].second == "operator")
contains_operator = true;
operator_pos = i;
// find the opening ( for the function
unsigned long paren_pos = 0;
long paren_count = 0;
for (long i = declaration.size()-1; i >= 0; --i)
if (declaration[i].first == tok_type::OTHER &&
declaration[i].second == ")")
else if (declaration[i].first == tok_type::OTHER &&
declaration[i].second == "(")
if (paren_count == 0)
paren_pos = i;
if (contains_operator)
name = declaration[operator_pos].second;
for (unsigned long i = operator_pos+1; i < paren_pos; ++i)
if (declaration[i].first == tok_type::IDENTIFIER || declaration[i].first == tok_type::KEYWORD)
name += " ";
name += declaration[i].second;
// if this is a destructor then include the ~
if (paren_pos > 1 && declaration[paren_pos-2].second == "~")
name = "~" + declaration[paren_pos-1].second;
else if (paren_pos > 0)
name = declaration[paren_pos-1].second;
return name;
// ----------------------------------------------------------------------------------------
string pretty_print_declaration (
const std::vector<std::pair<int,string> >& decl
string temp;
long angle_count = 0;
long paren_count = 0;
if (decl.size() == 0)
return temp;
temp = decl[0].second;
bool just_closed_template = false;
bool in_template = false;
bool last_was_scope_res = false;
bool seen_operator = false;
if (temp == "operator")
seen_operator = true;
for (unsigned long i = 1; i < decl.size(); ++i)
bool last_was_less_than = false;
if (decl[i-1].first == tok_type::OTHER && decl[i-1].second == "<")
last_was_less_than = true;
if (decl[i].first == tok_type::OTHER && decl[i].second == "<" &&
(decl[i-1].second != "operator" && ((i>1 && decl[i-2].second != "operator") || decl[i-1].second != "<") ))
if (decl[i-1].first == tok_type::KEYWORD && decl[i-1].second == "template" &&
decl[i].first == tok_type::OTHER && decl[i].second == "<")
in_template = true;
temp += " <\n ";
else if (decl[i].first == tok_type::OTHER && decl[i].second == ">")
// don't count angle brackets when they are part of an operator
if (decl[i-1].second != "operator" && ((i>1 && decl[i-2].second != "operator") || decl[i-1].second != ">"))
if (angle_count == 0 && in_template)
temp += "\n >\n";
just_closed_template = true;
in_template = false;
temp += ">";
else if (decl[i].first == tok_type::OTHER && decl[i].second == "<")
temp += "<";
else if (decl[i].first == tok_type::OTHER && decl[i].second == ",")
if (in_template || (paren_count == 1 && angle_count == 0))
temp += ",\n ";
temp += ",";
else if (decl[i].first == tok_type::OTHER && decl[i].second == "&")
temp += "&";
else if (decl[i].first == tok_type::OTHER && decl[i].second == ".")
temp += ".";
else if (decl[i].first == tok_type::SINGLE_QUOTED_TEXT)
temp += decl[i].second;
else if (decl[i].first == tok_type::DOUBLE_QUOTED_TEXT)
temp += decl[i].second;
else if (decl[i-1].first == tok_type::SINGLE_QUOTED_TEXT && decl[i].second == "'")
temp += decl[i].second;
else if (decl[i-1].first == tok_type::DOUBLE_QUOTED_TEXT && decl[i].second == "\"")
temp += decl[i].second;
else if (decl[i].first == tok_type::OTHER && decl[i].second == "[")
temp += "[";
else if (decl[i].first == tok_type::OTHER && decl[i].second == "]")
temp += "]";
else if (decl[i].first == tok_type::OTHER && decl[i].second == "-")
temp += "-";
else if (decl[i].first == tok_type::NUMBER)
if (decl[i-1].second == "=")
temp += " " + decl[i].second;
temp += decl[i].second;
else if (decl[i].first == tok_type::OTHER && decl[i].second == "*")
temp += "*";
else if (decl[i].first == tok_type::KEYWORD && decl[i].second == "operator")
temp += "\noperator";
seen_operator = true;
else if (decl[i].first == tok_type::OTHER && decl[i].second == ":" &&
(decl[i-1].second == ":" || (i+1<decl.size() && decl[i+1].second == ":") ) )
temp += ":";
last_was_scope_res = true;
else if (decl[i].first == tok_type::OTHER && decl[i].second == "(")
const bool next_is_paren = (i+1 < decl.size() && decl[i+1].first == tok_type::OTHER && decl[i+1].second == ")");
if (paren_count == 0 && next_is_paren == false && in_template == false)
temp += " (\n ";
temp += "(";
else if (decl[i].first == tok_type::OTHER && decl[i].second == ")")
if (paren_count == 0 && decl[i-1].second != "(" && in_template == false)
temp += "\n)";
temp += ")";
else if (decl[i].first == tok_type::IDENTIFIER && i+1 < decl.size() &&
decl[i+1].first == tok_type::OTHER && decl[i+1].second == "(")
if (just_closed_template || paren_count != 0 || decl[i-1].second == "~")
temp += decl[i].second;
else if (seen_operator)
temp += " " + decl[i].second;
temp += "\n" + decl[i].second;
just_closed_template = false;
last_was_scope_res = false;
if (just_closed_template || last_was_scope_res || last_was_less_than ||
(seen_operator && paren_count == 0 && decl[i].first == tok_type::OTHER ) ||
((decl[i].first == tok_type::KEYWORD || decl[i].first == tok_type::IDENTIFIER) && i>0 && decl[i-1].second == "("))
temp += decl[i].second;
temp += " " + decl[i].second;
just_closed_template = false;
last_was_scope_res = false;
return temp;
// ----------------------------------------------------------------------------------------
string format_comment (
const string& comment,
const unsigned long expand_tabs
if (comment.size() <= 6)
return "";
string temp = trim(trim(comment.substr(3,comment.size()-6), " \t"), "\n\r");
// if we should expand tabs to spaces
if (expand_tabs != 0)
unsigned long column = 0;
string str;
for (unsigned long i = 0; i < temp.size(); ++i)
if (temp[i] == '\t')
const unsigned long num_spaces = expand_tabs - column%expand_tabs;
column += num_spaces;
str.insert(str.end(), num_spaces, ' ');
else if (temp[i] == '\n' || temp[i] == '\r')
str += temp[i];
column = 0;
str += temp[i];
// put str into temp
// now figure out what the smallest amount of leading white space is and remove it from each line.
unsigned long num_whitespace = 100000;
string::size_type pos1 = 0, pos2 = 0;
while (pos1 != string::npos)
// find start of non-white-space
pos2 = temp.find_first_not_of(" \t",pos1);
// if this is a line of just white space then ignore it
if (pos2 != string::npos && temp[pos2] != '\n' && temp[pos2] != '\r')
if (pos2-pos1 < num_whitespace)
num_whitespace = pos2-pos1;
// find end-of-line
pos1 = temp.find_first_of("\n\r", pos2);
// find start of next line
pos2 = temp.find_first_not_of("\n\r", pos1);
pos1 = pos2;
// now remove the leading white space
string temp2;
unsigned long counter = 0;
for (unsigned long i = 0; i < temp.size(); ++i)
// if we are looking at a new line
if (temp[i] == '\n' || temp[i] == '\r')
counter = 0;
else if (counter < num_whitespace)
temp2 += temp[i];
return temp2;
// ----------------------------------------------------------------------------------------
typedef_record convert_tok_typedef_record (
    const tok_typedef_record& rec
)
/*!
    ensures
        - returns a typedef_record whose declaration is the output of
          pretty_print_declaration() on rec's tokens
!*/
{
    typedef_record temp;
    temp.declaration = pretty_print_declaration(rec.declaration);
    return temp;
}
// ----------------------------------------------------------------------------------------
variable_record convert_tok_variable_record (
    const tok_variable_record& rec
)
/*!
    ensures
        - returns a variable_record whose declaration is the output of
          pretty_print_declaration() on rec's tokens
!*/
{
    variable_record temp;
    temp.declaration = pretty_print_declaration(rec.declaration);
    return temp;
}
// ----------------------------------------------------------------------------------------
method_record convert_tok_method_record (
    const tok_method_record& rec,
    const unsigned long expand_tabs
)
/*!
    ensures
        - returns a method_record built from rec:
            - comment is format_comment(rec.comment, expand_tabs)
            - name is get_function_name(rec.declaration)
            - declaration is pretty_print_declaration(rec.declaration)
!*/
{
    method_record temp;

    temp.comment = format_comment(rec.comment, expand_tabs);
    temp.name = get_function_name(rec.declaration);
    temp.declaration = pretty_print_declaration(rec.declaration);
    return temp;
}
// ----------------------------------------------------------------------------------------
class_record convert_tok_class_record (
    const tok_class_record& rec,
    const unsigned long expand_tabs
)
/*!
    ensures
        - returns a class_record built from rec:
            - scope and file are copied over
            - comment is format_comment(rec.comment, expand_tabs)
            - name is the token that follows the first class or struct keyword
              in rec.declaration, or "" if there is no such keyword
            - declaration is pretty_print_declaration(rec.declaration)
            - every typedef, variable, method and inner class of rec is converted
              with the matching convert_tok_*_record() function
!*/
{
    class_record crec;

    crec.scope = rec.scope;
    crec.file = rec.file;
    crec.comment = format_comment(rec.comment, expand_tabs);

    crec.name.clear();

    // find the first class token
    for (unsigned long i = 0; i+1 < rec.declaration.size(); ++i)
    {
        if (rec.declaration[i].first == tok_type::KEYWORD &&
            (rec.declaration[i].second == "class" ||
             rec.declaration[i].second == "struct" )
        )
        {
            crec.name = rec.declaration[i+1].second;
            break;
        }
    }

    crec.declaration = pretty_print_declaration(rec.declaration);

    for (unsigned long i = 0; i < rec.public_typedefs.size(); ++i)
        crec.public_typedefs.push_back(convert_tok_typedef_record(rec.public_typedefs[i]));

    for (unsigned long i = 0; i < rec.public_variables.size(); ++i)
        crec.public_variables.push_back(convert_tok_variable_record(rec.public_variables[i]));

    for (unsigned long i = 0; i < rec.protected_typedefs.size(); ++i)
        crec.protected_typedefs.push_back(convert_tok_typedef_record(rec.protected_typedefs[i]));

    for (unsigned long i = 0; i < rec.protected_variables.size(); ++i)
        crec.protected_variables.push_back(convert_tok_variable_record(rec.protected_variables[i]));

    for (unsigned long i = 0; i < rec.public_methods.size(); ++i)
        crec.public_methods.push_back(convert_tok_method_record(rec.public_methods[i], expand_tabs));

    for (unsigned long i = 0; i < rec.protected_methods.size(); ++i)
        crec.protected_methods.push_back(convert_tok_method_record(rec.protected_methods[i], expand_tabs));

    // inner classes are converted recursively
    for (unsigned long i = 0; i < rec.public_inner_classes.size(); ++i)
        crec.public_inner_classes.push_back(convert_tok_class_record(rec.public_inner_classes[i], expand_tabs));

    for (unsigned long i = 0; i < rec.protected_inner_classes.size(); ++i)
        crec.protected_inner_classes.push_back(convert_tok_class_record(rec.protected_inner_classes[i], expand_tabs));

    return crec;
}
// ----------------------------------------------------------------------------------------
function_record convert_tok_function_record (
    const tok_function_record& rec,
    const unsigned long expand_tabs
)
/*!
    ensures
        - returns a function_record built from rec:
            - scope and file are copied over
            - comment is format_comment(rec.comment, expand_tabs)
            - name is get_function_name(rec.declaration)
            - declaration is pretty_print_declaration(rec.declaration)
!*/
{
    function_record temp;

    temp.scope = rec.scope;
    temp.file = rec.file;
    temp.comment = format_comment(rec.comment, expand_tabs);
    temp.name = get_function_name(rec.declaration);
    temp.declaration = pretty_print_declaration(rec.declaration);

    return temp;
}
// ----------------------------------------------------------------------------------------
void convert_to_normal_records (
const std::vector<tok_function_record>& tok_functions,
const std::vector<tok_class_record>& tok_classes,
const unsigned long expand_tabs,
std::vector<function_record>& functions,
std::vector<class_record>& classes
for (unsigned long i = 0; i < tok_functions.size(); ++i)
functions.push_back(convert_tok_function_record(tok_functions[i], expand_tabs));
for (unsigned long i = 0; i < tok_classes.size(); ++i)
classes.push_back(convert_tok_class_record(tok_classes[i], expand_tabs));
// ----------------------------------------------------------------------------------------
std::string add_entity_ref (const std::string& str)
/*!
    ensures
        - returns a copy of str in which '&' is replaced by "&amp;", '<' by "&lt;"
          and '>' by "&gt;".  All other characters are copied unchanged.
!*/
{
    std::string temp;
    for (unsigned long i = 0; i < str.size(); ++i)
    {
        if (str[i] == '&')
            temp += "&amp;";
        else if (str[i] == '<')
            temp += "&lt;";
        else if (str[i] == '>')
            temp += "&gt;";
        else
            temp += str[i];
    }
    return temp;
}
// ----------------------------------------------------------------------------------------
std::string flip_slashes (std::string str)
/*!
    ensures
        - returns a copy of str with every '\' replaced by '/'
!*/
{
    for (unsigned long i = 0; i < str.size(); ++i)
    {
        if (str[i] == '\\')
            str[i] = '/';
    }
    return str;
}
// ----------------------------------------------------------------------------------------
void write_as_xml (
const function_record& rec,
ostream& fout
fout << " <function>\n";
fout << " <name>" << add_entity_ref( << "</name>\n";
fout << " <scope>" << add_entity_ref(rec.scope) << "</scope>\n";
fout << " <declaration>" << add_entity_ref(rec.declaration) << "</declaration>\n";
fout << " <file>" << flip_slashes(add_entity_ref(rec.file)) << "</file>\n";
fout << " <comment>" << add_entity_ref(rec.comment) << "</comment>\n";
fout << " </function>\n";
// ----------------------------------------------------------------------------------------
void write_as_xml (
const class_record& rec,
ostream& fout,
unsigned long indent
const string pad(indent, ' ');
fout << pad << "<class>\n";
fout << pad << " <name>" << add_entity_ref( << "</name>\n";
fout << pad << " <scope>" << add_entity_ref(rec.scope) << "</scope>\n";
fout << pad << " <declaration>" << add_entity_ref(rec.declaration) << "</declaration>\n";
fout << pad << " <file>" << flip_slashes(add_entity_ref(rec.file)) << "</file>\n";
fout << pad << " <comment>" << add_entity_ref(rec.comment) << "</comment>\n";
if (rec.public_typedefs.size() > 0)
fout << pad << " <public_typedefs>\n";
for (unsigned long i = 0; i < rec.public_typedefs.size(); ++i)
fout << pad << " <typedef>" << add_entity_ref(rec.public_typedefs[i].declaration) << "</typedef>\n";
fout << pad << " </public_typedefs>\n";
if (rec.public_variables.size() > 0)
fout << pad << " <public_variables>\n";
for (unsigned long i = 0; i < rec.public_variables.size(); ++i)
fout << pad << " <variable>" << add_entity_ref(rec.public_variables[i].declaration) << "</variable>\n";
fout << pad << " </public_variables>\n";
if (rec.protected_typedefs.size() > 0)
fout << pad << " <protected_typedefs>\n";
for (unsigned long i = 0; i < rec.protected_typedefs.size(); ++i)
fout << pad << " <typedef>" << add_entity_ref(rec.protected_typedefs[i].declaration) << "</typedef>\n";
fout << pad << " </protected_typedefs>\n";
if (rec.protected_variables.size() > 0)
fout << pad << " <protected_variables>\n";
for (unsigned long i = 0; i < rec.protected_variables.size(); ++i)
fout << pad << " <variable>" << add_entity_ref(rec.protected_variables[i].declaration) << "</variable>\n";
fout << pad << " </protected_variables>\n";
if (rec.public_methods.size() > 0)
fout << pad << " <public_methods>\n";
for (unsigned long i = 0; i < rec.public_methods.size(); ++i)
fout << pad << " <method>\n";
fout << pad << " <name>" << add_entity_ref(rec.public_methods[i].name) << "</name>\n";
fout << pad << " <declaration>" << add_entity_ref(rec.public_methods[i].declaration) << "</declaration>\n";
fout << pad << " <comment>" << add_entity_ref(rec.public_methods[i].comment) << "</comment>\n";
fout << pad << " </method>\n";
fout << pad << " </public_methods>\n";
if (rec.protected_methods.size() > 0)
fout << pad << " <protected_methods>\n";
for (unsigned long i = 0; i < rec.protected_methods.size(); ++i)
fout << pad << " <method>\n";
fout << pad << " <name>" << add_entity_ref(rec.protected_methods[i].name) << "</name>\n";
fout << pad << " <declaration>" << add_entity_ref(rec.protected_methods[i].declaration) << "</declaration>\n";
fout << pad << " <comment>" << add_entity_ref(rec.protected_methods[i].comment) << "</comment>\n";
fout << pad << " </method>\n";
fout << pad << " </protected_methods>\n";
if (rec.public_inner_classes.size() > 0)
fout << pad << " <public_inner_classes>\n";
for (unsigned long i = 0; i < rec.public_inner_classes.size(); ++i)
write_as_xml(rec.public_inner_classes[i], fout, indent+4);
fout << pad << " </public_inner_classes>\n";
if (rec.protected_inner_classes.size() > 0)
fout << pad << " <protected_inner_classes>\n";
for (unsigned long i = 0; i < rec.protected_inner_classes.size(); ++i)
write_as_xml(rec.protected_inner_classes[i], fout, indent+4);
fout << pad << " </protected_inner_classes>\n";
fout << pad << "</class>\n";
// ----------------------------------------------------------------------------------------
void save_to_xml_file (
const std::vector<function_record>& functions,
const std::vector<class_record>& classes
ofstream fout("output.xml");
fout << "<!-- This XML file was generated using the htmlify tool available from -->" << endl;
fout << "<code>" << endl;
fout << " <classes>" << endl;
for (unsigned long i = 0; i < classes.size(); ++i)
write_as_xml(classes[i], fout, 4);
fout << "\n";
fout << " </classes>\n\n" << endl;
fout << " <global_functions>" << endl;
for (unsigned long i = 0; i < functions.size(); ++i)
write_as_xml(functions[i], fout);
fout << "\n";
fout << " </global_functions>" << endl;
fout << "</code>" << endl;
// ----------------------------------------------------------------------------------------
void generate_xml_markup(
const cmd_line_parser<char>::check_1a_c& parser,
const std::string& filter,
const unsigned long search_depth,
const unsigned long expand_tabs
// first figure out which files should be processed
std::vector<std::pair<string,string> > files;
obtain_list_of_files(parser, filter, search_depth, files);
std::vector<tok_function_record> tok_functions;
std::vector<tok_class_record> tok_classes;
for (unsigned long i = 0; i < files.size(); ++i)
ifstream fin(files[i].second.c_str());
if (!fin)
cerr << "Error opening file: " << files[i].second << endl;
process_file(fin, files[i].first, tok_functions, tok_classes);
std::vector<function_record> functions;
std::vector<class_record> classes;
convert_to_normal_records(tok_functions, tok_classes, expand_tabs, functions, classes);
save_to_xml_file(functions, classes);
// ----------------------------------------------------------------------------------------