updated docs

parent 8685719045
commit 57a0cda903
@@ -135,6 +135,10 @@ Davis E. King. <a href="http://jmlr.csail.mit.edu/papers/volume10/king09a/king09
      <item>vector_normalizer_frobmetric</item>
      <item>compute_lda_transform</item>
   </section>
   <section>
      <name>Reinforcement Learning</name>
      <item>lspi</item>
   </section>
   <section>
      <name>Feature Selection</name>
      <item>rank_features</item>
@@ -218,6 +222,7 @@ Davis E. King. <a href="http://jmlr.csail.mit.edu/papers/volume10/king09a/king09
      <item>assignment_function</item>
      <item>track_association_function</item>
      <item>graph_labeler</item>
      <item>policy</item>
   </section>

   <section>
@@ -252,6 +257,7 @@ Davis E. King. <a href="http://jmlr.csail.mit.edu/papers/volume10/king09a/king09
      <item>is_ranking_problem</item>
      <item>count_ranking_inversions</item>
      <item>learn_platt_scaling</item>
      <item>process_sample</item>

@@ -1635,6 +1641,51 @@ Davis E. King. <a href="http://jmlr.csail.mit.edu/papers/volume10/king09a/king09
      </examples>
   </component>

<!-- ************************************************************************* -->

   <component>
      <name>lspi</name>
      <file>dlib/control.h</file>
      <spec_file link="true">dlib/control/lspi_abstract.h</spec_file>
      <description>
         This object is an implementation of the reinforcement learning algorithm
         described in the following paper:
         <blockquote>
            Lagoudakis, Michail G., and Ronald Parr. "Least-squares policy
            iteration." The Journal of Machine Learning Research 4 (2003):
            1107-1149.
         </blockquote>
      </description>
   </component>
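To make the lspi entry concrete, here is a minimal sketch of how it might be driven, hedged on the interface documented in dlib/control/lspi_abstract.h and dlib/control/approximate_linear_models_abstract.h. The grid_feature_extractor type is hypothetical, not part of dlib; it exists only to satisfy the feature extractor contract (state_type, action_type, num_features(), get_features(), find_best_action()).

    #include <dlib/control.h>
    #include <dlib/matrix.h>
    #include <vector>
    using namespace dlib;

    // Hypothetical feature extractor for a 10-cell grid world with two actions.
    struct grid_feature_extractor
    {
        typedef int state_type;   // cell index, 0..9
        typedef int action_type;  // 0 = move left, 1 = move right

        unsigned long num_features() const { return 10*2; }

        void get_features (
            const state_type& state,
            const action_type& action,
            matrix<double,0,1>& feats
        ) const
        {
            feats.set_size(num_features());
            feats = 0;
            feats(state*2 + action) = 1;  // one-hot (state,action) indicator
        }

        action_type find_best_action (
            const state_type& state,
            const matrix<double,0,1>& w
        ) const
        {
            // Pick the action whose feature vector scores highest under w.
            matrix<double,0,1> f0, f1;
            get_features(state, 0, f0);
            get_features(state, 1, f1);
            return dot(w,f0) >= dot(w,f1) ? 0 : 1;
        }
    };

    int main()
    {
        // Observed (state, action, next state, reward) tuples from the process.
        std::vector<process_sample<grid_feature_extractor>> samples;
        // ... fill samples with recorded transitions ...

        lspi<grid_feature_extractor> trainer;
        trainer.set_discount(0.9);
        policy<grid_feature_extractor> p = trainer.train(samples);
    }

Since LSPI is an off-policy batch method, the samples can come from any exploration strategy or from previously logged data.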

<!-- ************************************************************************* -->

   <component>
      <name>policy</name>
      <file>dlib/control.h</file>
      <spec_file link="true">dlib/control/approximate_linear_models_abstract.h</spec_file>
      <description>
         This is a policy (i.e. a control law) based on a linear function approximator.
         You can use a tool like <a href="#lspi">lspi</a> to learn the parameters
         of a policy.
      </description>
   </component>
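As a usage sketch (again hedged on the interface in dlib/control/approximate_linear_models_abstract.h): a policy is a function object, so applying the control law is a single call. The grid_feature_extractor and p names refer to the hypothetical lspi example above.

    int state  = 3;         // current state of the process
    int action = p(state);  // action the learned policy considers best here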

<!-- ************************************************************************* -->

   <component>
      <name>process_sample</name>
      <file>dlib/control.h</file>
      <spec_file link="true">dlib/control/approximate_linear_models_abstract.h</spec_file>
      <description>
         This object holds a training sample for a reinforcement learning algorithm
         (e.g. <a href="#lspi">lspi</a>).
         In particular, it contains a state, action, reward, next state sample from
         some process.
      </description>
   </component>
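A hedged sketch of collecting such samples, continuing the hypothetical grid example and assuming the (state, action, next state, reward) constructor order documented in approximate_linear_models_abstract.h. The step_environment and reward_for helpers are hypothetical stand-ins for whatever simulator or logged data source you have.

    std::vector<process_sample<grid_feature_extractor>> samples;
    int s = 0;
    for (int i = 0; i < 1000; ++i)
    {
        int a      = std::rand()%2;            // exploratory action
        int s_next = step_environment(s, a);   // hypothetical transition function
        double r   = reward_for(s_next);       // hypothetical reward function
        samples.push_back(process_sample<grid_feature_extractor>(s, a, s_next, r));
        s = s_next;
    }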

<!-- ************************************************************************* -->

   <component>
@@ -267,6 +267,9 @@
   <term file="algorithms.html" name="random_subset_selector" include="dlib/statistics.h"/>
   <term file="algorithms.html" name="randomly_subsample" include="dlib/statistics.h"/>

   <term file="ml.html" name="lspi" include="dlib/control.h"/>
   <term file="ml.html" name="policy" include="dlib/control.h"/>
   <term file="ml.html" name="process_sample" include="dlib/control.h"/>
   <term file="ml.html" name="select_all_distinct_labels" include="dlib/svm.h"/>
   <term file="dlib/svm/multiclass_tools_abstract.h.html" name="find_missing_pairs" include="dlib/svm.h"/>