1. RLPy
2. Untitled project
3. RLPy

Commits

RLPy committed 44ee694

final settings

  • Participants
  • Parent commits 66c362a
  • Branches poleval

Comments (0)

Files changed (4)

File examples/blocksworld/poleval/tabular.py

View file
 
 
 def make_experiment(id=1, path="./Results/Temp/{domain}/poleval/tab",
-                    boyan_N0=9312.,
-                    initial_alpha=0.19433,
-                    lambda_=0.152):
+                    boyan_N0=60.477342,
+                    initial_alpha=0.133447,
+                    lambda_=0.6613):
     logger = Logger()
     max_steps = 1000000
 

File examples/uav/poleval/ifdd.py

View file
 
 
 def make_experiment(id=1, path="./Results/Temp/{domain}/poleval/ifdd/",
-                    discover_threshold=100., #0.42878655,
+                    discover_threshold=96.179851, #0.42878655,
                     lambda_=0.,
-                    boyan_N0=1375.098,
-                    initial_alpha=0.016329):
+                    boyan_N0=60.477,
+                    initial_alpha=0.21021):
     logger = Logger()
     max_steps = 500000
     sparsify = 1

File examples/uav/poleval/ifddk.py

View file
 
 
 def make_experiment(id=1, path="./Results/Temp/{domain}/poleval/ifdd/",
-                    discover_threshold=100., #0.42878655,
-                    lambda_=0.701309,
-                    kappa=1e-7,
-                    boyan_N0=1375.098,
-                    initial_alpha=0.016329):
+                    discover_threshold=17.9708712, #0.42878655,
+                    lambda_=0.57053162,
+                    kappa=7.6e-9,
+                    boyan_N0=78454.41838,
+                    initial_alpha=0.16240166):
     logger = Logger()
     max_steps = 500000
     sparsify = 1

File examples/uav/poleval/tabular.py

View file
     sparsify = 1
     domain = PST(NUM_UAV=4, motionNoise=0, logger=logger)
     pol = StoredPolicy(filename="__rlpy_location__/Policies/PST_4UAV_mediocre_policy_nocache.pck")
-    representation = HashedTabular(domain, logger, memory=20000, safety="super")
+    representation = HashedTabular(domain, logger, memory=40000, safety="super")
     estimator = TDLearning(representation=representation, lambda_=lambda_,
                            boyan_N0=boyan_N0, initial_alpha=initial_alpha, alpha_decay_mode="boyan")
     experiment = PolicyEvaluationExperiment(estimator, domain, pol, max_steps=max_steps, num_checks=20,