Reinforcement Learning
ml-agents
TensorBoard
ONNX
Pyramids
deep-reinforcement-learning
ML-Agents-Pyramids
Instructions to use loisonchambers/ppo-PyramidsTraining with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- ml-agents
How to use loisonchambers/ppo-PyramidsTraining with ml-agents:
mlagents-load-from-hf --repo-id="loisonchambers/ppo-PyramidsTraining" --local-dir="./downloads"
- Notebooks
- Google Colab
- Kaggle
| { | |
| "name": "root", | |
| "gauges": { | |
| "Pyramids.Policy.Entropy.mean": { | |
| "value": 0.4998490810394287, | |
| "min": 0.4728173017501831, | |
| "max": 1.4580899477005005, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.Entropy.sum": { | |
| "value": 15043.4580078125, | |
| "min": 14237.474609375, | |
| "max": 44232.6171875, | |
| "count": 33 | |
| }, | |
| "Pyramids.Step.mean": { | |
| "value": 989952.0, | |
| "min": 29983.0, | |
| "max": 989952.0, | |
| "count": 33 | |
| }, | |
| "Pyramids.Step.sum": { | |
| "value": 989952.0, | |
| "min": 29983.0, | |
| "max": 989952.0, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.ExtrinsicValueEstimate.mean": { | |
| "value": 0.2735965847969055, | |
| "min": -0.1106395274400711, | |
| "max": 0.29286447167396545, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.ExtrinsicValueEstimate.sum": { | |
| "value": 71.1351089477539, | |
| "min": -26.664125442504883, | |
| "max": 74.3875732421875, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.RndValueEstimate.mean": { | |
| "value": 0.034446023404598236, | |
| "min": 0.011637752875685692, | |
| "max": 0.33293867111206055, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.RndValueEstimate.sum": { | |
| "value": 8.955965995788574, | |
| "min": 2.955989122390747, | |
| "max": 79.2394027709961, | |
| "count": 33 | |
| }, | |
| "Pyramids.Losses.PolicyLoss.mean": { | |
| "value": 0.06774516822028374, | |
| "min": 0.06390888027395827, | |
| "max": 0.07277049920866974, | |
| "count": 33 | |
| }, | |
| "Pyramids.Losses.PolicyLoss.sum": { | |
| "value": 0.9484323550839725, | |
| "min": 0.48003731949299033, | |
| "max": 1.0595410776012988, | |
| "count": 33 | |
| }, | |
| "Pyramids.Losses.ValueLoss.mean": { | |
| "value": 0.010847047726787658, | |
| "min": 0.00011240023208295882, | |
| "max": 0.010847047726787658, | |
| "count": 33 | |
| }, | |
| "Pyramids.Losses.ValueLoss.sum": { | |
| "value": 0.15185866817502722, | |
| "min": 0.0015736032491614235, | |
| "max": 0.15185866817502722, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.LearningRate.mean": { | |
| "value": 7.560226051385713e-06, | |
| "min": 7.560226051385713e-06, | |
| "max": 0.0002952370730162143, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.LearningRate.sum": { | |
| "value": 0.00010584316471939998, | |
| "min": 0.00010584316471939998, | |
| "max": 0.0033817742727419997, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.Epsilon.mean": { | |
| "value": 0.10252004285714286, | |
| "min": 0.10252004285714286, | |
| "max": 0.19841235714285713, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.Epsilon.sum": { | |
| "value": 1.4352806, | |
| "min": 1.3888865, | |
| "max": 2.527258, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.Beta.mean": { | |
| "value": 0.00026175228142857144, | |
| "min": 0.00026175228142857144, | |
| "max": 0.009841394478571427, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.Beta.sum": { | |
| "value": 0.00366453194, | |
| "min": 0.00366453194, | |
| "max": 0.1127530742, | |
| "count": 33 | |
| }, | |
| "Pyramids.Losses.RNDLoss.mean": { | |
| "value": 0.016172276809811592, | |
| "min": 0.01579626277089119, | |
| "max": 0.42078861594200134, | |
| "count": 33 | |
| }, | |
| "Pyramids.Losses.RNDLoss.sum": { | |
| "value": 0.2264118641614914, | |
| "min": 0.22114768624305725, | |
| "max": 2.9455204010009766, | |
| "count": 33 | |
| }, | |
| "Pyramids.Environment.EpisodeLength.mean": { | |
| "value": 521.5833333333334, | |
| "min": 521.5833333333334, | |
| "max": 999.0, | |
| "count": 33 | |
| }, | |
| "Pyramids.Environment.EpisodeLength.sum": { | |
| "value": 31295.0, | |
| "min": 16526.0, | |
| "max": 33195.0, | |
| "count": 33 | |
| }, | |
| "Pyramids.Environment.CumulativeReward.mean": { | |
| "value": 1.011603310952584, | |
| "min": -0.9999500517733395, | |
| "max": 1.0499265009955483, | |
| "count": 33 | |
| }, | |
| "Pyramids.Environment.CumulativeReward.sum": { | |
| "value": 60.69619865715504, | |
| "min": -31.998401656746864, | |
| "max": 60.69619865715504, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.ExtrinsicReward.mean": { | |
| "value": 1.011603310952584, | |
| "min": -0.9999500517733395, | |
| "max": 1.0499265009955483, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.ExtrinsicReward.sum": { | |
| "value": 60.69619865715504, | |
| "min": -31.998401656746864, | |
| "max": 60.69619865715504, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.RndReward.mean": { | |
| "value": 0.08584638049869682, | |
| "min": 0.08584638049869682, | |
| "max": 8.502948817523087, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.RndReward.sum": { | |
| "value": 5.150782829921809, | |
| "min": 4.382882176010753, | |
| "max": 144.55012989789248, | |
| "count": 33 | |
| }, | |
| "Pyramids.IsTraining.mean": { | |
| "value": 1.0, | |
| "min": 1.0, | |
| "max": 1.0, | |
| "count": 33 | |
| }, | |
| "Pyramids.IsTraining.sum": { | |
| "value": 1.0, | |
| "min": 1.0, | |
| "max": 1.0, | |
| "count": 33 | |
| } | |
| }, | |
| "metadata": { | |
| "timer_format_version": "0.1.0", | |
| "start_time_seconds": "1775641717", | |
| "python_version": "3.10.12 (main, Jul 5 2023, 18:54:27) [GCC 11.2.0]", | |
| "command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=PyramidsTraining --no-graphics", | |
| "mlagents_version": "1.2.0.dev0", | |
| "mlagents_envs_version": "1.2.0.dev0", | |
| "communication_protocol_version": "1.5.0", | |
| "pytorch_version": "2.8.0+cu128", | |
| "numpy_version": "1.23.5", | |
| "end_time_seconds": "1775644175" | |
| }, | |
| "total": 2458.7961392909997, | |
| "count": 1, | |
| "self": 0.529840712999885, | |
| "children": { | |
| "run_training.setup": { | |
| "total": 0.024792724999997517, | |
| "count": 1, | |
| "self": 0.024792724999997517 | |
| }, | |
| "TrainerController.start_learning": { | |
| "total": 2458.2415058529996, | |
| "count": 1, | |
| "self": 1.620740601055786, | |
| "children": { | |
| "TrainerController._reset_env": { | |
| "total": 2.2551514990000214, | |
| "count": 1, | |
| "self": 2.2551514990000214 | |
| }, | |
| "TrainerController.advance": { | |
| "total": 2454.268330093944, | |
| "count": 63333, | |
| "self": 1.6589472269056387, | |
| "children": { | |
| "env_step": { | |
| "total": 1726.443126869043, | |
| "count": 63333, | |
| "self": 1550.6996559902168, | |
| "children": { | |
| "SubprocessEnvManager._take_step": { | |
| "total": 174.753622907901, | |
| "count": 63333, | |
| "self": 5.3960583648686224, | |
| "children": { | |
| "TorchPolicy.evaluate": { | |
| "total": 169.35756454303237, | |
| "count": 62554, | |
| "self": 169.35756454303237 | |
| } | |
| } | |
| }, | |
| "workers": { | |
| "total": 0.9898479709252115, | |
| "count": 63333, | |
| "self": 0.0, | |
| "children": { | |
| "worker_root": { | |
| "total": 2451.6542174919828, | |
| "count": 63333, | |
| "is_parallel": true, | |
| "self": 1036.9193373230612, | |
| "children": { | |
| "run_training.setup": { | |
| "total": 0.0, | |
| "count": 0, | |
| "is_parallel": true, | |
| "self": 0.0, | |
| "children": { | |
| "steps_from_proto": { | |
| "total": 0.0019013670000731508, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.0005660070000885753, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 0.0013353599999845756, | |
| "count": 8, | |
| "is_parallel": true, | |
| "self": 0.0013353599999845756 | |
| } | |
| } | |
| }, | |
| "UnityEnvironment.step": { | |
| "total": 0.05508255199993073, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.0010825760000443552, | |
| "children": { | |
| "UnityEnvironment._generate_step_input": { | |
| "total": 0.000504071000023032, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.000504071000023032 | |
| }, | |
| "communicator.exchange": { | |
| "total": 0.051618872999824816, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.051618872999824816 | |
| }, | |
| "steps_from_proto": { | |
| "total": 0.0018770320000385254, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.0004365929994492035, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 0.001440439000589322, | |
| "count": 8, | |
| "is_parallel": true, | |
| "self": 0.001440439000589322 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "UnityEnvironment.step": { | |
| "total": 1414.7348801689216, | |
| "count": 63332, | |
| "is_parallel": true, | |
| "self": 38.156549167969615, | |
| "children": { | |
| "UnityEnvironment._generate_step_input": { | |
| "total": 26.592376583997975, | |
| "count": 63332, | |
| "is_parallel": true, | |
| "self": 26.592376583997975 | |
| }, | |
| "communicator.exchange": { | |
| "total": 1223.4305275369707, | |
| "count": 63332, | |
| "is_parallel": true, | |
| "self": 1223.4305275369707 | |
| }, | |
| "steps_from_proto": { | |
| "total": 126.55542687998332, | |
| "count": 63332, | |
| "is_parallel": true, | |
| "self": 25.934962827198433, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 100.62046405278488, | |
| "count": 506656, | |
| "is_parallel": true, | |
| "self": 100.62046405278488 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "trainer_advance": { | |
| "total": 726.166255997995, | |
| "count": 63333, | |
| "self": 2.9151220540338727, | |
| "children": { | |
| "process_trajectory": { | |
| "total": 138.59598950496343, | |
| "count": 63333, | |
| "self": 138.39573979596275, | |
| "children": { | |
| "RLTrainer._checkpoint": { | |
| "total": 0.20024970900067274, | |
| "count": 2, | |
| "self": 0.20024970900067274 | |
| } | |
| } | |
| }, | |
| "_update_policy": { | |
| "total": 584.6551444389977, | |
| "count": 446, | |
| "self": 323.9309060180062, | |
| "children": { | |
| "TorchPPOOptimizer.update": { | |
| "total": 260.72423842099147, | |
| "count": 22821, | |
| "self": 260.72423842099147 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "trainer_threads": { | |
| "total": 1.038999471347779e-06, | |
| "count": 1, | |
| "self": 1.038999471347779e-06 | |
| }, | |
| "TrainerController._save_models": { | |
| "total": 0.09728262000044197, | |
| "count": 1, | |
| "self": 0.0012766320005539455, | |
| "children": { | |
| "RLTrainer._checkpoint": { | |
| "total": 0.09600598799988802, | |
| "count": 1, | |
| "self": 0.09600598799988802 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } |