evorl.algorithms.meta.pbt_workflow

Module Contents

Classes

PBTEvalMetric

PBTOffpolicyTrainMetric

PBTOffpolicyWorkflowTemplate

PBT Workflow Template for Off-policy algorithms with shared replay buffer.

PBTOptState

PBTTrainMetric

PBTWorkflowBase

PBTWorkflowMetric

PBTWorkflowTemplate

Standard PBT Workflow Template.

API

class evorl.algorithms.meta.pbt_workflow.PBTEvalMetric[source]

Bases: evorl.metrics.MetricBase

pop_episode_lengths: chex.Array

None

pop_episode_returns: chex.Array

None

class evorl.algorithms.meta.pbt_workflow.PBTOffpolicyTrainMetric[source]

Bases: evorl.algorithms.meta.pbt_workflow.PBTTrainMetric

rb_size: chex.Array

None

class evorl.algorithms.meta.pbt_workflow.PBTOffpolicyWorkflowTemplate(workflow: evorl.workflows.OffPolicyWorkflow, evaluator: evorl.evaluators.Evaluator, config: omegaconf.DictConfig)[source]

Bases: evorl.algorithms.meta.pbt_workflow.PBTWorkflowTemplate

PBT Workflow Template for Off-policy algorithms with shared replay buffer.

classmethod enable_jit() -> None[source]
learn(state: evorl.types.State) -> evorl.types.State[source]
setup(key: chex.PRNGKey)[source]
step(state: evorl.types.State) -> tuple[evorl.metrics.MetricBase, evorl.types.State][source]

class evorl.algorithms.meta.pbt_workflow.PBTOptState[source]

Bases: evorl.types.PyTreeData

class evorl.algorithms.meta.pbt_workflow.PBTTrainMetric[source]

Bases: evorl.metrics.MetricBase

pop: chex.ArrayTree

None

pop_episode_lengths: chex.Array

None

pop_episode_returns: chex.Array

None

pop_train_metrics: evorl.metrics.MetricBase

None

class evorl.algorithms.meta.pbt_workflow.PBTWorkflowBase(workflow: evorl.workflows.RLWorkflow, evaluator: evorl.evaluators.Evaluator, config: omegaconf.DictConfig)[source]

Bases: evorl.workflows.Workflow

abstract apply_hyperparams_to_workflow_state(workflow_state: evorl.types.State, hyperparams: evorl.types.PyTreeDict[str, chex.Numeric]) -> evorl.types.State[source]
classmethod build_from_config(config: omegaconf.DictConfig, enable_multi_devices=True, enable_jit: bool = True)[source]
classmethod enable_jit() -> None[source]
evaluate(state: evorl.types.State) -> evorl.types.State[source]
abstract exploit_and_explore(pbt_opt_state: evorl.algorithms.meta.pbt_workflow.PBTOptState, pop: chex.ArrayTree, pop_workflow_state: evorl.types.State, pop_metrics: chex.ArrayTree, key: chex.PRNGKey) -> tuple[chex.ArrayTree, evorl.types.State, evorl.algorithms.meta.pbt_workflow.PBTOptState][source]
setup(key: chex.PRNGKey)[source]
step(state: evorl.types.State) -> tuple[evorl.metrics.MetricBase, evorl.types.State][source]

class evorl.algorithms.meta.pbt_workflow.PBTWorkflowMetric[source]

Bases: evorl.metrics.MetricBase

iterations: chex.Array

‘zeros(…)’

sampled_timesteps_m: chex.Array

‘zeros(…)’

class evorl.algorithms.meta.pbt_workflow.PBTWorkflowTemplate(workflow: evorl.workflows.RLWorkflow, evaluator: evorl.evaluators.Evaluator, config: omegaconf.DictConfig)[source]

Bases: evorl.algorithms.meta.pbt_workflow.PBTWorkflowBase

Standard PBT Workflow Template.

exploit_and_explore(pbt_opt_state: evorl.algorithms.meta.pbt_workflow.PBTOptState, pop: chex.ArrayTree, pop_workflow_state: evorl.types.State, pop_metrics: chex.ArrayTree, key: chex.PRNGKey) -> tuple[chex.ArrayTree, evorl.types.State, evorl.algorithms.meta.pbt_workflow.PBTOptState][source]
learn(state: evorl.types.State) -> evorl.types.State[source]