evorl.algorithms.contrib.td3_v3

Module Contents

Classes

TD3Agent

The Agent for TD3.

TD3NetworkParams

Contains training state for the learner.

TD3TrainMetric

TD3V3Workflow

A TD3 implementation similar to those in SB3 and CleanRL.

Functions

Data

API

evorl.algorithms.contrib.td3_v3.MISSING_LOSS

None

class evorl.algorithms.contrib.td3_v3.TD3Agent[source]

Bases: evorl.agent.Agent

The Agent for TD3.

actor_network: flax.linen.Module

None

clip_policy_noise: float

0.5

compute_actions(agent_state: evorl.agent.AgentState, sample_batch: evorl.sample_batch.SampleBatch, key: chex.PRNGKey) → tuple[evorl.types.Action, evorl.types.PolicyExtraInfo][source]
critic_network: flax.linen.Module

None

critics_in_actor_loss: str

‘first’

discount: float

0.99

evaluate_actions(agent_state: evorl.agent.AgentState, sample_batch: evorl.sample_batch.SampleBatch, key: chex.PRNGKey) → tuple[evorl.types.Action, evorl.types.PolicyExtraInfo][source]
exploration_epsilon: float

0.5

init(obs_space: evorl.envs.Space, action_space: evorl.envs.Space, key: chex.PRNGKey) → evorl.agent.AgentState[source]
property normalize_obs
obs_preprocessor: Any

‘pytree_field(…)’

policy_noise: float

0.2

class evorl.algorithms.contrib.td3_v3.TD3NetworkParams[source]

Bases: evorl.types.PyTreeData

Contains training state for the learner.

actor_params: evorl.types.Params

None

critic1_params: evorl.types.Params

None

critic2_params: evorl.types.Params

None

target_actor_params: evorl.types.Params

None

target_critic1_params: evorl.types.Params

None

target_critic2_params: evorl.types.Params

None

class evorl.algorithms.contrib.td3_v3.TD3TrainMetric[source]

Bases: evorl.metrics.MetricBase

actor_loss: chex.Array

None

critic1_loss: chex.Array

None

critic2_loss: chex.Array

None

q1: chex.Array

None

q2: chex.Array

None

class evorl.algorithms.contrib.td3_v3.TD3V3Workflow(env: evorl.envs.Env, agent: evorl.agent.Agent, optimizer: optax.GradientTransformation, evaluator: evorl.evaluators.Evaluator, replay_buffer: evorl.replay_buffers.AbstractReplayBuffer, config: omegaconf.DictConfig)[source]

Bases: evorl.algorithms.offpolicy_utils.OffPolicyWorkflowTemplate

A TD3 implementation similar to those in SB3 and CleanRL.

learn(state: evorl.types.State) → evorl.types.State[source]
classmethod name()[source]
step(state: evorl.types.State) → tuple[evorl.metrics.MetricBase, evorl.types.State][source]
evorl.algorithms.contrib.td3_v3.make_mlp_td3_agent(action_space: evorl.envs.Space, norm_layer_type: str = 'none', critic_hidden_layer_sizes: tuple[int] = (256, 256), actor_hidden_layer_sizes: tuple[int] = (256, 256), discount: float = 0.99, exploration_epsilon: float = 0.5, policy_noise: float = 0.2, clip_policy_noise: float = 0.5, critics_in_actor_loss: str = 'first', normalize_obs: bool = False)[source]