diff --git a/airplane/__init__.py b/airplane/__init__.py new file mode 100644 index 0000000..11c1bb8 --- /dev/null +++ b/airplane/__init__.py @@ -0,0 +1,15 @@ +from gymnasium.envs.registration import register + + +register( + id="ReducedSymmetricGliderPullout-v0", + entry_point="airplane.reduced_symmetric_glider_pullout:ReducedSymmetricGliderPullout", + max_episode_steps=100, +) + + +register( + id="ReducedBankedGliderPullout-v0", + entry_point="airplane.reduced_banked_glider_pullout:ReducedBankedGliderPullout", + max_episode_steps=100, +) \ No newline at end of file diff --git a/airplane/airplane_env.py b/airplane/airplane_env.py new file mode 100644 index 0000000..3781343 --- /dev/null +++ b/airplane/airplane_env.py @@ -0,0 +1,47 @@ +import numpy as np +from gymnasium import Env + +class AirplaneEnv(Env): + metadata = {"render_modes": ["human", "ascii", "ansi"], "render_fps": 60} + # TODO(gtorre): use render fps + + def __init__(self, airplane, render_mode=None): + self.airplane = airplane + + self.visualiser = None + self.render_mode = render_mode + assert render_mode is None or render_mode in self.metadata["render_modes"] + self.window = None + self.clock = None + + def seed(self, seed=None): + np.random.seed(seed) + + def render(self, mode: str | None = "ascii"): + """Renders the environment. + :param mode: str, the mode to render with: + - human: render to the current display or terminal and + return nothing. Usually for human consumption. + - ansi: Return a string (str) or StringIO.StringIO containing a + terminal-style text representation. The text can include newlines + and ANSI escape sequences (e.g. for colors). 
+ """ + if mode == "human": + pass + #if not self.visualiser: + #self.visualiser = Visualizer(self.airplane) + #self.visualiser.plot() + + else: # ANSI or ASCII + # TODO: Add additional observations + print( + f"\u001b[34m Flight Path Angle (deg): {np.rad2deg(self.airplane.flight_path_angle):.2f}\u001b[37m" + ) + # TODO: Proper stall prediction + if self.airplane.flight_path_angle > 0.7: + print("\u001b[35m -- STALL --\u001b[37m") + + def close(self): + if self.window is not None: + # close + raise NotImplementedError \ No newline at end of file diff --git a/airplane/grumman.py b/airplane/grumman.py new file mode 100644 index 0000000..e451825 --- /dev/null +++ b/airplane/grumman.py @@ -0,0 +1,131 @@ +import numpy as np + + +class Grumman: + #################################### + ### Grumman American AA-1 Yankee ### + #################################### + """Base class for airplane parameters""" + + def __init__(self): + ###################### + ### Sim parameters ### + ###################### + self.TIME_STEP = 0.01 + self.GRAVITY = 9.81 + self.AIR_DENSITY = 1.225 # Density (ρ) [kg/m3] + + ########################### + ### Airplane parameters ### + ########################### + # Aerodynamic model: CL coefficients + self.CL_0 = 0.41 + self.CL_ALPHA = 4.6983 + self.CL_ELEVATOR = 0.361 + self.CL_QHAT = 2.42 + # Aerodynamic model: CD coefficients + self.CD_0 = 0.0525 + self.CD_ALPHA = 0.2068 + self.CD_ALPHA2 = 1.8712 + # Aerodynamic model: Cm coefficients + self.CM_0 = 0.076 + self.CM_ALPHA = -0.8938 + self.CM_ELEVATOR = -1.0313 + self.CM_QHAT = -7.15 + # Aerodynamic model: Cl coefficients + self.Cl_BETA = -0.1089 + self.Cl_PHAT = -0.52 + self.Cl_RHAT = 0.19 + self.Cl_AILERON = -0.1031 + self.Cl_RUDDER = 0.0143 + # Physical model + self.MASS = 697.18 # Mass (m) [kg] + self.WING_SURFACE_AREA = 9.1147 # Wing surface area (S) [m2] + self.CHORD = 1.22 # Chord (c) [m] + self.WING_SPAN = 7.46 # Wing Span (b) [m] + self.I_XX = 808.06 # Inertia [Kg.m^2] + self.I_YY = 1011.43 
# Inertia [Kg.m^2] + self.ALPHA_STALL = np.deg2rad(15) # Stall angle of attack (αs) [rad] + self.ALPHA_NEGATIVE_STALL = np.deg2rad(-7) # Negative stall angle of attack (αs) [rad] + self.CL_STALL = self.CL_0 + self.CL_ALPHA * self.ALPHA_STALL + self.CL_REF = self.CL_STALL + # self.STALL_AIRSPEED = 32.19 # Stall air speed (Vs) [m/s] + self.STALL_AIRSPEED = np.sqrt(self.MASS * self.GRAVITY / (0.5 * self.AIR_DENSITY * \ + self.WING_SURFACE_AREA * self.CL_REF)) # Stall air speed (Vs) [m/s] + self.MAX_CRUISE_AIRSPEED = 2 * self.STALL_AIRSPEED # Maximum air speed (Vs) [m/s] + + # Throttle model + self.THROTTLE_LINEAR_MAPPING = None + self._initialize_throttle_model() + + def _update_state_from_derivative(self, value_to_update, value_derivative): + value_to_update += self.TIME_STEP * value_derivative + return value_to_update + + def _alpha_from_cl(self, c_lift): + alpha = (c_lift - self.CL_0) / self.CL_ALPHA + return alpha + + def _cl_from_lift_force_and_speed(self, lift_force, airspeed): + cl = 2 * lift_force / (self.AIR_DENSITY * self.WING_SURFACE_AREA * airspeed ** 2) + return cl + + def _cl_from_alpha(self, alpha, elevator, q_hat): + # TODO: review model + if alpha <= self.ALPHA_NEGATIVE_STALL: + c_lift = self.CL_0 + self.CL_ALPHA * self.ALPHA_NEGATIVE_STALL + elif alpha >= self.ALPHA_STALL: + # Stall model: Lift saturation + c_lift = self.CL_0 + self.CL_ALPHA * self.ALPHA_STALL + # Stall model: Lift reduction with opposite slope + # c_lift = - self.CL_ALPHA * alpha + self.CL_0 + 2 * self.CL_ALPHA * self.ALPHA_STALL + else: + c_lift = self.CL_0 + self.CL_ALPHA * alpha + self.CL_ELEVATOR * elevator + self.CL_QHAT * q_hat + return c_lift + + def _lift_force_at_speed_and_cl(self, airspeed, lift_coefficient): + return 0.5 * self.AIR_DENSITY * self.WING_SURFACE_AREA * airspeed ** 2 * lift_coefficient + + def _cd_from_alpha(self, alpha): + c_drag = self.CD_0 + self.CD_ALPHA * alpha + self.CD_ALPHA2 * (alpha ** 2) + return c_drag + + def _cd_from_cl(self, c_lift): + c_drag = 
self._cd_from_alpha(self._alpha_from_cl(c_lift)) + return c_drag + + def _drag_force_at_speed_and_cd(self, airspeed, drag_coefficient): + return 0.5 * self.AIR_DENSITY * self.WING_SURFACE_AREA * airspeed ** 2 * drag_coefficient + + def _drag_force_at_cruise_speed(self, airspeed): + cruise_lift_force = self.MASS * self.GRAVITY + cruise_cl = self._cl_from_lift_force_and_speed(cruise_lift_force, airspeed) + alpha = self._alpha_from_cl(cruise_cl) + cruise_cd = self._cd_from_alpha(alpha) + drag_force = self._drag_force_at_speed_and_cd(airspeed, cruise_cd) + return drag_force + + def _rolling_moment_coefficient(self, beta, p_hat, r_hat, aileron, rudder): + c_rolling_moment = self.Cl_BETA * beta + self.Cl_PHAT * p_hat + self.Cl_RHAT * r_hat + \ + self.Cl_AILERON * aileron + self.Cl_RUDDER * rudder + return c_rolling_moment + + def _rolling_moment_at_speed_and_cl(self, airspeed, rolling_moment_coefficient): + return 0.5 * self.AIR_DENSITY * self.WING_SURFACE_AREA * self.WING_SPAN * airspeed ** 2 * rolling_moment_coefficient + + def _pitching_moment_coefficient(self, alpha, elevator, q_hat): + c_pitch_moment = self.CM_0 + self.CM_ALPHA * alpha + self.CM_ELEVATOR * elevator + self.CM_QHAT * q_hat + return c_pitch_moment + + def _pitching_moment_at_speed_and_cm(self, airspeed, pitching_moment_coefficient): + return 0.5 * self.AIR_DENSITY * self.WING_SURFACE_AREA * self.CHORD * airspeed ** 2 * pitching_moment_coefficient + + def _initialize_throttle_model(self, ): + # Throttle model: Thrust force = Kt * δ_throttle + # Max Thrust -> Kt * 1 = Drag(V=Vmax) -> Kt = 0.5 ρ S (Vmax)^2 CD + # δ_throttle = 1.0 -> Max Cruise speed: V' = Vmax -> V_dot = 0 = Thrust Force - Drag Force + self.THROTTLE_LINEAR_MAPPING = self._drag_force_at_cruise_speed(self.MAX_CRUISE_AIRSPEED) + + def _thrust_force_at_throttle(self, throttle): + thrust_force = self.THROTTLE_LINEAR_MAPPING * throttle + return thrust_force \ No newline at end of file diff --git a/airplane/reduced_banked_glider_pullout.py 
b/airplane/reduced_banked_glider_pullout.py new file mode 100644 index 0000000..e327c09 --- /dev/null +++ b/airplane/reduced_banked_glider_pullout.py @@ -0,0 +1,71 @@ +import numpy as np +import gymnasium +from gymnasium import spaces +from matplotlib import pyplot as plt +from airplane.reduced_grumman import ReducedGrumman +from airplane.airplane_env import AirplaneEnv + + +class ReducedBankedGliderPullout(AirplaneEnv): + + def __init__(self, render_mode=None): + + self.airplane = ReducedGrumman() + super().__init__(self.airplane) + + """ bins_space = { + "flight_path_angle": np.linspace(np.deg2rad(-90), np.deg2rad(0), 20, dtype=np.float32), # Flight Path Angle (γ) (0) + "airspeed_norm": np.linspace(0.7, 4.0, 20, dtype=np.float32), # Air Speed (V) (1) + "bank_angle": np.linspace( np.deg2rad(-20), np.deg2rad(200), 20, dtype=np.float32), # Bank Angle (mu) (2) + } + + action_space= np.array(np.meshgrid(np.linspace(-0.5, 1.0, 5, dtype=np.float32), + np.linspace(np.deg2rad(-30), np.deg2rad(30), 5, dtype=np.float32))).T.reshape(-1, 2) + """ + + # Observation space: Flight Path Angle (γ), Air Speed (V), Bank Angle (μ) + self.observation_space = spaces.Box(np.array([np.deg2rad(-180), 0.7, np.deg2rad(-20)], np.float32), + np.array([np.deg2rad(0), 4.0, np.deg2rad(200)], np.float32), shape=(3,), dtype=np.float32) + # Action space: Lift Coefficient (CL), Bank Rate (μ') + self.action_space = spaces.Box(np.array([-0.5, np.deg2rad(-30)], np.float32), np.array([1.0, np.deg2rad(30)], np.float32), shape=(2,), dtype=np.float32) + + def _get_obs(self): + return np.vstack([self.airplane.flight_path_angle, self.airplane.airspeed_norm, self.airplane.bank_angle], dtype=np.float32).T + + def _get_info(self): + return {} + + def reset(self, seed=None, options=None): + + # Choose the initial agent's state uniformly + [flight_path_angle, airspeed_norm, bank_angle] = np.random.uniform(self.observation_space.low, self.observation_space.high) + self.airplane.reset(flight_path_angle, 
airspeed_norm, bank_angle) + + observation = self._get_obs(), {} + + return observation + + def step(self, action: list): + # Update state + action = np.clip(action, self.action_space.low, self.action_space.high) + c_lift = action[0] + bank_rate = action[1] + init_terminal = self.termination() + + self.airplane.command_airplane(c_lift, bank_rate, 0) + + # Calculate step reward: Height Loss + # TODO: Analyze policy performance based on reward implementation. + reward = self.airplane.TIME_STEP * self.airplane.airspeed_norm * np.sin(self.airplane.flight_path_angle) #- 1e-3 * bank_rate ** 2 + #reward = self.airplane.TIME_STEP * (self.airspeed_norm * self.STALL_AIRSPEED) * np.sin(self.airplane.flight_path_angle) + terminated = self.termination() + observation = self._get_obs() + info = self._get_info() + terminated = self.termination() | init_terminal + reward = np.where(init_terminal, 0, reward) + return observation, reward, terminated, False, info + + + def termination(self,): + terminate = np.where(((self.airplane.flight_path_angle>=0) | (self.airplane.flight_path_angle<=-180)) & (self.airplane.airspeed_norm >= 1) , True, False) + return terminate \ No newline at end of file diff --git a/airplane/reduced_grumman.py b/airplane/reduced_grumman.py new file mode 100644 index 0000000..c577bce --- /dev/null +++ b/airplane/reduced_grumman.py @@ -0,0 +1,56 @@ +import numpy as np +from airplane.grumman import Grumman + +class ReducedGrumman(Grumman): + #################################### + ### Grumman American AA-1 Yankee ### + #################################### + """Class for simplified airplane state and dynamics""" + + # NOTE: Commands as seperate objects? e.g. bank.rotate(airplane), + # throttle.accelerate(airplane), etc. + # NOTE: Use of α instead of Cl? 
+ + def __init__(self): + super().__init__() + ########################## + ### Airplane variables ### + ########################## + self.flight_path_angle = np.zeros(10000, dtype=np.float32) # Flight Path Angle (γ) [rad] + self.airspeed_norm = np.ones_like(self.flight_path_angle, dtype=np.float32) # Air Speed (V/Vs) [1] + self.bank_angle = 0.0 # Bank Angle (μ) [rad] + # previous commands + self.last_c_lift = 0.0 + self.last_bank_rate = 0.0 + self.last_throttle = 0.0 + + def command_airplane(self, c_lift, bank_rate, delta_throttle): + self.last_c_lift = c_lift + self.last_bank_rate = bank_rate + self.last_throttle = delta_throttle + + c_drag = self._cd_from_cl(c_lift) + + # V_dot = - g sin γ - 0.5 * (ρ S V^2 CD / m) + (thrust force / m) + airspeed_dot = - self.GRAVITY * np.sin(self.flight_path_angle) - 0.5 * self.AIR_DENSITY * ( + self.WING_SURFACE_AREA / self.MASS) * (self.airspeed_norm * self.STALL_AIRSPEED) ** 2 * c_drag \ + + (self.THROTTLE_LINEAR_MAPPING * delta_throttle / self.MASS) + + # γ_dot = 0.5 * (ρ S V CL cos µ / m) - g cos γ / V + flight_path_angle_dot = 0.5 * self.AIR_DENSITY * (self.WING_SURFACE_AREA / self.MASS) * ( + self.airspeed_norm * self.STALL_AIRSPEED) * c_lift * np.cos(self.bank_angle) \ + - (self.GRAVITY / (self.airspeed_norm * self.STALL_AIRSPEED)) * np.cos( + self.flight_path_angle) + + # μ_dot = μ_dot_commanded + bank_angle_dot = bank_rate + + + self.airspeed_norm = self._update_state_from_derivative(self.airspeed_norm, airspeed_dot / self.STALL_AIRSPEED) + self.flight_path_angle = self._update_state_from_derivative(self.flight_path_angle, flight_path_angle_dot) + self.bank_angle = self._update_state_from_derivative(self.bank_angle, bank_angle_dot) + + def reset(self, flight_path_angle, airspeed_norm, bank_angle): + self.flight_path_angle = flight_path_angle + self.airspeed_norm = airspeed_norm + self.bank_angle = bank_angle \ No newline at end of file diff --git a/airplane/reduced_symmetric_glider_pullout.py 
b/airplane/reduced_symmetric_glider_pullout.py new file mode 100644 index 0000000..b189229 --- /dev/null +++ b/airplane/reduced_symmetric_glider_pullout.py @@ -0,0 +1,92 @@ +import numpy as np +from gymnasium import spaces + +from airplane.reduced_grumman import ReducedGrumman +from airplane.airplane_env import AirplaneEnv + +try: + import cupy as xp + if not xp.cuda.is_available(): + raise ImportError("CUDA is not available. Falling back to NumPy.") +except (ImportError, AttributeError): + xp = np + +class ReducedSymmetricGliderPullout(AirplaneEnv): + + def __init__(self, render_mode=None): + self.airplane = ReducedGrumman() + super().__init__(self.airplane) + + # Observation space: Flight Path Angle (γ), Air Speed (V) + self.observation_space = spaces.Box(np.array([-np.pi, 0.6], np.float32), + np.array([0, 4.0], np.float32), shape=(2,), dtype=np.float32) + # Action space: Lift Coefficient + self.action_space = spaces.Box(-0.5, 1.0, shape=(1,), dtype=np.float32) + + def _get_obs(self): + return np.vstack([self.airplane.flight_path_angle, self.airplane.airspeed_norm], dtype=np.float32).T + + def _get_info(self): + return {} + + def reset(self, seed=None, options=None): + + # Choose the initial agent's state uniformly + [flight_path_angle, airspeed_norm] = np.random.uniform(self.observation_space.low, self.observation_space.high) + self.airplane.reset(flight_path_angle, airspeed_norm, 0) + + observation = self._get_obs() + # clip the observation to the observation space + observation = np.clip(observation, self.observation_space.low, self.observation_space.high).flatten() + assert self.observation_space.contains(observation), "Observation is not within the observation space!" 
+ return observation, {} + + def step(self, action: list): + # Update state + c_lift = action #action[0] + #self.airplane.command_airplane(c_lift, 0, 0) + + delta_throttle = 0 + bank_rate = 0 + + init_terminal = self.termination() + + self.airplane.last_c_lift = c_lift + self.airplane.last_bank_rate = bank_rate + self.airplane.last_throttle = delta_throttle + + c_drag = self.airplane._cd_from_cl(c_lift) + + # V_dot = - g sin γ - 0.5 * (ρ S V^2 CD / m) + (thrust force / m) + airspeed_dot = - self.airplane.GRAVITY * np.sin(self.airplane.flight_path_angle) - 0.5 * self.airplane.AIR_DENSITY * ( + self.airplane.WING_SURFACE_AREA / self.airplane.MASS) * (self.airplane.airspeed_norm * self.airplane.STALL_AIRSPEED) ** 2 * c_drag \ + + (self.airplane.THROTTLE_LINEAR_MAPPING * delta_throttle / self.airplane.MASS) + + # γ_dot = 0.5 * (ρ S V CL cos µ / m) - g cos γ / V + flight_path_angle_dot = 0.5 * self.airplane.AIR_DENSITY * (self.airplane.WING_SURFACE_AREA / self.airplane.MASS) * ( + self.airplane.airspeed_norm * self.airplane.STALL_AIRSPEED) * c_lift * np.cos(self.airplane.bank_angle) \ + - (self.airplane.GRAVITY / (self.airplane.airspeed_norm * self.airplane.STALL_AIRSPEED)) * np.cos( + self.airplane.flight_path_angle) + + # μ_dot = μ_dot_commanded + bank_angle_dot = bank_rate + + self.airplane.airspeed_norm += self.airplane.TIME_STEP * (airspeed_dot / self.airplane.STALL_AIRSPEED) + self.airplane.flight_path_angle += self.airplane.TIME_STEP * flight_path_angle_dot + #clip the state to the observation space + self.airplane.airspeed_norm = np.clip(self.airplane.airspeed_norm, self.observation_space.low[1], self.observation_space.high[1]) + self.airplane.flight_path_angle = np.clip(self.airplane.flight_path_angle, self.observation_space.low[0], self.observation_space.high[0]) + # Calculate step reward: Height Loss + reward = self.airplane.TIME_STEP * self.airplane.airspeed_norm * np.sin(self.airplane.flight_path_angle)*27.331231856346 + + # Get the next state + info = 
self._get_info() + terminated = self.termination() | init_terminal + reward = np.where(init_terminal, 0, reward) + + return np.vstack([self.airplane.flight_path_angle, self.airplane.airspeed_norm], dtype=np.float32).T, reward, terminated, False, info + + + def termination(self,): + terminate = np.where((self.airplane.flight_path_angle >= 0.0) & (self.airplane.airspeed_norm >= 1) , True, False) + return terminate diff --git a/src/PolicyIteration.py b/src/PolicyIteration.py index a9dc9f1..1d462ba 100644 --- a/src/PolicyIteration.py +++ b/src/PolicyIteration.py @@ -1,12 +1,13 @@ import os import pickle +import numpy as np import gymnasium as gym from loguru import logger + from scipy.spatial import Delaunay +from functools import cached_property from utils.utils import plot_3D_value_function - -import numpy as np try: import cupy as cp @@ -15,8 +16,7 @@ logger.info("CUDA driver is available.") except (ImportError, AttributeError): - - import numpy as cp + cp = np logger.warning("CUDA is not available. Falling back to NumPy.") def asarray(arr, *args, **kwargs): """In NumPy, this just ensures the object is a NumPy array, with support for additional arguments.""" @@ -28,12 +28,12 @@ def asnumpy(arr, *args, **kwargs): np.asarray = asarray np.asnumpy = asnumpy - cp = np + class PolicyIteration(object): - """ - A class to perform Policy Iteration on discretized continuous environments. + + """A class to perform Policy Iteration on discretized continuous environments. Attributes: env (gym.Env): The Gym environment to work with. 
@@ -51,19 +51,29 @@ class PolicyIteration(object): Example: - from classic_control.cartpole import CartPoleEnv - - env = CartPoleEnv() - bins_space = { - "x_space": np.linspace(-x_lim, x_lim, 12), # position space (0) - "x_dot_space": np.linspace(-x_dot_lim, x_dot_lim, 12), # velocity space (1) - "theta_space": np.linspace(-theta_lim, theta_lim, 12), # angle space (2) - "theta_dot_space": np.linspace(-theta_dot_lim, theta_dot_lim, 12), # angular velocity space (3) - } - action_space = [0, 1] - pi = PolicyIteration(env, bins_space, action_space) - pi.run() - """ + import pickle + import airplane + import numpy as np + import gymnasium as gym + from utils.utils import test_enviroment + from PolicyIteration import PolicyIteration + + glider = gym.make('ReducedSymmetricGliderPullout-v0') + + bins_space = { + "flight_path_angle": np.linspace(-np.pi, 0.5, 100, dtype=np.float32), # Flight Path Angle (γ) (0) + "airspeed_norm": np.linspace(0.7, 4.0, 100, dtype=np.float32), # Air Speed (V) (1) + } + + pi = PolicyIteration( + env=glider, + bins_space=bins_space, + action_space=np.linspace(-0.4, 1.0, 15, dtype=np.float32), + gamma=0.99, + theta=1e-3, + ) + + pi.run() """ metadata = {"img_path": os.getcwd()+"/img/",} @@ -124,6 +134,12 @@ def __init__(self, env: gym.Env, self.grid = np.meshgrid(*self.bins_space.values(), indexing='ij') # Flatten and stack to create a list of points in the space self.states_space = np.vstack([g.ravel() for g in self.grid], dtype=np.float32).T + + # get x and y coordinates + x = self.states_space[:,0] + y = self.states_space[:,1] + self.terminal_states = np.where((x >= 0.0) & (y >= 1) , True, False) + self.terminal_reward = 0 # self.num_simplex_points:int = int(self.states_space[0].shape[0] + 1) # number of points in a simplex one more than the dimension self.space_dim:int = int(self.states_space[0].shape[0]) @@ -131,7 +147,7 @@ def __init__(self, env: gym.Env, self.num_states:int = int(self.states_space.shape[0]) self.num_actions:int = 
int(self.action_space.shape[0]) - logger.info(f"The action space is: {self.action_space}") + #logger.info(f"The action space is: {self.action_space}") logger.info(f"Number of states: {len(self.states_space)}") logger.info(f"Total states:{len(self.states_space)*len(self.action_space)}") @@ -160,6 +176,7 @@ def __in_cell__(self, obs: cp.ndarray) -> cp.ndarray: Returns: np.ndarray: A boolean array indicating whether each observation is within the valid state bounds. """ + #return cp.all((obs >= self.cell_lower_bounds[:, None]) & (obs <= self.cell_upper_bounds[:, None]), axis=1) return cp.all((obs >= self.cell_lower_bounds) & (obs <= self.cell_upper_bounds), axis=1) def barycentric_coordinates(self, points:np.ndarray)->tuple: @@ -224,14 +241,22 @@ def calculate_transition_reward_table(self): "points_indexes": The indexes of the points in the simplex. """ for j, action in enumerate(self.action_space): - self.env.state = cp.asarray(self.states_space, dtype=cp.float32) - obs_gpu, reward_gpu, _, _, _ = self.env.step(action) + self.env.reset() + #self.env.state = cp.asarray(self.states_space, dtype=cp.float32) + state = np.array(self.states_space, dtype=np.float32) + self.env.airplane.flight_path_angle = state[:,0].copy() + self.env.airplane.airspeed_norm = state[:,1].copy() + self.env.airplane.bank_angle = state[:,2].copy() + + obs_gpu, reward_gpu, terminated, _, _ = self.env.step(action) + # log if any state is outside the bounds of the environment states_outside_gpu = self.__in_cell__(obs_gpu) if bool(cp.any(~states_outside_gpu)): # get the indexes of the states outside the bounds - reward_gpu = cp.where(states_outside_gpu, reward_gpu, -100) logger.warning(f"Some states are outside the bounds of the environment.") + # if the state is terminal, set the reward to zero + #reward_gpu = cp.where(terminated, 0, reward_gpu) # if any state is outside the bounds of the environment clip it to the bounds obs_gpu = cp.clip(obs_gpu, self.cell_lower_bounds, self.cell_upper_bounds) # 
get the barycentric coordinates of the resulting state in CPU for now. @@ -244,18 +269,28 @@ def calculate_transition_reward_table(self): self.lambdas[:,j] = cp.asarray(lambdas, dtype=cp.float32) self.simplexes[:,j] = cp.asarray(simplexes, dtype=cp.float32) self.points_indexes[:,j] = cp.asarray(points_indexes, dtype=cp.int32) + def get_value(self, lambdas:cp.ndarray, point_indexes:cp.ndarray, value_function:cp.ndarray)->cp.ndarray: - """ Calculates the next state value based on the given lambdas, point indexes, and value function. + """ Compute the next state value using barycentric coordinates. + + This function extracts values from the given value_function at indices specified by + point_indexes and then computes the weighted sum using the provided barycentric coordinates (lambdas). + It returns a one-dimensional array containing the computed value for each state. + Args: - lambdas (cp.ndarray): The lambdas array of shape (num_states, num_simplex_points,1). - point_indexes (cp.ndarray): The point indexes array of shape (num_states, num_simplex_points,1). - value_function (cp.ndarray): The value function. + lambdas (cp.ndarray): A CuPy array of barycentric coordinates with shape + (num_states, num_simplex_points, 1). + point_indexes (cp.ndarray): A CuPy array of simplex vertex indices with shape + (num_states, num_simplex_points, 1) used to index into the value_function. + value_function (cp.ndarray): A CuPy array containing the current state values. + Returns: - cp.ndarray: The next state value. + cp.ndarray: A one-dimensional CuPy array of computed next state values (shape (num_states,)). + Raises: - Exception: If states in point_indexes are not found in the value function. """ + Exception: If any of the indices in point_indexes are not valid for the given value_function. 
""" assert lambdas.shape == (self.num_states, self.num_simplex_points,1), f"lambdas shape: {lambdas.shape}" assert point_indexes.shape == (self.num_states, self.num_simplex_points,1), f"point_indexes shape: {point_indexes.shape}" @@ -280,12 +315,17 @@ def policy_evaluation(self): logger.info("Starting policy evaluation") while cp.abs(float(max_error)) > self.theta: # initialize the new value function to zeros - new_value_function = cp.zeros_like(self.value_function, dtype=cp.float32) + new_value_function = cp.zeros_like(self.value_function, dtype=cp.float32) + vf_next_state = cp.zeros_like(self.value_function, dtype=cp.float32) new_val = cp.zeros_like(self.value_function, dtype=cp.float32) + new_value_function[self.terminal_states] = self.terminal_reward + new_val[self.terminal_states] = self.terminal_reward + for j, _ in enumerate(self.action_space): # Checkout 'Variable Resolution Discretization in Optimal Control, eq 5' - next_state_value = self.get_value(self.lambdas[:, j], self.points_indexes[:, j], self.value_function) - new_val += self.policy[:,j] * (self.reward[:,j] + self.gamma * next_state_value) + vf_next_state[~self.terminal_states] = self.get_value(self.lambdas[:, j], self.points_indexes[:, j], self.value_function)[~self.terminal_states] + new_val[~self.terminal_states] += self.policy[~self.terminal_states,j] * (self.reward[~self.terminal_states,j] + self.gamma * vf_next_state[~self.terminal_states]) + new_value_function = new_val # update the error: the maximum difference between the new and old value functions errors = cp.fabs(new_value_function[:] - self.value_function[:]) diff --git a/src/utils/utils.py b/src/utils/utils.py index a378ed9..357383d 100644 --- a/src/utils/utils.py +++ b/src/utils/utils.py @@ -11,18 +11,24 @@ def plot_3D_value_function(vf: np.array, """ Plots a 3D value function in some color scale.""" # Assuming points is a 2D array where each row is a point [position, velocity] - X = points[:, 0] # x-axis (position) - Y = points[:, 
1] # y-axis (velocity) + X = points[:, 0] # x-axis + # transformb X from rad to deg + X = np.rad2deg(X) + Y = points[:, 1] # y-axis vf = vf # z-axis (value function) vf_to_plot = (vf - vf.min()) / (vf.max() - vf.min()) if normalize else vf fig = plt.figure() ax = fig.add_subplot(111, projection='3d') # Use plot_trisurf for unstructured triangular surface plot surf = ax.plot_trisurf(X, Y, vf_to_plot, cmap=cmap, edgecolor='white', linewidth=0.2) + # Add title + ax.set_title('Reduced Symmetric Glider Value Function ', pad=20) # Add labels - ax.set_xlabel('position') - ax.set_ylabel('velocity') - ax.set_zlabel('Normalized Value Function') + ax.set_xlabel('Flight Path Angle (γ) [deg]', labelpad=10) + ax.set_ylabel('V/Vs', labelpad=10) + ax.set_zlabel('Normalized Value Function', labelpad=10) + ax.set_xticks(np.round(np.linspace(min(X), max(X), 5))) # 5 ticks on the x-axis + ax.set_yticks(np.round(np.linspace(min(Y), max(Y), 5))) # 5 ticks on the y-axis # Add color bar to represent the value range if path is not None: plt.savefig(path) # Show the plot @@ -70,7 +76,7 @@ def get_optimal_action(state:np.array, optimal_policy:np.array): Returns: action: The optimal action for the given state. 
""" - lambdas, simmplex_info = get_barycentric_coordinates(optimal_policy, state) + lambdas, simmplex_info = get_barycentric_coordinates(optimal_policy,state) simplex, points_indexes = simmplex_info actions = optimal_policy.action_space probabilities = np.zeros(len(actions), dtype=np.float32) diff --git a/test.py b/test.py index 8a5dd1a..8aa5879 100644 --- a/test.py +++ b/test.py @@ -1,30 +1,33 @@ import pickle +import airplane import numpy as np +import gymnasium as gym from utils.utils import plot_3D_value_function from PolicyIteration import PolicyIteration -from classic_control.continuous_mountain_car import Continuous_MountainCarEnv -env=Continuous_MountainCarEnv() +glider = gym.make('ReducedSymmetricGliderPullout-v0') bins_space = { - "x_space": np.linspace(env.min_position, env.max_position, 100, dtype=np.float32), # position space (0) - "x_dot_space": np.linspace(-abs(env.max_speed), abs(env.max_speed), 100, dtype=np.float32), # velocity space (1) + "flight_path_angle": np.linspace(-np.pi-0.01, 0.10, 100, dtype=np.float32), # Flight Path Angle (γ) (0) + "airspeed_norm": np.linspace(0.7, 4, 100, dtype=np.float32), # Air Speed (V) (1) } pi = PolicyIteration( - env=env, + env=glider, bins_space=bins_space, - action_space=np.linspace(-1.0, +1.0,9, dtype=np.float32), + action_space=np.linspace(-0.4, 1.0, 15, dtype=np.float32), gamma=0.99, theta=1e-3, ) -pi.run() -with open(env.__class__.__name__ + ".pkl", "rb") as f: + +#pi.run() + +with open(glider.__class__.__name__ + ".pkl", "rb") as f: pi: PolicyIteration = pickle.load(f) plot_3D_value_function(vf = pi.value_function, points = pi.states_space, - normalize=True, + normalize=False, show=True, path="./test_vf.png") \ No newline at end of file diff --git a/testing.ipynb b/testing.ipynb index b5a7f7d..d2f758b 100644 --- a/testing.ipynb +++ b/testing.ipynb @@ -13,17 +13,1248 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "002626e9", + "execution_count": 1, + "id": "9c6a7123", "metadata": {}, - 
"outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-03-10 11:32:33.563\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m20\u001b[0m - \u001b[33m\u001b[1mCUDA is not available. Falling back to NumPy.\u001b[0m\n", + "\u001b[32m2025-03-10 11:32:33.568\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m131\u001b[0m - \u001b[1mLower bounds: [-3.1415927 0.9 -0.34906584]\u001b[0m\n", + "\u001b[32m2025-03-10 11:32:33.570\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m132\u001b[0m - \u001b[1mUpper bounds: [0. 4.5 3.4906585]\u001b[0m\n", + "\u001b[32m2025-03-10 11:32:33.571\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m151\u001b[0m - \u001b[1mNumber of states: 8000\u001b[0m\n", + "\u001b[32m2025-03-10 11:32:33.571\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m152\u001b[0m - \u001b[1mTotal states:576000\u001b[0m\n", + "\u001b[32m2025-03-10 11:32:33.573\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m165\u001b[0m - \u001b[1mPolicy Iteration was correctly initialized.\u001b[0m\n", + "\u001b[32m2025-03-10 11:32:33.574\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mThe enviroment name is: TimeLimit\u001b[0m\n", + "\u001b[32m2025-03-10 11:32:33.574\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mrun\u001b[0m:\u001b[36m390\u001b[0m - \u001b[1mCreating Delaunay triangulation over the state space...\u001b[0m\n", + "\u001b[32m2025-03-10 11:32:33.996\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mPolicyIteration\u001b[0m:\u001b[36mrun\u001b[0m:\u001b[36m392\u001b[0m - \u001b[1mDelaunay triangulation created.\u001b[0m\n", + "\u001b[32m2025-03-10 11:32:33.997\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mrun\u001b[0m:\u001b[36m402\u001b[0m - \u001b[1mGenerating transition and reward function table...\u001b[0m\n", + "/home/nromero/anaconda3/envs/DynamicProgramming/lib/python3.11/site-packages/gymnasium/utils/passive_env_checker.py:159: UserWarning: \u001b[33mWARN: The obs returned by the `reset()` method is not within the observation space.\u001b[0m\n", + " logger.warn(f\"{pre} is not within the observation space.\")\n", + "/home/nromero/anaconda3/envs/DynamicProgramming/lib/python3.11/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.airplane to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.airplane` for environment variables or `env.get_wrapper_attr('airplane')` that will search the reminding wrappers.\u001b[0m\n", + " logger.warn(\n", + "/home/nromero/anaconda3/envs/DynamicProgramming/lib/python3.11/site-packages/gymnasium/utils/passive_env_checker.py:228: UserWarning: \u001b[33mWARN: Expects `terminated` signal to be a boolean, actual type: \u001b[0m\n", + " logger.warn(\n", + "/home/nromero/anaconda3/envs/DynamicProgramming/lib/python3.11/site-packages/gymnasium/utils/passive_env_checker.py:159: UserWarning: \u001b[33mWARN: The obs returned by the `step()` method is not within the observation space.\u001b[0m\n", + " logger.warn(f\"{pre} is not within the observation space.\")\n", + "/home/nromero/anaconda3/envs/DynamicProgramming/lib/python3.11/site-packages/gymnasium/utils/passive_env_checker.py:246: UserWarning: \u001b[33mWARN: The reward returned by `step()` must be a float, int, np.integer or np.floating, actual type: \u001b[0m\n", + " logger.warn(\n", + "\u001b[32m2025-03-10 11:32:34.000\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:32:37.189\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:32:40.183\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:32:43.159\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:32:46.071\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:32:48.612\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:32:51.111\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 
11:32:53.257\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:32:55.422\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:32:57.725\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:32:59.973\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:33:02.233\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:33:04.457\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:33:07.260\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + 
"\u001b[32m2025-03-10 11:33:10.255\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:33:13.343\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:33:15.893\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:33:18.551\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:33:21.666\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:33:23.366\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:33:25.046\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the 
environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:33:26.790\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:33:28.670\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:33:30.387\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:33:32.134\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:33:34.158\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:33:36.198\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:33:38.240\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the 
bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:33:40.266\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:33:42.311\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:33:44.275\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:33:46.222\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:33:48.468\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:33:50.514\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:33:52.535\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are 
outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:33:55.166\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:33:57.578\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:33:59.952\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:34:01.933\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:34:03.818\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:34:05.921\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:34:08.129\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome 
states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:34:10.222\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:34:11.934\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:34:13.855\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:34:15.998\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:34:17.991\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:34:19.763\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:34:21.526\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - 
\u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:34:23.483\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:34:25.516\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:34:27.486\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:34:29.438\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:34:31.380\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:34:33.281\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:34:34.941\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:34:36.651\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:34:38.347\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:34:41.196\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:34:43.508\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:34:46.056\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:34:48.715\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:34:51.347\u001b[0m | \u001b[33m\u001b[1mWARNING 
\u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:34:54.544\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:34:57.181\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:35:00.748\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:35:03.135\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:35:05.102\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:35:07.151\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:35:09.058\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:35:10.944\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:35:12.882\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m257\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2025-03-10 11:35:14.767\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mrun\u001b[0m:\u001b[36m404\u001b[0m - \u001b[1mTransition and reward function table generated.\u001b[0m\n", + "\u001b[32m2025-03-10 11:35:14.767\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mrun\u001b[0m:\u001b[36m406\u001b[0m - \u001b[1msolving step 0\u001b[0m\n", + "\u001b[32m2025-03-10 11:35:14.767\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mStarting policy evaluation\u001b[0m\n", + "\u001b[32m2025-03-10 11:35:14.797\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m339\u001b[0m - \u001b[1mMax Error: 0.04500000178813934 | Avg Error: 0.01600000075995922 | 800<0.001\u001b[0m\n", + "\u001b[32m2025-03-10 11:35:19.289\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m339\u001b[0m - \u001b[1mMax Error: 0.009999999776482582 | Avg Error: 0.004000000189989805 | 621<0.001\u001b[0m\n", + 
"\u001b[32m2025-03-10 11:35:23.737\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m339\u001b[0m - \u001b[1mMax Error: 0.0020000000949949026 | Avg Error: 0.0010000000474974513 | 3706<0.001\u001b[0m\n", + "\u001b[32m2025-03-10 11:35:29.902\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m339\u001b[0m - \u001b[1mMax Error: 0.0 | Avg Error: 0.0 | 8000<0.001\u001b[0m\n", + "\u001b[32m2025-03-10 11:35:29.903\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m356\u001b[0m - \u001b[1mPolicy evaluation finished.\u001b[0m\n", + "\u001b[32m2025-03-10 11:35:29.903\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m365\u001b[0m - \u001b[1mStarting policy improvement\u001b[0m\n", + "\u001b[32m2025-03-10 11:35:29.950\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m378\u001b[0m - \u001b[1mThe number of updated different actions: 576000\u001b[0m\n", + "\u001b[32m2025-03-10 11:35:29.950\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m381\u001b[0m - \u001b[1mPolicy improvement finished.\u001b[0m\n", + "\u001b[32m2025-03-10 11:35:29.951\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mrun\u001b[0m:\u001b[36m406\u001b[0m - \u001b[1msolving step 1\u001b[0m\n", + "\u001b[32m2025-03-10 11:35:29.951\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mStarting policy evaluation\u001b[0m\n", + "\u001b[32m2025-03-10 11:35:30.004\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m339\u001b[0m - \u001b[1mMax Error: 0.04800000041723251 | Avg Error: 0.006000000052154064 | 1707<0.001\u001b[0m\n", + "\u001b[32m2025-03-10 11:35:37.576\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m339\u001b[0m - \u001b[1mMax Error: 0.004000000189989805 | Avg Error: 0.0010000000474974513 | 4169<0.001\u001b[0m\n", + "\u001b[32m2025-03-10 11:35:44.947\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m339\u001b[0m - \u001b[1mMax Error: 0.0010000000474974513 | Avg Error: 0.0 | 8000<0.001\u001b[0m\n", + "\u001b[32m2025-03-10 11:35:52.910\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m339\u001b[0m - \u001b[1mMax Error: 0.0 | Avg Error: 0.0 | 8000<0.001\u001b[0m\n", + "\u001b[32m2025-03-10 11:35:52.910\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m356\u001b[0m - \u001b[1mPolicy evaluation finished.\u001b[0m\n", + "\u001b[32m2025-03-10 11:35:52.910\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m365\u001b[0m - \u001b[1mStarting policy improvement\u001b[0m\n", + "\u001b[32m2025-03-10 11:35:52.950\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m378\u001b[0m - \u001b[1mThe number of updated different actions: 6020\u001b[0m\n", + "\u001b[32m2025-03-10 11:35:52.950\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m381\u001b[0m - \u001b[1mPolicy improvement finished.\u001b[0m\n", + "\u001b[32m2025-03-10 11:35:52.951\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mPolicyIteration\u001b[0m:\u001b[36mrun\u001b[0m:\u001b[36m406\u001b[0m - \u001b[1msolving step 2\u001b[0m\n", + "\u001b[32m2025-03-10 11:35:52.951\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mStarting policy evaluation\u001b[0m\n", + "\u001b[32m2025-03-10 11:35:53.007\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m339\u001b[0m - \u001b[1mMax Error: 0.024000000208616257 | Avg Error: 0.0010000000474974513 | 6709<0.001\u001b[0m\n", + "\u001b[32m2025-03-10 11:36:00.112\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m339\u001b[0m - \u001b[1mMax Error: 0.0010000000474974513 | Avg Error: 0.0 | 8000<0.001\u001b[0m\n", + "\u001b[32m2025-03-10 11:36:04.005\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m339\u001b[0m - \u001b[1mMax Error: 0.0 | Avg Error: 0.0 | 8000<0.001\u001b[0m\n", + "\u001b[32m2025-03-10 11:36:04.006\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m356\u001b[0m - \u001b[1mPolicy evaluation finished.\u001b[0m\n", + "\u001b[32m2025-03-10 11:36:04.006\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m365\u001b[0m - \u001b[1mStarting policy improvement\u001b[0m\n", + "\u001b[32m2025-03-10 11:36:04.029\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m378\u001b[0m - \u001b[1mThe number of updated different actions: 1910\u001b[0m\n", + "\u001b[32m2025-03-10 11:36:04.029\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m381\u001b[0m - \u001b[1mPolicy improvement 
finished.\u001b[0m\n", + "\u001b[32m2025-03-10 11:36:04.030\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mrun\u001b[0m:\u001b[36m406\u001b[0m - \u001b[1msolving step 3\u001b[0m\n", + "\u001b[32m2025-03-10 11:36:04.030\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mStarting policy evaluation\u001b[0m\n", + "\u001b[32m2025-03-10 11:36:04.059\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m339\u001b[0m - \u001b[1mMax Error: 0.010999999940395355 | Avg Error: 0.0 | 7768<0.001\u001b[0m\n", + "\u001b[32m2025-03-10 11:36:08.036\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m339\u001b[0m - \u001b[1mMax Error: 0.0 | Avg Error: 0.0 | 8000<0.001\u001b[0m\n", + "\u001b[32m2025-03-10 11:36:08.036\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m356\u001b[0m - \u001b[1mPolicy evaluation finished.\u001b[0m\n", + "\u001b[32m2025-03-10 11:36:08.037\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m365\u001b[0m - \u001b[1mStarting policy improvement\u001b[0m\n", + "\u001b[32m2025-03-10 11:36:08.064\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m378\u001b[0m - \u001b[1mThe number of updated different actions: 386\u001b[0m\n", + "\u001b[32m2025-03-10 11:36:08.064\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m381\u001b[0m - \u001b[1mPolicy improvement finished.\u001b[0m\n", + "\u001b[32m2025-03-10 11:36:08.065\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mrun\u001b[0m:\u001b[36m406\u001b[0m - 
\u001b[1msolving step 4\u001b[0m\n", + "\u001b[32m2025-03-10 11:36:08.065\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mStarting policy evaluation\u001b[0m\n", + "\u001b[32m2025-03-10 11:36:08.097\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m339\u001b[0m - \u001b[1mMax Error: 0.0020000000949949026 | Avg Error: 0.0 | 7981<0.001\u001b[0m\n", + "\u001b[32m2025-03-10 11:36:12.564\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m339\u001b[0m - \u001b[1mMax Error: 0.0 | Avg Error: 0.0 | 8000<0.001\u001b[0m\n", + "\u001b[32m2025-03-10 11:36:12.564\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m356\u001b[0m - \u001b[1mPolicy evaluation finished.\u001b[0m\n", + "\u001b[32m2025-03-10 11:36:12.565\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m365\u001b[0m - \u001b[1mStarting policy improvement\u001b[0m\n", + "\u001b[32m2025-03-10 11:36:12.595\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m378\u001b[0m - \u001b[1mThe number of updated different actions: 64\u001b[0m\n", + "\u001b[32m2025-03-10 11:36:12.596\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m381\u001b[0m - \u001b[1mPolicy improvement finished.\u001b[0m\n", + "\u001b[32m2025-03-10 11:36:12.596\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mrun\u001b[0m:\u001b[36m406\u001b[0m - \u001b[1msolving step 5\u001b[0m\n", + "\u001b[32m2025-03-10 11:36:12.596\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mStarting policy evaluation\u001b[0m\n", + "\u001b[32m2025-03-10 11:36:12.628\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m339\u001b[0m - \u001b[1mMax Error: 0.0 | Avg Error: 0.0 | 8000<0.001\u001b[0m\n", + "\u001b[32m2025-03-10 11:36:12.629\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m356\u001b[0m - \u001b[1mPolicy evaluation finished.\u001b[0m\n", + "\u001b[32m2025-03-10 11:36:12.629\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m365\u001b[0m - \u001b[1mStarting policy improvement\u001b[0m\n", + "\u001b[32m2025-03-10 11:36:12.655\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m381\u001b[0m - \u001b[1mPolicy improvement finished.\u001b[0m\n", + "\u001b[32m2025-03-10 11:36:12.714\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36msave\u001b[0m:\u001b[36m425\u001b[0m - \u001b[1mPolicy and value function saved.\u001b[0m\n" + ] + } + ], "source": [ "import pickle\n", + "import airplane\n", "import numpy as np\n", + "import gymnasium as gym\n", "from utils.utils import test_enviroment\n", - "from PolicyIteration import PolicyIteration" + "from PolicyIteration import PolicyIteration\n", + "\n", + "glider = gym.make('ReducedBankedGliderPullout-v0')\n", + "\n", + "bins_space = {\n", + " \"flight_path_angle\": np.linspace(np.deg2rad(-180), np.deg2rad(0), 20, dtype=np.float32), # Flight Path Angle (γ) (0)\n", + " \"airspeed_norm\": np.linspace(0.9, 4.5, 20, dtype=np.float32), # Air Speed (V) (1)\n", + " \"bank_angle\": np.linspace( np.deg2rad(-20), np.deg2rad(200), 20, dtype=np.float32), # Bank Angle (mu) (2)\n", + "}\n", + "\n", + "\n", + 
"action_space= np.array(np.meshgrid(np.linspace(-0.5, 1.0, 6, dtype=np.float32), \n", + " np.linspace(np.deg2rad(-30), np.deg2rad(30), 12, dtype=np.float32))).T.reshape(-1, 2)\n", + "\n", + "pi = PolicyIteration(\n", + " env=glider, \n", + " bins_space=bins_space,\n", + " action_space= action_space,\n", + " gamma=0.99,\n", + " theta=1e-3,\n", + ")\n", + "\n", + "\n", + "pi.run()" ] }, + { + "cell_type": "code", + "execution_count": 5, + "id": "442d7f05", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Action: [1. 0.524] | Reward: -0.011854943438053572 | State: (-80.16007232666016, 1.203194499015808, 150.3000030517578) | Terminated: False | Episode Length: 0.01 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.02374708114579749 | State: (-80.3203353881836, 1.2063888311386108, 150.59999084472656) | Terminated: False | Episode Length: 0.02 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.03567635565003305 | State: (-80.48078918457031, 1.2095831632614136, 150.89999389648438) | Terminated: False | Episode Length: 0.03 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.047642708128035866 | State: (-80.64144134521484, 1.2127772569656372, 151.19998168945312) | Terminated: False | Episode Length: 0.04 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.05964607839916092 | State: (-80.80228424072266, 1.2159712314605713, 151.5) | Terminated: False | Episode Length: 0.05 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.0716864049166001 | State: (-80.96331787109375, 1.2191649675369263, 151.79998779296875) | Terminated: False | Episode Length: 0.060000000000000005 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.0837636247592959 | State: (-81.12454223632812, 1.2223584651947021, 152.09999084472656) | Terminated: False | Episode Length: 0.07 | C_L: 1.0\n", + "Action: [1. 
0.524] | Reward: -0.09587767362401492 | State: (-81.28596496582031, 1.2255516052246094, 152.39999389648438) | Terminated: False | Episode Length: 0.08 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.10802848581758488 | State: (-81.44757843017578, 1.228744626045227, 152.6999969482422) | Terminated: False | Episode Length: 0.09 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.12021599424929892 | State: (-81.60939025878906, 1.2319371700286865, 152.99998474121094) | Terminated: False | Episode Length: 0.09999999999999999 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.1324401304234907 | State: (-81.77139282226562, 1.2351293563842773, 153.29998779296875) | Terminated: False | Episode Length: 0.10999999999999999 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.14470082443228416 | State: (-81.93358612060547, 1.2383211851119995, 153.59999084472656) | Terminated: False | Episode Length: 0.11999999999999998 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.15699800494852156 | State: (-82.09597778320312, 1.241512656211853, 153.8999786376953) | Terminated: False | Episode Length: 0.12999999999999998 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.16933159921887353 | State: (-82.25856018066406, 1.2447036504745483, 154.1999969482422) | Terminated: False | Episode Length: 0.13999999999999999 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.18170153305713466 | State: (-82.42134094238281, 1.247894048690796, 154.49998474121094) | Terminated: False | Episode Length: 0.15 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.1941077308377086 | State: (-82.58431243896484, 1.2510839700698853, 154.79998779296875) | Terminated: False | Episode Length: 0.16 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.20655011548928606 | State: (-82.74747467041016, 1.2542734146118164, 155.09999084472656) | Terminated: False | Episode Length: 0.17 | C_L: 1.0\n", + "Action: [1. 
0.524] | Reward: -0.21902860848871952 | State: (-82.91084289550781, 1.2574621438980103, 155.39999389648438) | Terminated: False | Episode Length: 0.18000000000000002 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.23154312985509834 | State: (-83.07439422607422, 1.260650396347046, 155.69998168945312) | Terminated: False | Episode Length: 0.19000000000000003 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.2440935981440278 | State: (-83.2381362915039, 1.2638379335403442, 156.0) | Terminated: False | Episode Length: 0.20000000000000004 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.2566799304421158 | State: (-83.4020767211914, 1.2670247554779053, 156.29998779296875) | Terminated: False | Episode Length: 0.21000000000000005 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.2693020423616708 | State: (-83.56621551513672, 1.2702109813690186, 156.59999084472656) | Terminated: False | Episode Length: 0.22000000000000006 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.28195984803561475 | State: (-83.73052978515625, 1.273396372795105, 156.89999389648438) | Terminated: False | Episode Length: 0.23000000000000007 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.2946532601126142 | State: (-83.8950424194336, 1.276581048965454, 157.1999969482422) | Terminated: False | Episode Length: 0.24000000000000007 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.3073821897524338 | State: (-84.05974578857422, 1.2797648906707764, 157.49998474121094) | Terminated: False | Episode Length: 0.25000000000000006 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.3201465466215152 | State: (-84.22463989257812, 1.2829477787017822, 157.8000030517578) | Terminated: False | Episode Length: 0.26000000000000006 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.3329462388887851 | State: (-84.38972473144531, 1.2861299514770508, 158.09999084472656) | Terminated: False | Episode Length: 0.2700000000000001 | C_L: 1.0\n", + "Action: [1. 
0.524] | Reward: -0.34578117322169616 | State: (-84.55499267578125, 1.289311170578003, 158.39999389648438) | Terminated: False | Episode Length: 0.2800000000000001 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.3586512547825038 | State: (-84.72044372558594, 1.2924914360046387, 158.6999969482422) | Terminated: False | Episode Length: 0.2900000000000001 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.37155638722478307 | State: (-84.88609313964844, 1.295670747756958, 158.99998474121094) | Terminated: False | Episode Length: 0.3000000000000001 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.384496472690188 | State: (-85.05192565917969, 1.2988489866256714, 159.29998779296875) | Terminated: False | Episode Length: 0.3100000000000001 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.39747141180545814 | State: (-85.21793365478516, 1.3020262718200684, 159.59999084472656) | Terminated: False | Episode Length: 0.3200000000000001 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.41048110367967466 | State: (-85.3841323852539, 1.3052024841308594, 159.89999389648438) | Terminated: False | Episode Length: 0.3300000000000001 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.4235254459017697 | State: (-85.5505142211914, 1.3083775043487549, 160.19998168945312) | Terminated: False | Episode Length: 0.34000000000000014 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.43660433453829267 | State: (-85.71707153320312, 1.3115514516830444, 160.5) | Terminated: False | Episode Length: 0.35000000000000014 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.4497176641314361 | State: (-85.8838119506836, 1.3147242069244385, 160.79998779296875) | Terminated: False | Episode Length: 0.36000000000000015 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.462865327697325 | State: (-86.05073547363281, 1.317895770072937, 161.09999084472656) | Terminated: False | Episode Length: 0.37000000000000016 | C_L: 1.0\n", + "Action: [1. 
0.524] | Reward: -0.47604721672457256 | State: (-86.21782684326172, 1.32106614112854, 161.39999389648438) | Terminated: False | Episode Length: 0.38000000000000017 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.48926322117310544 | State: (-86.38510131835938, 1.324235200881958, 161.6999969482422) | Terminated: False | Episode Length: 0.3900000000000002 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.5025132294732619 | State: (-86.55255126953125, 1.327402949333191, 161.99998474121094) | Terminated: False | Episode Length: 0.4000000000000002 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.5157971285251659 | State: (-86.72016906738281, 1.3305693864822388, 162.29998779296875) | Terminated: False | Episode Length: 0.4100000000000002 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.5291148036983803 | State: (-86.8879623413086, 1.333734393119812, 162.59999084472656) | Terminated: False | Episode Length: 0.4200000000000002 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.5424661388318416 | State: (-87.05592346191406, 1.3368979692459106, 162.89999389648438) | Terminated: False | Episode Length: 0.4300000000000002 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.5558510162340805 | State: (-87.22406005859375, 1.3400602340698242, 163.19998168945312) | Terminated: False | Episode Length: 0.4400000000000002 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.5692693166837299 | State: (-87.39236450195312, 1.3432209491729736, 163.5) | Terminated: False | Episode Length: 0.45000000000000023 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.582720919430324 | State: (-87.56082153320312, 1.3463801145553589, 163.79998779296875) | Terminated: False | Episode Length: 0.46000000000000024 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.5962057021953907 | State: (-87.72945404052734, 1.3495378494262695, 164.09999084472656) | Terminated: False | Episode Length: 0.47000000000000025 | C_L: 1.0\n", + "Action: [1. 
0.524] | Reward: -0.6097235411738414 | State: (-87.89823913574219, 1.3526939153671265, 164.39999389648438) | Terminated: False | Episode Length: 0.48000000000000026 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.6232743110356592 | State: (-88.06719207763672, 1.3558483123779297, 164.6999969482422) | Terminated: False | Episode Length: 0.49000000000000027 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.6368578849278896 | State: (-88.23629760742188, 1.3590011596679688, 164.99998474121094) | Terminated: False | Episode Length: 0.5000000000000002 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.6504741344769358 | State: (-88.40556335449219, 1.362152338027954, 165.29998779296875) | Terminated: False | Episode Length: 0.5100000000000002 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.6641229297911606 | State: (-88.57498168945312, 1.3653017282485962, 165.59999084472656) | Terminated: False | Episode Length: 0.5200000000000002 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.677804139463799 | State: (-88.74455261230469, 1.3684494495391846, 165.8999786376953) | Terminated: False | Episode Length: 0.5300000000000002 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.6915176305761824 | State: (-88.91427612304688, 1.3715953826904297, 166.1999969482422) | Terminated: False | Episode Length: 0.5400000000000003 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.7052632687012773 | State: (-89.08414459228516, 1.374739408493042, 166.49998474121094) | Terminated: False | Episode Length: 0.5500000000000003 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.7190409179075412 | State: (-89.25415802001953, 1.377881646156311, 166.79998779296875) | Terminated: False | Episode Length: 0.5600000000000003 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.7328504407630978 | State: (-89.42432403564453, 1.3810219764709473, 167.09999084472656) | Terminated: False | Episode Length: 0.5700000000000003 | C_L: 1.0\n", + "Action: [1. 
0.524] | Reward: -0.7466916983402333 | State: (-89.59461975097656, 1.3841603994369507, 167.39999389648438) | Terminated: False | Episode Length: 0.5800000000000003 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.7605645502202167 | State: (-89.76506042480469, 1.3872967958450317, 167.69998168945312) | Terminated: False | Episode Length: 0.5900000000000003 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.7744688544984455 | State: (-89.93563842773438, 1.39043128490448, 168.0) | Terminated: False | Episode Length: 0.6000000000000003 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.7884044677899188 | State: (-90.10635375976562, 1.3935637474060059, 168.29998779296875) | Terminated: False | Episode Length: 0.6100000000000003 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.8023712452350408 | State: (-90.27719116210938, 1.3966940641403198, 168.59999084472656) | Terminated: False | Episode Length: 0.6200000000000003 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.816369040505755 | State: (-90.44816589355469, 1.3998223543167114, 168.89999389648438) | Terminated: False | Episode Length: 0.6300000000000003 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.8303977058120122 | State: (-90.6192626953125, 1.4029484987258911, 169.1999969482422) | Terminated: False | Episode Length: 0.6400000000000003 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.8444570919085732 | State: (-90.79048919677734, 1.4060723781585693, 169.49998474121094) | Terminated: False | Episode Length: 0.6500000000000004 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.8585470481021481 | State: (-90.96183776855469, 1.4091942310333252, 169.8000030517578) | Terminated: False | Episode Length: 0.6600000000000004 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -0.8726674222588741 | State: (-91.13330078125, 1.41231369972229, 170.09999084472656) | Terminated: False | Episode Length: 0.6700000000000004 | C_L: 1.0\n", + "Action: [1. 
0.428] | Reward: -0.8868180608121324 | State: (-91.30488586425781, 1.4154309034347534, 170.34544372558594) | Terminated: False | Episode Length: 0.6800000000000004 | C_L: 1.0\n", + "Action: [1. 0.428] | Reward: -0.900998808950988 | State: (-91.47655487060547, 1.4185458421707153, 170.59091186523438) | Terminated: False | Episode Length: 0.6900000000000004 | C_L: 1.0\n", + "Action: [1. 0.428] | Reward: -0.9152095105021218 | State: (-91.64830780029297, 1.4216583967208862, 170.8363494873047) | Terminated: False | Episode Length: 0.7000000000000004 | C_L: 1.0\n", + "Action: [1. 0.428] | Reward: -0.9294500079346705 | State: (-91.82013702392578, 1.4247685670852661, 171.08180236816406) | Terminated: False | Episode Length: 0.7100000000000004 | C_L: 1.0\n", + "Action: [1. 0.428] | Reward: -0.9437201423653089 | State: (-91.99205780029297, 1.427876353263855, 171.3272705078125) | Terminated: False | Episode Length: 0.7200000000000004 | C_L: 1.0\n", + "Action: [1. 0.428] | Reward: -0.9580197535635762 | State: (-92.16405487060547, 1.4309816360473633, 171.57272338867188) | Terminated: False | Episode Length: 0.7300000000000004 | C_L: 1.0\n", + "Action: [1. 0.428] | Reward: -0.9723486799574463 | State: (-92.33612823486328, 1.4340845346450806, 171.8181610107422) | Terminated: False | Episode Length: 0.7400000000000004 | C_L: 1.0\n", + "Action: [1. 0.428] | Reward: -0.9867067586391448 | State: (-92.50828552246094, 1.4371848106384277, 172.06362915039062) | Terminated: False | Episode Length: 0.7500000000000004 | C_L: 1.0\n", + "Action: [1. 0.428] | Reward: -1.0010938253712118 | State: (-92.68051147460938, 1.4402825832366943, 172.30908203125) | Terminated: False | Episode Length: 0.7600000000000005 | C_L: 1.0\n", + "Action: [1. 0.428] | Reward: -1.015509714592814 | State: (-92.85281372070312, 1.4433777332305908, 172.55453491210938) | Terminated: False | Episode Length: 0.7700000000000005 | C_L: 1.0\n", + "Action: [1. 
0.428] | Reward: -1.029954259426304 | State: (-93.02519226074219, 1.4464702606201172, 172.79998779296875) | Terminated: False | Episode Length: 0.7800000000000005 | C_L: 1.0\n", + "Action: [1. 0.428] | Reward: -1.0444272916840305 | State: (-93.19763946533203, 1.4495601654052734, 173.04544067382812) | Terminated: False | Episode Length: 0.7900000000000005 | C_L: 1.0\n", + "Action: [1. 0.428] | Reward: -1.058928641875398 | State: (-93.37015533447266, 1.4526472091674805, 173.29090881347656) | Terminated: False | Episode Length: 0.8000000000000005 | C_L: 1.0\n", + "Action: [1. 0.428] | Reward: -1.073458139214179 | State: (-93.5427474975586, 1.4557316303253174, 173.53636169433594) | Terminated: False | Episode Length: 0.8100000000000005 | C_L: 1.0\n", + "Action: [1. 0.428] | Reward: -1.0880156116260773 | State: (-93.71540069580078, 1.4588133096694946, 173.78179931640625) | Terminated: False | Episode Length: 0.8200000000000005 | C_L: 1.0\n", + "Action: [1. 0.428] | Reward: -1.1026008857565432 | State: (-93.88811492919922, 1.4618921279907227, 174.0272674560547) | Terminated: False | Episode Length: 0.8300000000000005 | C_L: 1.0\n", + "Action: [1. 0.428] | Reward: -1.1172137869788443 | State: (-94.06089782714844, 1.464968204498291, 174.27272033691406) | Terminated: False | Episode Length: 0.8400000000000005 | C_L: 1.0\n", + "Action: [1. 0.428] | Reward: -1.1318541394023878 | State: (-94.2337417602539, 1.4680413007736206, 174.51817321777344) | Terminated: False | Episode Length: 0.8500000000000005 | C_L: 1.0\n", + "Action: [1. 0.428] | Reward: -1.1465217658812985 | State: (-94.40665435791016, 1.471111536026001, 174.7636260986328) | Terminated: False | Episode Length: 0.8600000000000005 | C_L: 1.0\n", + "Action: [1. 0.428] | Reward: -1.1612164880232518 | State: (-94.5796127319336, 1.4741787910461426, 175.0090789794922) | Terminated: False | Episode Length: 0.8700000000000006 | C_L: 1.0\n", + "Action: [1. 
0.428] | Reward: -1.175938126198561 | State: (-94.75262451171875, 1.4772430658340454, 175.25453186035156) | Terminated: False | Episode Length: 0.8800000000000006 | C_L: 1.0\n", + "Action: [1. 0.428] | Reward: -1.1906864995495208 | State: (-94.92569732666016, 1.48030424118042, 175.5) | Terminated: False | Episode Length: 0.8900000000000006 | C_L: 1.0\n", + "Action: [1. 0.428] | Reward: -1.2054614260000063 | State: (-95.09882354736328, 1.4833624362945557, 175.7454376220703) | Terminated: False | Episode Length: 0.9000000000000006 | C_L: 1.0\n", + "Action: [1. 0.428] | Reward: -1.2202627222653293 | State: (-95.2719955444336, 1.4864176511764526, 175.9908905029297) | Terminated: False | Episode Length: 0.9100000000000006 | C_L: 1.0\n", + "Action: [1. 0.428] | Reward: -1.2350902038623495 | State: (-95.44522094726562, 1.4894695281982422, 176.23635864257812) | Terminated: False | Episode Length: 0.9200000000000006 | C_L: 1.0\n", + "Action: [1. 0.428] | Reward: -1.2499436851198427 | State: (-95.61849212646484, 1.492518424987793, 176.4818115234375) | Terminated: False | Episode Length: 0.9300000000000006 | C_L: 1.0\n", + "Action: [1. 0.428] | Reward: -1.2648229791891255 | State: (-95.79180908203125, 1.4955639839172363, 176.72726440429688) | Terminated: False | Episode Length: 0.9400000000000006 | C_L: 1.0\n", + "Action: [1. 0.428] | Reward: -1.279727898054937 | State: (-95.96515655517578, 1.4986064434051514, 176.97271728515625) | Terminated: False | Episode Length: 0.9500000000000006 | C_L: 1.0\n", + "Action: [1. 0.428] | Reward: -1.2946582525465757 | State: (-96.13855743408203, 1.501645565032959, 177.21817016601562) | Terminated: False | Episode Length: 0.9600000000000006 | C_L: 1.0\n", + "Action: [1. 0.428] | Reward: -1.3096138523492944 | State: (-96.31199645996094, 1.5046814680099487, 177.46363830566406) | Terminated: False | Episode Length: 0.9700000000000006 | C_L: 1.0\n", + "Action: [1. 
0.428] | Reward: -1.3245945060159496 | State: (-96.48545837402344, 1.5077139139175415, 177.70907592773438) | Terminated: False | Episode Length: 0.9800000000000006 | C_L: 1.0\n", + "Action: [1. 0.428] | Reward: -1.3396000209789083 | State: (-96.65896606445312, 1.5107430219650269, 177.95452880859375) | Terminated: False | Episode Length: 0.9900000000000007 | C_L: 1.0\n", + "Action: [1. 0.428] | Reward: -1.354630203562209 | State: (-96.83250427246094, 1.5137687921524048, 178.1999969482422) | Terminated: False | Episode Length: 1.0000000000000007 | C_L: 1.0\n", + "Action: [1. 0.428] | Reward: -1.36968485899398 | State: (-97.00606536865234, 1.5167911052703857, 178.44544982910156) | Terminated: False | Episode Length: 1.0100000000000007 | C_L: 1.0\n", + "Action: [1. 0.428] | Reward: -1.3847637914191098 | State: (-97.17965698242188, 1.5198098421096802, 178.69088745117188) | Terminated: False | Episode Length: 1.0200000000000007 | C_L: 1.0\n", + "Action: [1. 0.428] | Reward: -1.3998668039121742 | State: (-97.35327911376953, 1.5228252410888672, 178.9363555908203) | Terminated: False | Episode Length: 1.0300000000000007 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -1.4149936984906164 | State: (-97.52691650390625, 1.5258369445800781, 178.90907287597656) | Terminated: False | Episode Length: 1.0400000000000007 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -1.4301442766706631 | State: (-97.7005615234375, 1.528845191001892, 178.88180541992188) | Terminated: False | Episode Length: 1.0500000000000007 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -1.4453183388531636 | State: (-97.87421417236328, 1.53184974193573, 178.8545379638672) | Terminated: False | Episode Length: 1.0600000000000007 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -1.4605156843250102 | State: (-98.0478744506836, 1.5348505973815918, 178.8272705078125) | Terminated: False | Episode Length: 1.0700000000000007 | C_L: 1.0\n", + "Action: [ 1. 
-0.048] | Reward: -1.4757361112606324 | State: (-98.2215347290039, 1.537847876548767, 178.79998779296875) | Terminated: False | Episode Length: 1.0800000000000007 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -1.490979416723567 | State: (-98.39521789550781, 1.5408413410186768, 178.77272033691406) | Terminated: False | Episode Length: 1.0900000000000007 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -1.5062453966681015 | State: (-98.56890106201172, 1.5438312292099, 178.7454376220703) | Terminated: False | Episode Length: 1.1000000000000008 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -1.5215338459409935 | State: (-98.74259185791016, 1.5468171834945679, 178.71817016601562) | Terminated: False | Episode Length: 1.1100000000000008 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -1.5368445582832653 | State: (-98.91630554199219, 1.5497993230819702, 178.69088745117188) | Terminated: False | Episode Length: 1.1200000000000008 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -1.5521773263320744 | State: (-99.09001922607422, 1.552777647972107, 178.66363525390625) | Terminated: False | Episode Length: 1.1300000000000008 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -1.5675319416226603 | State: (-99.26376342773438, 1.5557520389556885, 178.6363525390625) | Terminated: False | Episode Length: 1.1400000000000008 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -1.5829081945903676 | State: (-99.43750762939453, 1.5587226152420044, 178.6090850830078) | Terminated: False | Episode Length: 1.1500000000000008 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -1.598305874572745 | State: (-99.61127471923828, 1.5616891384124756, 178.58180236816406) | Terminated: False | Episode Length: 1.1600000000000008 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -1.613724769811723 | State: (-99.78506469726562, 1.5646517276763916, 178.55453491210938) | Terminated: False | Episode Length: 1.1700000000000008 | C_L: 1.0\n", + "Action: [ 1. 
-0.048] | Reward: -1.6291646674558673 | State: (-99.9588623046875, 1.567610263824463, 178.52725219726562) | Terminated: False | Episode Length: 1.1800000000000008 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -1.6446253535627107 | State: (-100.13268280029297, 1.5705647468566895, 178.49998474121094) | Terminated: False | Episode Length: 1.1900000000000008 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -1.6601066131011626 | State: (-100.3065185546875, 1.5735151767730713, 178.47271728515625) | Terminated: False | Episode Length: 1.2000000000000008 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -1.6756082299539967 | State: (-100.48038482666016, 1.5764614343643188, 178.44544982910156) | Terminated: False | Episode Length: 1.2100000000000009 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -1.6911299869204168 | State: (-100.65426635742188, 1.5794035196304321, 178.41818237304688) | Terminated: False | Episode Length: 1.2200000000000009 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -1.7066716657187018 | State: (-100.82817077636719, 1.5823414325714111, 178.39089965820312) | Terminated: False | Episode Length: 1.2300000000000009 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -1.722233046988929 | State: (-101.00209045410156, 1.5852751731872559, 178.36363220214844) | Terminated: False | Episode Length: 1.2400000000000009 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -1.737813910295778 | State: (-101.17604064941406, 1.5882046222686768, 178.3363494873047) | Terminated: False | Episode Length: 1.2500000000000009 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -1.753414034131411 | State: (-101.35001373291016, 1.5911297798156738, 178.30908203125) | Terminated: False | Episode Length: 1.260000000000001 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -1.769033195918437 | State: (-101.52401733398438, 1.5940505266189575, 178.28179931640625) | Terminated: False | Episode Length: 1.270000000000001 | C_L: 1.0\n", + "Action: [ 1. 
-0.048] | Reward: -1.7846711720129522 | State: (-101.69804382324219, 1.596967101097107, 178.25454711914062) | Terminated: False | Episode Length: 1.280000000000001 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -1.800327737707662 | State: (-101.87208557128906, 1.5998791456222534, 178.28179931640625) | Terminated: False | Episode Length: 1.290000000000001 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -1.8160026668954568 | State: (-102.04617309570312, 1.6027867794036865, 178.30908203125) | Terminated: False | Episode Length: 1.300000000000001 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -1.831695732394703 | State: (-102.22029113769531, 1.6056898832321167, 178.3363494873047) | Terminated: False | Episode Length: 1.310000000000001 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -1.8474067059524595 | State: (-102.39444732666016, 1.6085885763168335, 178.36363220214844) | Terminated: False | Episode Length: 1.320000000000001 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -1.8631353582477748 | State: (-102.56863403320312, 1.6114827394485474, 178.39089965820312) | Terminated: False | Episode Length: 1.330000000000001 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -1.8788814588950662 | State: (-102.74285888671875, 1.6143723726272583, 178.41818237304688) | Terminated: False | Episode Length: 1.340000000000001 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -1.8946447764475816 | State: (-102.91712188720703, 1.6172573566436768, 178.44544982910156) | Terminated: False | Episode Length: 1.350000000000001 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -1.910425078400943 | State: (-103.09141540527344, 1.6201378107070923, 178.47271728515625) | Terminated: False | Episode Length: 1.360000000000001 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -1.926222131196774 | State: (-103.26575469970703, 1.6230134963989258, 178.49998474121094) | Terminated: False | Episode Length: 1.370000000000001 | C_L: 1.0\n", + "Action: [1. 
0.048] | Reward: -1.9420357002264084 | State: (-103.44013214111328, 1.6258845329284668, 178.52725219726562) | Terminated: False | Episode Length: 1.380000000000001 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -1.9578655498346835 | State: (-103.61454772949219, 1.6287508010864258, 178.55453491210938) | Terminated: False | Episode Length: 1.390000000000001 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -1.973711443323816 | State: (-103.78899383544922, 1.6316123008728027, 178.58180236816406) | Terminated: False | Episode Length: 1.400000000000001 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -1.989573142957361 | State: (-103.96349334716797, 1.6344690322875977, 178.6090850830078) | Terminated: False | Episode Length: 1.410000000000001 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -2.0054504099642547 | State: (-104.13802337646484, 1.6373209953308105, 178.6363525390625) | Terminated: False | Episode Length: 1.420000000000001 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -2.0213430045429415 | State: (-104.31260681152344, 1.6401680707931519, 178.66363525390625) | Terminated: False | Episode Length: 1.430000000000001 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -2.0372506858655837 | State: (-104.48722076416016, 1.6430102586746216, 178.69088745117188) | Terminated: False | Episode Length: 1.440000000000001 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -2.0531732120823563 | State: (-104.66188049316406, 1.6458474397659302, 178.71817016601562) | Terminated: False | Episode Length: 1.450000000000001 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -2.0691103403258264 | State: (-104.83658599853516, 1.6486797332763672, 178.7454376220703) | Terminated: False | Episode Length: 1.460000000000001 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -2.0850618267154157 | State: (-105.0113296508789, 1.651507019996643, 178.77272033691406) | Terminated: False | Episode Length: 1.470000000000001 | C_L: 1.0\n", + "Action: [1. 
0.048] | Reward: -2.1010274263619473 | State: (-105.18612670898438, 1.6543292999267578, 178.79998779296875) | Terminated: False | Episode Length: 1.480000000000001 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -2.1170068933722783 | State: (-105.3609619140625, 1.6571464538574219, 178.8272705078125) | Terminated: False | Episode Length: 1.490000000000001 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -2.132999980854017 | State: (-105.53585052490234, 1.6599586009979248, 178.8545379638672) | Terminated: False | Episode Length: 1.500000000000001 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -2.149006440920324 | State: (-105.71077728271484, 1.662765622138977, 178.88180541992188) | Terminated: False | Episode Length: 1.5100000000000011 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -2.165026024694797 | State: (-105.88574981689453, 1.665567398071289, 178.90907287597656) | Terminated: False | Episode Length: 1.5200000000000011 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -2.181058482316445 | State: (-106.0607681274414, 1.6683640480041504, 178.9363555908203) | Terminated: False | Episode Length: 1.5300000000000011 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -2.197103562944743 | State: (-106.23583984375, 1.6711554527282715, 178.963623046875) | Terminated: False | Episode Length: 1.5400000000000011 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -2.213161014764773 | State: (-106.41095733642578, 1.673941731452942, 178.99090576171875) | Terminated: False | Episode Length: 1.5500000000000012 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -2.2292305849924503 | State: (-106.58612060546875, 1.676722526550293, 179.01817321777344) | Terminated: False | Episode Length: 1.5600000000000012 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -2.245312019879838 | State: (-106.76133728027344, 1.6794981956481934, 179.0454559326172) | Terminated: False | Episode Length: 1.5700000000000012 | C_L: 1.0\n", + "Action: [1. 
0.048] | Reward: -2.261405064720541 | State: (-106.93659973144531, 1.6822683811187744, 179.0727081298828) | Terminated: False | Episode Length: 1.5800000000000012 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -2.277509463855189 | State: (-107.11190795898438, 1.6850332021713257, 179.09999084472656) | Terminated: False | Episode Length: 1.5900000000000012 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -2.293624960677007 | State: (-107.28726959228516, 1.6877926588058472, 179.12725830078125) | Terminated: False | Episode Length: 1.6000000000000012 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -2.309751297637465 | State: (-107.46267700195312, 1.6905466318130493, 179.154541015625) | Terminated: False | Episode Length: 1.6100000000000012 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -2.325888216252017 | State: (-107.63813781738281, 1.6932951211929321, 179.1818084716797) | Terminated: False | Episode Length: 1.6200000000000012 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -2.3420354571059283 | State: (-107.81365203857422, 1.6960381269454956, 179.20909118652344) | Terminated: False | Episode Length: 1.6300000000000012 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -2.3581927598601795 | State: (-107.98921203613281, 1.6987755298614502, 179.23635864257812) | Terminated: False | Episode Length: 1.6400000000000012 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -2.3743598632574656 | State: (-108.16482543945312, 1.701507329940796, 179.2636260986328) | Terminated: False | Episode Length: 1.6500000000000012 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -2.390536505128274 | State: (-108.34049224853516, 1.7042336463928223, 179.2908935546875) | Terminated: False | Episode Length: 1.6600000000000013 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -2.406722422397049 | State: (-108.5162124633789, 1.7069542407989502, 179.3181610107422) | Terminated: False | Episode Length: 1.6700000000000013 | C_L: 1.0\n", + "Action: [1. 
0.048] | Reward: -2.422917351088446 | State: (-108.69197845458984, 1.7096692323684692, 179.34544372558594) | Terminated: False | Episode Length: 1.6800000000000013 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -2.4391210263336625 | State: (-108.8677978515625, 1.7123783826828003, 179.37271118164062) | Terminated: False | Episode Length: 1.6900000000000013 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -2.4553331823768634 | State: (-109.0436782836914, 1.7150819301605225, 179.39999389648438) | Terminated: False | Episode Length: 1.7000000000000013 | C_L: 1.0\n", + "Action: [1. 0.143] | Reward: -2.4715535525816845 | State: (-109.21961212158203, 1.7177796363830566, 179.48179626464844) | Terminated: False | Episode Length: 1.7100000000000013 | C_L: 1.0\n", + "Action: [1. 0.143] | Reward: -2.4877818692431566 | State: (-109.39559173583984, 1.7204716205596924, 179.56362915039062) | Terminated: False | Episode Length: 1.7200000000000013 | C_L: 1.0\n", + "Action: [1. 0.143] | Reward: -2.5040178638218187 | State: (-109.5716323852539, 1.7231576442718506, 179.64544677734375) | Terminated: False | Episode Length: 1.7300000000000013 | C_L: 1.0\n", + "Action: [1. 0.143] | Reward: -2.5202612669522377 | State: (-109.74772644042969, 1.7258379459381104, 179.72726440429688) | Terminated: False | Episode Length: 1.7400000000000013 | C_L: 1.0\n", + "Action: [1. 0.143] | Reward: -2.536511808451626 | State: (-109.92387390136719, 1.728512167930603, 179.80908203125) | Terminated: False | Episode Length: 1.7500000000000013 | C_L: 1.0\n", + "Action: [1. 0.143] | Reward: -2.5527692173285574 | State: (-110.10008239746094, 1.7311806678771973, 179.89089965820312) | Terminated: False | Episode Length: 1.7600000000000013 | C_L: 1.0\n", + "Action: [1. 0.143] | Reward: -2.5690332217917833 | State: (-110.2763442993164, 1.7338430881500244, 179.97271728515625) | Terminated: False | Episode Length: 1.7700000000000014 | C_L: 1.0\n", + "Action: [1. 
0.143] | Reward: -2.5853035492591454 | State: (-110.45265197753906, 1.736499547958374, 180.05453491210938) | Terminated: False | Episode Length: 1.7800000000000014 | C_L: 1.0\n", + "Action: [1. 0.143] | Reward: -2.6015799263665875 | State: (-110.62902069091797, 1.7391499280929565, 180.1363525390625) | Terminated: False | Episode Length: 1.7900000000000014 | C_L: 1.0\n", + "Action: [1. 0.143] | Reward: -2.6178620789772657 | State: (-110.8054428100586, 1.7417943477630615, 180.21817016601562) | Terminated: False | Episode Length: 1.8000000000000014 | C_L: 1.0\n", + "Action: [1. 0.143] | Reward: -2.634149732190753 | State: (-110.98191833496094, 1.7444326877593994, 180.29998779296875) | Terminated: False | Episode Length: 1.8100000000000014 | C_L: 1.0\n", + "Action: [1. 0.143] | Reward: -2.6504426103523446 | State: (-111.158447265625, 1.7470648288726807, 180.38180541992188) | Terminated: False | Episode Length: 1.8200000000000014 | C_L: 1.0\n", + "Action: [1. 0.143] | Reward: -2.6667404370624554 | State: (-111.33502960205078, 1.7496908903121948, 180.463623046875) | Terminated: False | Episode Length: 1.8300000000000014 | C_L: 1.0\n", + "Action: [1. 0.143] | Reward: -2.683042935186117 | State: (-111.51166534423828, 1.7523107528686523, 180.54544067382812) | Terminated: False | Episode Length: 1.8400000000000014 | C_L: 1.0\n", + "Action: [1. 0.143] | Reward: -2.699349826862568 | State: (-111.6883544921875, 1.7549244165420532, 180.6272735595703) | Terminated: False | Episode Length: 1.8500000000000014 | C_L: 1.0\n", + "Action: [1. 0.143] | Reward: -2.715660833514941 | State: (-111.86509704589844, 1.757531762123108, 180.70907592773438) | Terminated: False | Episode Length: 1.8600000000000014 | C_L: 1.0\n", + "Action: [1. 0.143] | Reward: -2.731975675860041 | State: (-112.0418930053711, 1.760132908821106, 180.79090881347656) | Terminated: False | Episode Length: 1.8700000000000014 | C_L: 1.0\n", + "Action: [1. 
0.143] | Reward: -2.7482940739182244 | State: (-112.21873474121094, 1.7627277374267578, 180.87271118164062) | Terminated: False | Episode Length: 1.8800000000000014 | C_L: 1.0\n", + "Action: [1. 0.143] | Reward: -2.7646157470233628 | State: (-112.3956298828125, 1.7653162479400635, 180.9545440673828) | Terminated: False | Episode Length: 1.8900000000000015 | C_L: 1.0\n", + "Action: [1. 0.143] | Reward: -2.780940413832907 | State: (-112.57258605957031, 1.7678983211517334, 181.03634643554688) | Terminated: False | Episode Length: 1.9000000000000015 | C_L: 1.0\n", + "Action: [1. 0.143] | Reward: -2.7972677923380402 | State: (-112.74958038330078, 1.7704740762710571, 181.1181640625) | Terminated: False | Episode Length: 1.9100000000000015 | C_L: 1.0\n", + "Action: [1. 0.143] | Reward: -2.8135975998739253 | State: (-112.9266357421875, 1.7730433940887451, 181.1999969482422) | Terminated: False | Episode Length: 1.9200000000000015 | C_L: 1.0\n", + "Action: [1. 0.143] | Reward: -2.8299295531300404 | State: (-113.10372924804688, 1.7756062746047974, 181.28179931640625) | Terminated: False | Episode Length: 1.9300000000000015 | C_L: 1.0\n", + "Action: [1. 0.143] | Reward: -2.8462633681606087 | State: (-113.2808837890625, 1.7781625986099243, 181.36363220214844) | Terminated: False | Episode Length: 1.9400000000000015 | C_L: 1.0\n", + "Action: [1. 0.143] | Reward: -2.8625987603951164 | State: (-113.45808410644531, 1.7807124853134155, 181.4454345703125) | Terminated: False | Episode Length: 1.9500000000000015 | C_L: 1.0\n", + "Action: [1. 0.143] | Reward: -2.8789354446489215 | State: (-113.63533782958984, 1.7832558155059814, 181.5272674560547) | Terminated: False | Episode Length: 1.9600000000000015 | C_L: 1.0\n", + "Action: [1. 0.143] | Reward: -2.8952731351339502 | State: (-113.81263732910156, 1.7857924699783325, 181.6090850830078) | Terminated: False | Episode Length: 1.9700000000000015 | C_L: 1.0\n", + "Action: [1. 
0.143] | Reward: -2.9116115454694844 | State: (-113.98998260498047, 1.7883225679397583, 181.69090270996094) | Terminated: False | Episode Length: 1.9800000000000015 | C_L: 1.0\n", + "Action: [1. 0.143] | Reward: -2.9279503886930343 | State: (-114.1673812866211, 1.7908461093902588, 181.77272033691406) | Terminated: False | Episode Length: 1.9900000000000015 | C_L: 1.0\n", + "Action: [1. 0.143] | Reward: -2.9442893772712995 | State: (-114.3448257446289, 1.7933628559112549, 181.85452270507812) | Terminated: False | Episode Length: 2.0000000000000013 | C_L: 1.0\n", + "Action: [1. 0.143] | Reward: -2.9606282231112164 | State: (-114.5223159790039, 1.7958730459213257, 181.9363555908203) | Terminated: False | Episode Length: 2.010000000000001 | C_L: 1.0\n", + "Action: [1. 0.143] | Reward: -2.976966637571092 | State: (-114.6998519897461, 1.7983763217926025, 182.01817321777344) | Terminated: False | Episode Length: 2.020000000000001 | C_L: 1.0\n", + "Action: [1. 0.143] | Reward: -2.99330433147182 | State: (-114.87743377685547, 1.800873041152954, 182.09999084472656) | Terminated: False | Episode Length: 2.0300000000000007 | C_L: 1.0\n", + "Action: [1. 0.143] | Reward: -3.0096410151081865 | State: (-115.0550765991211, 1.8033628463745117, 182.1818084716797) | Terminated: False | Episode Length: 2.0400000000000005 | C_L: 1.0\n", + "Action: [1. 0.143] | Reward: -3.025976398260254 | State: (-115.23275756835938, 1.805845856666565, 182.2636260986328) | Terminated: False | Episode Length: 2.0500000000000003 | C_L: 1.0\n", + "Action: [1. 0.143] | Reward: -3.042310190204832 | State: (-115.41047668457031, 1.8083220720291138, 182.34544372558594) | Terminated: False | Episode Length: 2.06 | C_L: 1.0\n", + "Action: [1. 0.143] | Reward: -3.058642099727028 | State: (-115.58824157714844, 1.8107913732528687, 182.42726135253906) | Terminated: False | Episode Length: 2.07 | C_L: 1.0\n", + "Action: [1. 
0.143] | Reward: -3.0749718351318798 | State: (-115.76605987548828, 1.8132537603378296, 182.5090789794922) | Terminated: False | Episode Length: 2.0799999999999996 | C_L: 1.0\n", + "Action: [1. 0.143] | Reward: -3.09129910425607 | State: (-115.94391632080078, 1.815709114074707, 182.59091186523438) | Terminated: False | Episode Length: 2.0899999999999994 | C_L: 1.0\n", + "Action: [1. 0.143] | Reward: -3.107623614479719 | State: (-116.12181854248047, 1.81815767288208, 182.67271423339844) | Terminated: False | Episode Length: 2.099999999999999 | C_L: 1.0\n", + "Action: [1. 0.143] | Reward: -3.123945072738257 | State: (-116.29975891113281, 1.8205991983413696, 182.75453186035156) | Terminated: False | Episode Length: 2.109999999999999 | C_L: 1.0\n", + "Action: [1. 0.143] | Reward: -3.140263185534376 | State: (-116.47773742675781, 1.8230335712432861, 182.8363494873047) | Terminated: False | Episode Length: 2.1199999999999988 | C_L: 1.0\n", + "Action: [1. 0.143] | Reward: -3.156577658950055 | State: (-116.65576171875, 1.8254610300064087, 182.9181671142578) | Terminated: False | Episode Length: 2.1299999999999986 | C_L: 1.0\n", + "Action: [1. 0.143] | Reward: -3.1728881986586677 | State: (-116.83383178710938, 1.8278813362121582, 182.99998474121094) | Terminated: False | Episode Length: 2.1399999999999983 | C_L: 1.0\n", + "Action: [1. 0.143] | Reward: -3.189194509937161 | State: (-117.0119400024414, 1.8302946090698242, 183.08180236816406) | Terminated: False | Episode Length: 2.149999999999998 | C_L: 1.0\n", + "Action: [1. 0.143] | Reward: -3.2054962976783097 | State: (-117.1900863647461, 1.8327007293701172, 183.16363525390625) | Terminated: False | Episode Length: 2.159999999999998 | C_L: 1.0\n", + "Action: [1. 0.143] | Reward: -3.221793266403047 | State: (-117.3682861328125, 1.8350995779037476, 183.2454376220703) | Terminated: False | Episode Length: 2.1699999999999977 | C_L: 1.0\n", + "Action: [1. 
0.143] | Reward: -3.238085120272865 | State: (-117.5465087890625, 1.8374913930892944, 183.3272705078125) | Terminated: False | Episode Length: 2.1799999999999975 | C_L: 1.0\n", + "Action: [1. 0.143] | Reward: -3.2543715631022883 | State: (-117.72477722167969, 1.8398758172988892, 183.40907287597656) | Terminated: False | Episode Length: 2.1899999999999973 | C_L: 1.0\n", + "Action: [1. 0.143] | Reward: -3.2706522983714215 | State: (-117.903076171875, 1.8422530889511108, 183.49090576171875) | Terminated: False | Episode Length: 2.199999999999997 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -3.2869270292385626 | State: (-118.0814208984375, 1.84462308883667, 183.51817321777344) | Terminated: False | Episode Length: 2.209999999999997 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -3.303195456474462 | State: (-118.25981140136719, 1.8469856977462769, 183.54544067382812) | Terminated: False | Episode Length: 2.2199999999999966 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -3.3194572804396776 | State: (-118.43826293945312, 1.8493410348892212, 183.57272338867188) | Terminated: False | Episode Length: 2.2299999999999964 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -3.3357122010952365 | State: (-118.61676025390625, 1.8516889810562134, 183.5999755859375) | Terminated: False | Episode Length: 2.239999999999996 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -3.351959918013358 | State: (-118.79530334472656, 1.8540295362472534, 183.62725830078125) | Terminated: False | Episode Length: 2.249999999999996 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -3.368200130388244 | State: (-118.9738998413086, 1.8563627004623413, 183.65452575683594) | Terminated: False | Episode Length: 2.259999999999996 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -3.384432537046931 | State: (-119.15253448486328, 1.858688473701477, 183.6818084716797) | Terminated: False | Episode Length: 2.2699999999999956 | C_L: 1.0\n", + "Action: [1. 
0.048] | Reward: -3.400656836460206 | State: (-119.33122253417969, 1.8610066175460815, 183.70907592773438) | Terminated: False | Episode Length: 2.2799999999999954 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -3.4168727267535886 | State: (-119.50997161865234, 1.8633173704147339, 183.73635864257812) | Terminated: False | Episode Length: 2.289999999999995 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -3.4330799057183685 | State: (-119.68875885009766, 1.865620493888855, 183.7636260986328) | Terminated: False | Episode Length: 2.299999999999995 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -3.4492780708227113 | State: (-119.86759948730469, 1.8679161071777344, 183.7908935546875) | Terminated: False | Episode Length: 2.3099999999999947 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -3.4654669192228194 | State: (-120.04649353027344, 1.870204210281372, 183.8181610107422) | Terminated: False | Episode Length: 2.3199999999999945 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -3.4816461477741565 | State: (-120.22543334960938, 1.872484564781189, 183.84544372558594) | Terminated: False | Episode Length: 2.3299999999999943 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -3.4978154530427314 | State: (-120.40442657470703, 1.8747572898864746, 183.87271118164062) | Terminated: False | Episode Length: 2.339999999999994 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -3.5139745313164386 | State: (-120.58345794677734, 1.877022385597229, 183.89999389648438) | Terminated: False | Episode Length: 2.349999999999994 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -3.530123078616458 | State: (-120.76253509521484, 1.8792798519134521, 183.92726135253906) | Terminated: False | Episode Length: 2.3599999999999937 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -3.5462607907087116 | State: (-120.94165802001953, 1.881529450416565, 183.9545440673828) | Terminated: False | Episode Length: 2.3699999999999934 | C_L: 1.0\n", + "Action: [1. 
0.048] | Reward: -3.562387363115378 | State: (-121.12084197998047, 1.8837714195251465, 183.9818115234375) | Terminated: False | Episode Length: 2.3799999999999932 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -3.5785024911264607 | State: (-121.3000717163086, 1.8860055208206177, 184.0090789794922) | Terminated: False | Episode Length: 2.389999999999993 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -3.594605869811414 | State: (-121.47933197021484, 1.888231873512268, 184.03634643554688) | Terminated: False | Episode Length: 2.399999999999993 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -3.6106971940308203 | State: (-121.65865325927734, 1.890450358390808, 184.06362915039062) | Terminated: False | Episode Length: 2.4099999999999926 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -3.626776158448124 | State: (-121.8380126953125, 1.8926609754562378, 184.0908966064453) | Terminated: False | Episode Length: 2.4199999999999924 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -3.6428424575414162 | State: (-122.0174331665039, 1.8948637247085571, 184.11817932128906) | Terminated: False | Episode Length: 2.429999999999992 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -3.6588957856152735 | State: (-122.19688415527344, 1.8970584869384766, 184.14544677734375) | Terminated: False | Episode Length: 2.439999999999992 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -3.674935836812646 | State: (-122.37638092041016, 1.8992453813552856, 184.17271423339844) | Terminated: False | Episode Length: 2.4499999999999917 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -3.690962305126797 | State: (-122.55593872070312, 1.9014242887496948, 184.19998168945312) | Terminated: False | Episode Length: 2.4599999999999915 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -3.7069748844132926 | State: (-122.73551177978516, 1.903595209121704, 184.22726440429688) | Terminated: False | Episode Length: 2.4699999999999913 | C_L: 1.0\n", + "Action: [1. 
0.048] | Reward: -3.7229732684020393 | State: (-122.91514587402344, 1.9057581424713135, 184.25453186035156) | Terminated: False | Episode Length: 2.479999999999991 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -3.738957150709372 | State: (-123.09484100341797, 1.9079129695892334, 184.2818145751953) | Terminated: False | Episode Length: 2.489999999999991 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -3.754926224850187 | State: (-123.27455139160156, 1.9100596904754639, 184.30908203125) | Terminated: False | Episode Length: 2.4999999999999907 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -3.7708801842501214 | State: (-123.4543228149414, 1.9121984243392944, 184.33636474609375) | Terminated: False | Episode Length: 2.5099999999999905 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -3.7868187222577805 | State: (-123.6341323852539, 1.914328932762146, 184.36363220214844) | Terminated: False | Episode Length: 2.5199999999999902 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -3.8027415321570084 | State: (-123.81397247314453, 1.916451334953308, 184.39088439941406) | Terminated: False | Episode Length: 2.52999999999999 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -3.818648307179203 | State: (-123.99386596679688, 1.9185655117034912, 184.4181671142578) | Terminated: False | Episode Length: 2.53999999999999 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -3.8345387405156734 | State: (-124.17381286621094, 1.9206715822219849, 184.4454345703125) | Terminated: False | Episode Length: 2.5499999999999896 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -3.8504125253300407 | State: (-124.35379028320312, 1.9227694272994995, 184.47271728515625) | Terminated: False | Episode Length: 2.5599999999999894 | C_L: 1.0\n", + "Action: [ 1. -0.524] | Reward: -3.86626935477068 | State: (-124.53380584716797, 1.9248589277267456, 184.17271423339844) | Terminated: False | Episode Length: 2.569999999999989 | C_L: 1.0\n", + "Action: [ 1. 
-0.524] | Reward: -3.882108902017585 | State: (-124.71397399902344, 1.9269402027130127, 183.87271118164062) | Terminated: False | Episode Length: 2.579999999999989 | C_L: 1.0\n", + "Action: [ 1. -0.524] | Reward: -3.897930841142955 | State: (-124.89427947998047, 1.9290131330490112, 183.57272338867188) | Terminated: False | Episode Length: 2.5899999999999888 | C_L: 1.0\n", + "Action: [ 1. -0.524] | Reward: -3.913734847156439 | State: (-125.07470703125, 1.9310777187347412, 183.27272033691406) | Terminated: False | Episode Length: 2.5999999999999885 | C_L: 1.0\n", + "Action: [ 1. -0.524] | Reward: -3.9295205960504944 | State: (-125.25526428222656, 1.9331339597702026, 182.97271728515625) | Terminated: False | Episode Length: 2.6099999999999883 | C_L: 1.0\n", + "Action: [ 1. -0.524] | Reward: -3.945287764845834 | State: (-125.4359359741211, 1.9351818561553955, 182.67271423339844) | Terminated: False | Episode Length: 2.619999999999988 | C_L: 1.0\n", + "Action: [ 1. -0.524] | Reward: -3.961036031636963 | State: (-125.6167221069336, 1.9372212886810303, 182.3727264404297) | Terminated: False | Episode Length: 2.629999999999988 | C_L: 1.0\n", + "Action: [ 1. -0.524] | Reward: -3.9767650756377853 | State: (-125.79761505126953, 1.939252257347107, 182.0727081298828) | Terminated: False | Episode Length: 2.6399999999999877 | C_L: 1.0\n", + "Action: [ 1. -0.524] | Reward: -3.9924745772272807 | State: (-125.97859954833984, 1.9412747621536255, 181.77272033691406) | Terminated: False | Episode Length: 2.6499999999999875 | C_L: 1.0\n", + "Action: [ 1. -0.524] | Reward: -4.008164217995231 | State: (-126.15968322753906, 1.943288803100586, 181.47271728515625) | Terminated: False | Episode Length: 2.6599999999999873 | C_L: 1.0\n", + "Action: [ 1. -0.524] | Reward: -4.0238336807879875 | State: (-126.3408432006836, 1.9452942609786987, 181.17271423339844) | Terminated: False | Episode Length: 2.669999999999987 | C_L: 1.0\n", + "Action: [ 1. 
-0.524] | Reward: -4.039482649754282 | State: (-126.52207946777344, 1.9472912549972534, 180.87271118164062) | Terminated: False | Episode Length: 2.679999999999987 | C_L: 1.0\n", + "Action: [1. 0.238] | Reward: -4.055110810391046 | State: (-126.70337677001953, 1.949279546737671, 181.0090789794922) | Terminated: False | Episode Length: 2.6899999999999866 | C_L: 1.0\n", + "Action: [1. 0.238] | Reward: -4.07071785484177 | State: (-126.88471984863281, 1.9512593746185303, 181.14544677734375) | Terminated: False | Episode Length: 2.6999999999999864 | C_L: 1.0\n", + "Action: [1. 0.238] | Reward: -4.086303475689346 | State: (-127.06608581542969, 1.9532305002212524, 181.28179931640625) | Terminated: False | Episode Length: 2.709999999999986 | C_L: 1.0\n", + "Action: [1. 0.238] | Reward: -4.101867365975675 | State: (-127.24749755859375, 1.9551929235458374, 181.4181671142578) | Terminated: False | Episode Length: 2.719999999999986 | C_L: 1.0\n", + "Action: [1. 0.238] | Reward: -4.117409219221296 | State: (-127.4289321899414, 1.9571467638015747, 181.55453491210938) | Terminated: False | Episode Length: 2.7299999999999858 | C_L: 1.0\n", + "Action: [1. 0.238] | Reward: -4.132928729445028 | State: (-127.61039733886719, 1.9590917825698853, 181.69090270996094) | Terminated: False | Episode Length: 2.7399999999999856 | C_L: 1.0\n", + "Action: [ 1. -0.524] | Reward: -4.148425591183629 | State: (-127.7918930053711, 1.9610282182693481, 181.39089965820312) | Terminated: False | Episode Length: 2.7499999999999853 | C_L: 1.0\n", + "Action: [1. 0.238] | Reward: -4.163899488425673 | State: (-127.97346496582031, 1.9629558324813843, 181.5272674560547) | Terminated: False | Episode Length: 2.759999999999985 | C_L: 1.0\n", + "Action: [ 1. -0.524] | Reward: -4.179350115735466 | State: (-128.15505981445312, 1.9648746252059937, 181.22726440429688) | Terminated: False | Episode Length: 2.769999999999985 | C_L: 1.0\n", + "Action: [1. 
0.238] | Reward: -4.194777158159935 | State: (-128.33673095703125, 1.9667847156524658, 181.36363220214844) | Terminated: False | Episode Length: 2.7799999999999847 | C_L: 1.0\n", + "Action: [ 1. -0.524] | Reward: -4.210180310339464 | State: (-128.5184326171875, 1.9686859846115112, 181.06361389160156) | Terminated: False | Episode Length: 2.7899999999999845 | C_L: 1.0\n", + "Action: [ 1. -0.524] | Reward: -4.22555925845522 | State: (-128.7001953125, 1.9705783128738403, 180.7636260986328) | Terminated: False | Episode Length: 2.7999999999999843 | C_L: 1.0\n", + "Action: [1. 0.238] | Reward: -4.240913690303861 | State: (-128.8820343017578, 1.9724619388580322, 180.89999389648438) | Terminated: False | Episode Length: 2.809999999999984 | C_L: 1.0\n", + "Action: [ 1. -0.524] | Reward: -4.256243300211455 | State: (-129.0638885498047, 1.9743365049362183, 180.59999084472656) | Terminated: False | Episode Length: 2.819999999999984 | C_L: 1.0\n", + "Action: [1. 0.238] | Reward: -4.271547777242017 | State: (-129.24581909179688, 1.9762022495269775, 180.73635864257812) | Terminated: False | Episode Length: 2.8299999999999836 | C_L: 1.0\n", + "Action: [ 1. -0.524] | Reward: -4.286826815920612 | State: (-129.42776489257812, 1.9780590534210205, 180.4363555908203) | Terminated: False | Episode Length: 2.8399999999999834 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -4.302080106654914 | State: (-129.60977172851562, 1.9799067974090576, 180.73635864257812) | Terminated: False | Episode Length: 2.849999999999983 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -4.317307346011213 | State: (-129.7917938232422, 1.981745719909668, 181.03634643554688) | Terminated: False | Episode Length: 2.859999999999983 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -4.3325082324477515 | State: (-129.97384643554688, 1.9835755825042725, 181.3363494873047) | Terminated: False | Episode Length: 2.869999999999983 | C_L: 1.0\n", + "Action: [1. 
0.524] | Reward: -4.3476824663604905 | State: (-130.15591430664062, 1.985396385192871, 181.6363525390625) | Terminated: False | Episode Length: 2.8799999999999826 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -4.362829750128748 | State: (-130.3379669189453, 1.9872081279754639, 181.9363555908203) | Terminated: False | Episode Length: 2.8899999999999824 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -4.377949788160709 | State: (-130.52003479003906, 1.9890108108520508, 182.23634338378906) | Terminated: False | Episode Length: 2.899999999999982 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -4.393042286938794 | State: (-130.70208740234375, 1.9908044338226318, 182.53636169433594) | Terminated: False | Episode Length: 2.909999999999982 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -4.4081069550648735 | State: (-130.8841094970703, 1.9925888776779175, 182.5090789794922) | Terminated: False | Episode Length: 2.9199999999999817 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -4.423143488087061 | State: (-131.06619262695312, 1.9943642616271973, 182.4818115234375) | Terminated: False | Episode Length: 2.9299999999999815 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -4.438151581968972 | State: (-131.248291015625, 1.9961304664611816, 182.45452880859375) | Terminated: False | Episode Length: 2.9399999999999813 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -4.453130933103363 | State: (-131.43043518066406, 1.9978874921798706, 182.42726135253906) | Terminated: False | Episode Length: 2.949999999999981 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -4.468081238325779 | State: (-131.6126251220703, 1.9996353387832642, 182.39999389648438) | Terminated: False | Episode Length: 2.959999999999981 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -4.483002194928214 | State: (-131.79486083984375, 2.0013740062713623, 182.3727264404297) | Terminated: False | Episode Length: 2.9699999999999807 | C_L: 1.0\n", + "Action: [ 1. 
-0.048] | Reward: -4.497893500672772 | State: (-131.9771270751953, 2.003103256225586, 182.34544372558594) | Terminated: False | Episode Length: 2.9799999999999804 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -4.512754853805345 | State: (-132.15943908691406, 2.0048234462738037, 182.31817626953125) | Terminated: False | Episode Length: 2.9899999999999802 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -4.527585953069288 | State: (-132.34178161621094, 2.0065343379974365, 182.2908935546875) | Terminated: False | Episode Length: 2.99999999999998 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -4.542386497719104 | State: (-132.52415466308594, 2.0082356929779053, 182.2636260986328) | Terminated: False | Episode Length: 3.00999999999998 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -4.55715618753413 | State: (-132.70655822753906, 2.009927988052368, 182.23634338378906) | Terminated: False | Episode Length: 3.0199999999999796 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -4.571894722832227 | State: (-132.88900756835938, 2.011610984802246, 182.20907592773438) | Terminated: False | Episode Length: 3.0299999999999794 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -4.586601804483468 | State: (-133.07150268554688, 2.01328444480896, 182.1818084716797) | Terminated: False | Episode Length: 3.039999999999979 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -4.601277133923832 | State: (-133.25401306152344, 2.014948606491089, 182.154541015625) | Terminated: False | Episode Length: 3.049999999999979 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -4.6159204131688885 | State: (-133.4365692138672, 2.0166032314300537, 182.45452880859375) | Terminated: False | Episode Length: 3.0599999999999787 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -4.630531358858072 | State: (-133.6190948486328, 2.0182485580444336, 182.75453186035156) | Terminated: False | Episode Length: 3.0699999999999785 | C_L: 1.0\n", + "Action: [ 1. 
-0.048] | Reward: -4.645109690112906 | State: (-133.8015899658203, 2.0198843479156494, 182.72726440429688) | Terminated: False | Episode Length: 3.0799999999999783 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -4.659655110702894 | State: (-133.984130859375, 2.0215108394622803, 183.02725219726562) | Terminated: False | Episode Length: 3.089999999999978 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -4.674167342822648 | State: (-134.16661071777344, 2.023127555847168, 182.99998474121094) | Terminated: False | Episode Length: 3.099999999999978 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -4.688646091501134 | State: (-134.34913635253906, 2.0247349739074707, 183.29998779296875) | Terminated: False | Episode Length: 3.1099999999999777 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -4.703091082099847 | State: (-134.53160095214844, 2.0263328552246094, 183.5999755859375) | Terminated: False | Episode Length: 3.1199999999999775 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -4.717502042632462 | State: (-134.71401977539062, 2.027921199798584, 183.57272338867188) | Terminated: False | Episode Length: 3.1299999999999772 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -4.731878680143065 | State: (-134.89646911621094, 2.0295000076293945, 183.87271118164062) | Terminated: False | Episode Length: 3.139999999999977 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -4.74622072596709 | State: (-135.07887268066406, 2.031069278717041, 183.84544372558594) | Terminated: False | Episode Length: 3.149999999999977 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -4.760527888566817 | State: (-135.26129150390625, 2.0326290130615234, 184.14544677734375) | Terminated: False | Episode Length: 3.1599999999999766 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -4.7747999026914245 | State: (-135.44363403320312, 2.0341789722442627, 184.11817932128906) | Terminated: False | Episode Length: 3.1699999999999764 | C_L: 1.0\n", + "Action: [1. 
0.524] | Reward: -4.789036478281722 | State: (-135.6260223388672, 2.035719394683838, 184.4181671142578) | Terminated: False | Episode Length: 3.179999999999976 | C_L: 1.0\n", + "Action: [1. 0.524] | Reward: -4.803237353591881 | State: (-135.80831909179688, 2.037250280380249, 184.71817016601562) | Terminated: False | Episode Length: 3.189999999999976 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -4.817402269755742 | State: (-135.99053955078125, 2.038771390914917, 184.69090270996094) | Terminated: False | Episode Length: 3.1999999999999758 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -4.831530939081078 | State: (-136.17279052734375, 2.040282964706421, 184.6636199951172) | Terminated: False | Episode Length: 3.2099999999999755 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -4.845623074644081 | State: (-136.35507202148438, 2.0417847633361816, 184.6363525390625) | Terminated: False | Episode Length: 3.2199999999999753 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -4.8596783903028316 | State: (-136.53736877441406, 2.043276786804199, 184.60906982421875) | Terminated: False | Episode Length: 3.229999999999975 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -4.873696600710742 | State: (-136.71971130371094, 2.0447592735290527, 184.58180236816406) | Terminated: False | Episode Length: 3.239999999999975 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -4.8876774213299905 | State: (-136.90208435058594, 2.046231985092163, 184.55453491210938) | Terminated: False | Episode Length: 3.2499999999999747 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -4.901620568444924 | State: (-137.08447265625, 2.047694683074951, 184.5272674560547) | Terminated: False | Episode Length: 3.2599999999999745 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -4.915525759175448 | State: (-137.26690673828125, 2.049147844314575, 184.49998474121094) | Terminated: False | Episode Length: 3.2699999999999743 | C_L: 1.0\n", + "Action: [ 1. 
-0.048] | Reward: -4.9293927114903875 | State: (-137.4493408203125, 2.050591230392456, 184.47271728515625) | Terminated: False | Episode Length: 3.279999999999974 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -4.943221144220824 | State: (-137.63182067871094, 2.0520248413085938, 184.4454345703125) | Terminated: False | Episode Length: 3.289999999999974 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -4.9570107770734095 | State: (-137.81431579589844, 2.053448438644409, 184.4181671142578) | Terminated: False | Episode Length: 3.2999999999999736 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -4.9707613306436516 | State: (-137.99684143066406, 2.0548622608184814, 184.39088439941406) | Terminated: False | Episode Length: 3.3099999999999734 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -4.9844725264291725 | State: (-138.17938232421875, 2.0562663078308105, 184.4181671142578) | Terminated: False | Episode Length: 3.319999999999973 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -4.998144091887068 | State: (-138.3619384765625, 2.0576605796813965, 184.4454345703125) | Terminated: False | Episode Length: 3.329999999999973 | C_L: 1.0\n", + "Action: [ 1. -0.333] | Reward: -5.011775755462796 | State: (-138.54449462890625, 2.05904483795166, 184.25453186035156) | Terminated: False | Episode Length: 3.3399999999999728 | C_L: 1.0\n", + "Action: [ 1. -0.333] | Reward: -5.025367226383995 | State: (-138.72711181640625, 2.0604190826416016, 184.06362915039062) | Terminated: False | Episode Length: 3.3499999999999726 | C_L: 1.0\n", + "Action: [ 1. -0.333] | Reward: -5.0389182154155545 | State: (-138.90982055664062, 2.0617835521698, 183.87271118164062) | Terminated: False | Episode Length: 3.3599999999999723 | C_L: 1.0\n", + "Action: [ 1. -0.333] | Reward: -5.052428434886014 | State: (-139.0926055908203, 2.063138008117676, 183.6818084716797) | Terminated: False | Episode Length: 3.369999999999972 | C_L: 1.0\n", + "Action: [ 1. 
-0.333] | Reward: -5.06589759871386 | State: (-139.27545166015625, 2.0644824504852295, 183.49090576171875) | Terminated: False | Episode Length: 3.379999999999972 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -5.079325422433716 | State: (-139.45834350585938, 2.06581711769104, 183.51817321777344) | Terminated: False | Episode Length: 3.3899999999999717 | C_L: 1.0\n", + "Action: [ 1. -0.333] | Reward: -5.092711639348843 | State: (-139.64125061035156, 2.067141532897949, 183.3272705078125) | Terminated: False | Episode Length: 3.3999999999999715 | C_L: 1.0\n", + "Action: [ 1. -0.333] | Reward: -5.106055967516749 | State: (-139.82421875, 2.0684561729431152, 183.1363525390625) | Terminated: False | Episode Length: 3.4099999999999713 | C_L: 1.0\n", + "Action: [ 1. -0.333] | Reward: -5.119358126677849 | State: (-140.0072479248047, 2.06976056098938, 182.94544982910156) | Terminated: False | Episode Length: 3.419999999999971 | C_L: 1.0\n", + "Action: [ 1. -0.333] | Reward: -5.132617838281248 | State: (-140.19032287597656, 2.0710551738739014, 182.75453186035156) | Terminated: False | Episode Length: 3.429999999999971 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -5.145834825510409 | State: (-140.37344360351562, 2.0723395347595215, 182.7818145751953) | Terminated: False | Episode Length: 3.4399999999999706 | C_L: 1.0\n", + "Action: [ 1. -0.333] | Reward: -5.159008826205453 | State: (-140.5565643310547, 2.0736136436462402, 182.59091186523438) | Terminated: False | Episode Length: 3.4499999999999704 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -5.172139566262211 | State: (-140.73973083496094, 2.074877977371216, 182.6181640625) | Terminated: False | Episode Length: 3.45999999999997 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -5.1852267855770835 | State: (-140.9228973388672, 2.07613205909729, 182.64544677734375) | Terminated: False | Episode Length: 3.46999999999997 | C_L: 1.0\n", + "Action: [1. 
0.048] | Reward: -5.198270225204652 | State: (-141.1060791015625, 2.077376127243042, 182.67271423339844) | Terminated: False | Episode Length: 3.47999999999997 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -5.211269627369995 | State: (-141.2892303466797, 2.0786099433898926, 182.69998168945312) | Terminated: False | Episode Length: 3.4899999999999696 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -5.224224735480943 | State: (-141.47238159179688, 2.079833507537842, 182.72726440429688) | Terminated: False | Episode Length: 3.4999999999999694 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -5.237135294140291 | State: (-141.65554809570312, 2.0810470581054688, 182.75453186035156) | Terminated: False | Episode Length: 3.509999999999969 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -5.250001049157941 | State: (-141.8386993408203, 2.0822503566741943, 182.7818145751953) | Terminated: False | Episode Length: 3.519999999999969 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -5.26282174756299 | State: (-142.02183532714844, 2.0834434032440186, 182.80906677246094) | Terminated: False | Episode Length: 3.5299999999999687 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -5.275597137615759 | State: (-142.20497131347656, 2.0846264362335205, 182.8363494873047) | Terminated: False | Episode Length: 3.5399999999999685 | C_L: 1.0\n", + "Action: [ 1. -0.333] | Reward: -5.288326968819762 | State: (-142.38809204101562, 2.085798978805542, 182.64544677734375) | Terminated: False | Episode Length: 3.5499999999999683 | C_L: 1.0\n", + "Action: [ 1. -0.333] | Reward: -5.301010978088838 | State: (-142.57127380371094, 2.086961507797241, 182.45452880859375) | Terminated: False | Episode Length: 3.559999999999968 | C_L: 1.0\n", + "Action: [ 1. -0.333] | Reward: -5.313648904288825 | State: (-142.7544708251953, 2.088113784790039, 182.2636260986328) | Terminated: False | Episode Length: 3.569999999999968 | C_L: 1.0\n", + "Action: [ 1. 
-0.333] | Reward: -5.326240488261914 | State: (-142.93771362304688, 2.0892558097839355, 182.0727081298828) | Terminated: False | Episode Length: 3.5799999999999677 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -5.338785472850839 | State: (-143.1209716796875, 2.0903873443603516, 182.09999084472656) | Terminated: False | Episode Length: 3.5899999999999674 | C_L: 1.0\n", + "Action: [ 1. -0.333] | Reward: -5.351283613072819 | State: (-143.30422973632812, 2.0915088653564453, 181.90907287597656) | Terminated: False | Episode Length: 3.5999999999999672 | C_L: 1.0\n", + "Action: [ 1. -0.333] | Reward: -5.363734654931722 | State: (-143.48751831054688, 2.0926198959350586, 181.71817016601562) | Terminated: False | Episode Length: 3.609999999999967 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -5.376138346493309 | State: (-143.6708221435547, 2.0937206745147705, 181.74545288085938) | Terminated: False | Episode Length: 3.619999999999967 | C_L: 1.0\n", + "Action: [ 1. -0.333] | Reward: -5.388494446337712 | State: (-143.85411071777344, 2.094811201095581, 181.55453491210938) | Terminated: False | Episode Length: 3.6299999999999666 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -5.400802705790864 | State: (-144.0374298095703, 2.095891237258911, 181.58181762695312) | Terminated: False | Episode Length: 3.6399999999999664 | C_L: 1.0\n", + "Action: [ 1. -0.333] | Reward: -5.413062885938025 | State: (-144.22071838378906, 2.09696102142334, 181.39089965820312) | Terminated: False | Episode Length: 3.649999999999966 | C_L: 1.0\n", + "Action: [ 1. -0.333] | Reward: -5.4252747414259614 | State: (-144.40402221679688, 2.098020315170288, 181.1999969482422) | Terminated: False | Episode Length: 3.659999999999966 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -5.437438029059673 | State: (-144.5873565673828, 2.099069356918335, 181.22726440429688) | Terminated: False | Episode Length: 3.6699999999999657 | C_L: 1.0\n", + "Action: [ 1. 
-0.333] | Reward: -5.449552513681581 | State: (-144.77066040039062, 2.1001079082489014, 181.03634643554688) | Terminated: False | Episode Length: 3.6799999999999655 | C_L: 1.0\n", + "Action: [ 1. -0.333] | Reward: -5.461617955513294 | State: (-144.95396423339844, 2.1011362075805664, 180.84544372558594) | Terminated: False | Episode Length: 3.6899999999999653 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -5.473634116992825 | State: (-145.1372833251953, 2.102154016494751, 180.87271118164062) | Terminated: False | Episode Length: 3.699999999999965 | C_L: 1.0\n", + "Action: [ 1. -0.333] | Reward: -5.485600766835183 | State: (-145.32058715820312, 2.103161334991455, 180.6818084716797) | Terminated: False | Episode Length: 3.709999999999965 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -5.497517670988598 | State: (-145.50389099121094, 2.104158401489258, 180.70907592773438) | Terminated: False | Episode Length: 3.7199999999999647 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -5.509384600867567 | State: (-145.6871795654297, 2.10514497756958, 180.73635864257812) | Terminated: False | Episode Length: 3.7299999999999645 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -5.5212013293362965 | State: (-145.8704376220703, 2.106121063232422, 180.7636260986328) | Terminated: False | Episode Length: 3.7399999999999642 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -5.532967630719475 | State: (-146.05369567871094, 2.107086658477783, 180.79090881347656) | Terminated: False | Episode Length: 3.749999999999964 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -5.544683280812963 | State: (-146.23692321777344, 2.108041763305664, 180.81817626953125) | Terminated: False | Episode Length: 3.759999999999964 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -5.5563480568944 | State: (-146.42013549804688, 2.1089863777160645, 180.84544372558594) | Terminated: False | Episode Length: 3.7699999999999636 | C_L: 1.0\n", + "Action: [1. 
0.048] | Reward: -5.567961737733732 | State: (-146.6033172607422, 2.1099205017089844, 180.87271118164062) | Terminated: False | Episode Length: 3.7799999999999634 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -5.579524103603652 | State: (-146.7864990234375, 2.110844135284424, 180.89999389648438) | Terminated: False | Episode Length: 3.789999999999963 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -5.591034936289959 | State: (-146.96963500976562, 2.111757278442383, 180.92726135253906) | Terminated: False | Episode Length: 3.799999999999963 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -5.602494019101828 | State: (-147.15277099609375, 2.1126596927642822, 180.95452880859375) | Terminated: False | Episode Length: 3.8099999999999627 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -5.613901136881997 | State: (-147.33587646484375, 2.1135518550872803, 180.9818115234375) | Terminated: False | Episode Length: 3.8199999999999625 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -5.625256076016864 | State: (-147.51895141601562, 2.1144332885742188, 181.0090789794922) | Terminated: False | Episode Length: 3.8299999999999623 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -5.636558624446499 | State: (-147.70201110839844, 2.115304470062256, 181.03634643554688) | Terminated: False | Episode Length: 3.839999999999962 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -5.6478085716745605 | State: (-147.88504028320312, 2.1161649227142334, 181.06361389160156) | Terminated: False | Episode Length: 3.849999999999962 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -5.65900570877813 | State: (-148.0680389404297, 2.1170146465301514, 181.0908966064453) | Terminated: False | Episode Length: 3.8599999999999617 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -5.6701498284174505 | State: (-148.2510223388672, 2.117854118347168, 181.1181640625) | Terminated: False | Episode Length: 3.8699999999999615 | C_L: 1.0\n", + "Action: [1. 
0.048] | Reward: -5.681240724845572 | State: (-148.4339599609375, 2.118682622909546, 181.14544677734375) | Terminated: False | Episode Length: 3.8799999999999613 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -5.692278193917908 | State: (-148.61688232421875, 2.1195008754730225, 181.17271423339844) | Terminated: False | Episode Length: 3.889999999999961 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -5.7032620331017 | State: (-148.79977416992188, 2.1203083992004395, 181.1999969482422) | Terminated: False | Episode Length: 3.899999999999961 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -5.7141920414853775 | State: (-148.98263549804688, 2.121105432510376, 181.22726440429688) | Terminated: False | Episode Length: 3.9099999999999606 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -5.7250680197878365 | State: (-149.16546630859375, 2.121891736984253, 181.25453186035156) | Terminated: False | Episode Length: 3.9199999999999604 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -5.735889770367611 | State: (-149.34825134277344, 2.1226675510406494, 181.28179931640625) | Terminated: False | Episode Length: 3.92999999999996 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -5.746657097231953 | State: (-149.53102111816406, 2.1234326362609863, 181.25453186035156) | Terminated: False | Episode Length: 3.93999999999996 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -5.757369804230433 | State: (-149.7137451171875, 2.1241869926452637, 181.22726440429688) | Terminated: False | Episode Length: 3.9499999999999598 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -5.768027696865897 | State: (-149.89645385742188, 2.1249308586120605, 181.1999969482422) | Terminated: False | Episode Length: 3.9599999999999596 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -5.77863058230345 | State: (-150.07911682128906, 2.125663995742798, 181.17271423339844) | Terminated: False | Episode Length: 3.9699999999999593 | C_L: 1.0\n", + "Action: [ 1. 
-0.048] | Reward: -5.789178269379344 | State: (-150.2617645263672, 2.1263866424560547, 181.14544677734375) | Terminated: False | Episode Length: 3.979999999999959 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -5.799670568609766 | State: (-150.4443817138672, 2.127098560333252, 181.1181640625) | Terminated: False | Episode Length: 3.989999999999959 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -5.810107292199523 | State: (-150.626953125, 2.1277997493743896, 181.0908966064453) | Terminated: False | Episode Length: 3.9999999999999587 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -5.820488254050628 | State: (-150.80950927734375, 2.128490447998047, 181.06361389160156) | Terminated: False | Episode Length: 4.009999999999959 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -5.83081326977079 | State: (-150.9920196533203, 2.1291704177856445, 181.03634643554688) | Terminated: False | Episode Length: 4.019999999999959 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -5.841082156681794 | State: (-151.17449951171875, 2.1298396587371826, 181.0090789794922) | Terminated: False | Episode Length: 4.0299999999999585 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -5.851294733827783 | State: (-151.35693359375, 2.130498170852661, 180.9818115234375) | Terminated: False | Episode Length: 4.039999999999958 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -5.861450821983434 | State: (-151.53933715820312, 2.13114595413208, 180.95452880859375) | Terminated: False | Episode Length: 4.049999999999958 | C_L: 1.0\n", + "Action: [ 1. -0.048] | Reward: -5.871550243662032 | State: (-151.72169494628906, 2.1317832469940186, 180.92726135253906) | Terminated: False | Episode Length: 4.059999999999958 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -5.881592823123437 | State: (-151.90402221679688, 2.1324098110198975, 180.95452880859375) | Terminated: False | Episode Length: 4.069999999999958 | C_L: 1.0\n", + "Action: [1. 
0.048] | Reward: -5.891578387737176 | State: (-152.0863037109375, 2.133025646209717, 180.9818115234375) | Terminated: False | Episode Length: 4.079999999999957 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -5.901506766643729 | State: (-152.26853942871094, 2.1336307525634766, 181.0090789794922) | Terminated: False | Episode Length: 4.089999999999957 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -5.911377790762026 | State: (-152.45074462890625, 2.1342251300811768, 181.03634643554688) | Terminated: False | Episode Length: 4.099999999999957 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -5.921191292796827 | State: (-152.6328887939453, 2.1348087787628174, 181.06361389160156) | Terminated: False | Episode Length: 4.109999999999957 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -5.9309471072459985 | State: (-152.8149871826172, 2.1353816986083984, 181.0908966064453) | Terminated: False | Episode Length: 4.119999999999957 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -5.940645070407678 | State: (-152.99703979492188, 2.13594388961792, 181.1181640625) | Terminated: False | Episode Length: 4.129999999999956 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -5.950285020387329 | State: (-153.17904663085938, 2.136495590209961, 181.14544677734375) | Terminated: False | Episode Length: 4.139999999999956 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -5.9598667971046835 | State: (-153.3610076904297, 2.1370363235473633, 181.17271423339844) | Terminated: False | Episode Length: 4.149999999999956 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -5.9693902423005705 | State: (-153.54290771484375, 2.137566328048706, 181.1999969482422) | Terminated: False | Episode Length: 4.159999999999956 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -5.978855199543637 | State: (-153.72474670410156, 2.1380858421325684, 181.22726440429688) | Terminated: False | Episode Length: 4.1699999999999555 | C_L: 1.0\n", + "Action: [1. 
0.048] | Reward: -5.988261514236952 | State: (-153.90655517578125, 2.138594388961792, 181.25453186035156) | Terminated: False | Episode Length: 4.179999999999955 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -5.9976090336245 | State: (-154.08828735351562, 2.139092206954956, 181.28179931640625) | Terminated: False | Episode Length: 4.189999999999955 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.006897606797556 | State: (-154.26998901367188, 2.1395792961120605, 181.30908203125) | Terminated: False | Episode Length: 4.199999999999955 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.016127084700949 | State: (-154.45162963867188, 2.1400558948516846, 181.3363494873047) | Terminated: False | Episode Length: 4.209999999999955 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.025297320139215 | State: (-154.63320922851562, 2.14052152633667, 181.36363220214844) | Terminated: False | Episode Length: 4.2199999999999545 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.034408167782622 | State: (-154.8147430419922, 2.1409764289855957, 181.39089965820312) | Terminated: False | Episode Length: 4.229999999999954 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.04345948417309 | State: (-154.99620056152344, 2.141420602798462, 181.4181671142578) | Terminated: False | Episode Length: 4.239999999999954 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.052451127729994 | State: (-155.1776123046875, 2.1418540477752686, 181.4454345703125) | Terminated: False | Episode Length: 4.249999999999954 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.061382958755842 | State: (-155.3589630126953, 2.1422767639160156, 181.47271728515625) | Terminated: False | Episode Length: 4.259999999999954 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.070254839441841 | State: (-155.54025268554688, 2.142688751220703, 181.49998474121094) | Terminated: False | Episode Length: 4.269999999999953 | C_L: 1.0\n", + "Action: [1. 
0.048] | Reward: -6.079066633873347 | State: (-155.7214813232422, 2.143089771270752, 181.5272674560547) | Terminated: False | Episode Length: 4.279999999999953 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.087818208035192 | State: (-155.90264892578125, 2.1434803009033203, 181.55453491210938) | Terminated: False | Episode Length: 4.289999999999953 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.096509429816898 | State: (-156.08375549316406, 2.143860101699829, 181.58181762695312) | Terminated: False | Episode Length: 4.299999999999953 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.105140169017764 | State: (-156.26480102539062, 2.144228935241699, 181.6090850830078) | Terminated: False | Episode Length: 4.3099999999999525 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.113710297351841 | State: (-156.44577026367188, 2.144587278366089, 181.6363525390625) | Terminated: False | Episode Length: 4.319999999999952 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.122219688452785 | State: (-156.62667846679688, 2.14493465423584, 181.6636199951172) | Terminated: False | Episode Length: 4.329999999999952 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.130668217878586 | State: (-156.80752563476562, 2.1452713012695312, 181.69090270996094) | Terminated: False | Episode Length: 4.339999999999952 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.1390557631161835 | State: (-156.98831176757812, 2.145597219467163, 181.71817016601562) | Terminated: False | Episode Length: 4.349999999999952 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.147382203585955 | State: (-157.16900634765625, 2.1459124088287354, 181.74545288085938) | Terminated: False | Episode Length: 4.3599999999999515 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.155647420646086 | State: (-157.3496551513672, 2.146217107772827, 181.77272033691406) | Terminated: False | Episode Length: 4.369999999999951 | C_L: 1.0\n", + "Action: [1. 
0.048] | Reward: -6.163851297596819 | State: (-157.53021240234375, 2.1465108394622803, 181.8000030517578) | Terminated: False | Episode Length: 4.379999999999951 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.171993719684582 | State: (-157.71070861816406, 2.1467936038970947, 181.82725524902344) | Terminated: False | Episode Length: 4.389999999999951 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.180074574105988 | State: (-157.89114379882812, 2.1470658779144287, 181.85452270507812) | Terminated: False | Episode Length: 4.399999999999951 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.188093750011724 | State: (-158.07150268554688, 2.147327423095703, 181.88180541992188) | Terminated: False | Episode Length: 4.40999999999995 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.196051138510307 | State: (-158.25177001953125, 2.147578239440918, 181.90907287597656) | Terminated: False | Episode Length: 4.41999999999995 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.203946632671721 | State: (-158.43197631835938, 2.1478183269500732, 181.9363555908203) | Terminated: False | Episode Length: 4.42999999999995 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.211780127530935 | State: (-158.61212158203125, 2.14804744720459, 181.963623046875) | Terminated: False | Episode Length: 4.43999999999995 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.219551520091291 | State: (-158.7921600341797, 2.148266077041626, 181.99090576171875) | Terminated: False | Episode Length: 4.4499999999999496 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.2272607093277745 | State: (-158.97213745117188, 2.1484739780426025, 182.01817321777344) | Terminated: False | Episode Length: 4.459999999999949 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.234907596190157 | State: (-159.15203857421875, 2.1486709117889404, 182.04544067382812) | Terminated: False | Episode Length: 4.469999999999949 | C_L: 1.0\n", + "Action: [1. 
0.048] | Reward: -6.242492083606021 | State: (-159.3318634033203, 2.148857355117798, 182.0727081298828) | Terminated: False | Episode Length: 4.479999999999949 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.250014076483651 | State: (-159.5115966796875, 2.1490330696105957, 182.09999084472656) | Terminated: False | Episode Length: 4.489999999999949 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.257473481714812 | State: (-159.69126892089844, 2.149197816848755, 182.12725830078125) | Terminated: False | Episode Length: 4.4999999999999485 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.264870208177399 | State: (-159.87083435058594, 2.1493520736694336, 182.154541015625) | Terminated: False | Episode Length: 4.509999999999948 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.272204166737958 | State: (-160.05032348632812, 2.1494956016540527, 182.1818084716797) | Terminated: False | Episode Length: 4.519999999999948 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.279475270254092 | State: (-160.229736328125, 2.1496284008026123, 182.20907592773438) | Terminated: False | Episode Length: 4.529999999999948 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.286683433576733 | State: (-160.40907287597656, 2.1497504711151123, 182.23634338378906) | Terminated: False | Episode Length: 4.539999999999948 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.293828573552299 | State: (-160.58831787109375, 2.1498618125915527, 182.2636260986328) | Terminated: False | Episode Length: 4.549999999999947 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.300910609024721 | State: (-160.76747131347656, 2.1499624252319336, 182.2908935546875) | Terminated: False | Episode Length: 4.559999999999947 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.307929460837343 | State: (-160.946533203125, 2.150052547454834, 182.31817626953125) | Terminated: False | Episode Length: 4.569999999999947 | C_L: 1.0\n", + "Action: [1. 
0.048] | Reward: -6.314885051834704 | State: (-161.12550354003906, 2.1501317024230957, 182.34544372558594) | Terminated: False | Episode Length: 4.579999999999947 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.321777306864194 | State: (-161.3043975830078, 2.150200366973877, 182.3727264404297) | Terminated: False | Episode Length: 4.589999999999947 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.3286061527775805 | State: (-161.4832000732422, 2.1502583026885986, 182.39999389648438) | Terminated: False | Episode Length: 4.599999999999946 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.335371518432417 | State: (-161.6619110107422, 2.1503055095672607, 182.42726135253906) | Terminated: False | Episode Length: 4.609999999999946 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.342073334693324 | State: (-161.8405303955078, 2.1503422260284424, 182.45452880859375) | Terminated: False | Episode Length: 4.619999999999946 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.348711534433144 | State: (-162.01904296875, 2.1503679752349854, 182.4818115234375) | Terminated: False | Episode Length: 4.629999999999946 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.355286052533976 | State: (-162.19747924804688, 2.150383234024048, 182.5090789794922) | Terminated: False | Episode Length: 4.6399999999999455 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.361796825888083 | State: (-162.3758087158203, 2.15038800239563, 182.53636169433594) | Terminated: False | Episode Length: 4.649999999999945 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.368243793398677 | State: (-162.55404663085938, 2.1503820419311523, 182.56362915039062) | Terminated: False | Episode Length: 4.659999999999945 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.374626895980577 | State: (-162.73219299316406, 2.1503653526306152, 182.59091186523438) | Terminated: False | Episode Length: 4.669999999999945 | C_L: 1.0\n", + "Action: [1. 
0.048] | Reward: -6.380946076560748 | State: (-162.91024780273438, 2.1503379344940186, 182.6181640625) | Terminated: False | Episode Length: 4.679999999999945 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.387201280078709 | State: (-163.08819580078125, 2.1503000259399414, 182.64544677734375) | Terminated: False | Episode Length: 4.689999999999944 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.393392453486827 | State: (-163.26605224609375, 2.1502513885498047, 182.67271423339844) | Terminated: False | Episode Length: 4.699999999999944 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.399519545750478 | State: (-163.4438018798828, 2.1501922607421875, 182.69998168945312) | Terminated: False | Episode Length: 4.709999999999944 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.40558250784809 | State: (-163.62144470214844, 2.1501224040985107, 182.72726440429688) | Terminated: False | Episode Length: 4.719999999999944 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.41158129277106 | State: (-163.7989959716797, 2.1500420570373535, 182.75453186035156) | Terminated: False | Episode Length: 4.729999999999944 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.417515855523552 | State: (-163.9764404296875, 2.149951219558716, 182.7818145751953) | Terminated: False | Episode Length: 4.739999999999943 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.423386153122169 | State: (-164.15377807617188, 2.1498496532440186, 182.80906677246094) | Terminated: False | Episode Length: 4.749999999999943 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.4291921445955 | State: (-164.33102416992188, 2.1497373580932617, 182.8363494873047) | Terminated: False | Episode Length: 4.759999999999943 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.434933790983549 | State: (-164.50814819335938, 2.1496148109436035, 182.86361694335938) | Terminated: False | Episode Length: 4.769999999999943 | C_L: 1.0\n", + "Action: [1. 
0.048] | Reward: -6.440611055337044 | State: (-164.68516540527344, 2.1494812965393066, 182.89089965820312) | Terminated: False | Episode Length: 4.7799999999999425 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.446223902716615 | State: (-164.86209106445312, 2.1493375301361084, 182.9181671142578) | Terminated: False | Episode Length: 4.789999999999942 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.45177230019186 | State: (-165.03890991210938, 2.1491832733154297, 182.94544982910156) | Terminated: False | Episode Length: 4.799999999999942 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.457256216840283 | State: (-165.21560668945312, 2.1490182876586914, 182.97271728515625) | Terminated: False | Episode Length: 4.809999999999942 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.462675623746114 | State: (-165.39218139648438, 2.1488428115844727, 182.99998474121094) | Terminated: False | Episode Length: 4.819999999999942 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.468030493999012 | State: (-165.56866455078125, 2.1486568450927734, 183.02725219726562) | Terminated: False | Episode Length: 4.8299999999999415 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.473320802692636 | State: (-165.7450408935547, 2.1484603881835938, 183.05453491210938) | Terminated: False | Episode Length: 4.839999999999941 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.478546526923108 | State: (-165.92127990722656, 2.1482534408569336, 183.08180236816406) | Terminated: False | Episode Length: 4.849999999999941 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.483707645787353 | State: (-166.097412109375, 2.148035764694214, 183.1090850830078) | Terminated: False | Episode Length: 4.859999999999941 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.488804140381313 | State: (-166.27345275878906, 2.1478078365325928, 183.1363525390625) | Terminated: False | Episode Length: 4.869999999999941 | C_L: 1.0\n", + "Action: [1. 
0.048] | Reward: -6.493835993798052 | State: (-166.44935607910156, 2.147569417953491, 183.16363525390625) | Terminated: False | Episode Length: 4.87999999999994 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.498803191125734 | State: (-166.62515258789062, 2.147320508956909, 183.19090270996094) | Terminated: False | Episode Length: 4.88999999999994 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.503705719445488 | State: (-166.80081176757812, 2.1470611095428467, 183.21817016601562) | Terminated: False | Episode Length: 4.89999999999994 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.508543567829155 | State: (-166.97637939453125, 2.1467912197113037, 183.2454376220703) | Terminated: False | Episode Length: 4.90999999999994 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.513316727336911 | State: (-167.15182495117188, 2.1465110778808594, 183.27272033691406) | Terminated: False | Episode Length: 4.9199999999999395 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.518025191014784 | State: (-167.32713317871094, 2.1462204456329346, 183.29998779296875) | Terminated: False | Episode Length: 4.929999999999939 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.522668953892044 | State: (-167.50233459472656, 2.1459193229675293, 183.3272705078125) | Terminated: False | Episode Length: 4.939999999999939 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.527248012978481 | State: (-167.67739868164062, 2.1456077098846436, 183.3545379638672) | Terminated: False | Episode Length: 4.949999999999939 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.53176236726157 | State: (-167.85235595703125, 2.1452858448028564, 183.38180541992188) | Terminated: False | Episode Length: 4.959999999999939 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.536212017703518 | State: (-168.0271759033203, 2.144953489303589, 183.40907287597656) | Terminated: False | Episode Length: 4.9699999999999385 | C_L: 1.0\n", + "Action: [1. 
0.048] | Reward: -6.540596967238195 | State: (-168.20188903808594, 2.14461088180542, 183.4363555908203) | Terminated: False | Episode Length: 4.979999999999938 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.544917220767954 | State: (-168.37646484375, 2.1442577838897705, 183.463623046875) | Terminated: False | Episode Length: 4.989999999999938 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.5491727851603345 | State: (-168.55091857910156, 2.1438944339752197, 183.49090576171875) | Terminated: False | Episode Length: 4.999999999999938 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.553363669244655 | State: (-168.72523498535156, 2.1435208320617676, 183.51817321777344) | Terminated: False | Episode Length: 5.009999999999938 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.557489883808492 | State: (-168.89942932128906, 2.143136739730835, 183.54544067382812) | Terminated: False | Episode Length: 5.019999999999937 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.561551441594043 | State: (-169.07350158691406, 2.142742395401001, 183.57272338867188) | Terminated: False | Episode Length: 5.029999999999937 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.565548357294388 | State: (-169.2474365234375, 2.1423375606536865, 183.5999755859375) | Terminated: False | Episode Length: 5.039999999999937 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.569480647549629 | State: (-169.42123413085938, 2.14192271232605, 183.62725830078125) | Terminated: False | Episode Length: 5.049999999999937 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.573348330942925 | State: (-169.5948944091797, 2.1414973735809326, 183.65452575683594) | Terminated: False | Episode Length: 5.0599999999999365 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.577151427996416 | State: (-169.76844787597656, 2.141062021255493, 183.6818084716797) | Terminated: False | Episode Length: 5.069999999999936 | C_L: 1.0\n", + "Action: [1. 
0.048] | Reward: -6.580889961167037 | State: (-169.9418487548828, 2.1406161785125732, 183.70907592773438) | Terminated: False | Episode Length: 5.079999999999936 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.584563954842223 | State: (-170.1151123046875, 2.140160083770752, 183.73635864257812) | Terminated: False | Episode Length: 5.089999999999936 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.588173435335511 | State: (-170.28823852539062, 2.1396939754486084, 183.7636260986328) | Terminated: False | Episode Length: 5.099999999999936 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.5917184308820245 | State: (-170.46124267578125, 2.1392176151275635, 183.7908935546875) | Terminated: False | Episode Length: 5.1099999999999355 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.59519897163386 | State: (-170.63409423828125, 2.138730764389038, 183.8181610107422) | Terminated: False | Episode Length: 5.119999999999935 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.5986150896553655 | State: (-170.80682373046875, 2.1382341384887695, 183.84544372558594) | Terminated: False | Episode Length: 5.129999999999935 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.60196681891831 | State: (-170.97940063476562, 2.1377270221710205, 183.87271118164062) | Terminated: False | Episode Length: 5.139999999999935 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.605254195296952 | State: (-171.15184020996094, 2.137209892272949, 183.89999389648438) | Terminated: False | Episode Length: 5.149999999999935 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.608477256563002 | State: (-171.3241424560547, 2.1366825103759766, 183.92726135253906) | Terminated: False | Episode Length: 5.159999999999934 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.61163604238048 | State: (-171.49630737304688, 2.1361451148986816, 183.9545440673828) | Terminated: False | Episode Length: 5.169999999999934 | C_L: 1.0\n", + "Action: [1. 
0.048] | Reward: -6.614730594300477 | State: (-171.66831970214844, 2.1355974674224854, 183.9818115234375) | Terminated: False | Episode Length: 5.179999999999934 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.617760955755804 | State: (-171.84017944335938, 2.135039806365967, 184.0090789794922) | Terminated: False | Episode Length: 5.189999999999934 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.620727172055546 | State: (-172.01190185546875, 2.134471893310547, 184.03634643554688) | Terminated: False | Episode Length: 5.199999999999934 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.623629290379519 | State: (-172.18348693847656, 2.133894205093384, 184.06362915039062) | Terminated: False | Episode Length: 5.209999999999933 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.626467359772615 | State: (-172.3549041748047, 2.1333062648773193, 184.0908966064453) | Terminated: False | Episode Length: 5.219999999999933 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.629241431139061 | State: (-172.5261993408203, 2.1327083110809326, 184.11817932128906) | Terminated: False | Episode Length: 5.229999999999933 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.63195155723657 | State: (-172.6973419189453, 2.1321001052856445, 184.14544677734375) | Terminated: False | Episode Length: 5.239999999999933 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.6345977926704 | State: (-172.86831665039062, 2.1314821243286133, 184.17271423339844) | Terminated: False | Episode Length: 5.2499999999999325 | C_L: 1.0\n", + "Action: [ 1. -0.524] | Reward: -6.637180193887314 | State: (-173.03915405273438, 2.1308541297912598, 183.87271118164062) | Terminated: False | Episode Length: 5.259999999999932 | C_L: 1.0\n", + "Action: [ 1. -0.524] | Reward: -6.639698779507123 | State: (-173.20994567871094, 2.130216121673584, 183.57272338867188) | Terminated: False | Episode Length: 5.269999999999932 | C_L: 1.0\n", + "Action: [ 1. 
-0.524] | Reward: -6.642153572672575 | State: (-173.38067626953125, 2.129568099975586, 183.27272033691406) | Terminated: False | Episode Length: 5.279999999999932 | C_L: 1.0\n", + "Action: [ 1. -0.524] | Reward: -6.644544601046346 | State: (-173.55136108398438, 2.1289100646972656, 182.97271728515625) | Terminated: False | Episode Length: 5.289999999999932 | C_L: 1.0\n", + "Action: [ 1. -0.524] | Reward: -6.6468718968073555 | State: (-173.7219696044922, 2.128242254257202, 182.67271423339844) | Terminated: False | Episode Length: 5.299999999999931 | C_L: 1.0\n", + "Action: [ 1. -0.524] | Reward: -6.649135496646419 | State: (-173.8925018310547, 2.1275644302368164, 182.3727264404297) | Terminated: False | Episode Length: 5.309999999999931 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.651335441761225 | State: (-174.0629425048828, 2.1268765926361084, 182.39999389648438) | Terminated: False | Episode Length: 5.319999999999931 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.653471799798766 | State: (-174.23324584960938, 2.126178741455078, 182.42726135253906) | Terminated: False | Episode Length: 5.329999999999931 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.655544640209343 | State: (-174.4033966064453, 2.1254711151123047, 182.45452880859375) | Terminated: False | Episode Length: 5.339999999999931 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.657554034239835 | State: (-174.57337951660156, 2.124753475189209, 182.4818115234375) | Terminated: False | Episode Length: 5.34999999999993 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.659500054926863 | State: (-174.7432098388672, 2.12402606010437, 182.5090789794922) | Terminated: False | Episode Length: 5.35999999999993 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.661382777089872 | State: (-174.9128875732422, 2.123288869857788, 182.53636169433594) | Terminated: False | Episode Length: 5.36999999999993 | C_L: 1.0\n", + "Action: [1. 
0.048] | Reward: -6.663202277324129 | State: (-175.08241271972656, 2.122541666030884, 182.56362915039062) | Terminated: False | Episode Length: 5.37999999999993 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.664958633993619 | State: (-175.25177001953125, 2.1217846870422363, 182.59091186523438) | Terminated: False | Episode Length: 5.3899999999999295 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.666651927223867 | State: (-175.4209747314453, 2.1210179328918457, 182.6181640625) | Terminated: False | Episode Length: 5.399999999999929 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.668282238894665 | State: (-175.58999633789062, 2.120241403579712, 182.64544677734375) | Terminated: False | Episode Length: 5.409999999999929 | C_L: 1.0\n", + "Action: [ 1. -0.524] | Reward: -6.669849652632717 | State: (-175.75888061523438, 2.119455099105835, 182.34544372558594) | Terminated: False | Episode Length: 5.419999999999929 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.671354229291479 | State: (-175.92767333984375, 2.1186587810516357, 182.3727264404297) | Terminated: False | Episode Length: 5.429999999999929 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.6727960557208705 | State: (-176.09629821777344, 2.1178529262542725, 182.39999389648438) | Terminated: False | Episode Length: 5.4399999999999284 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.674175220496997 | State: (-176.26475524902344, 2.117037534713745, 182.42726135253906) | Terminated: False | Episode Length: 5.449999999999928 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.675491813914488 | State: (-176.43304443359375, 2.1162121295928955, 182.45452880859375) | Terminated: False | Episode Length: 5.459999999999928 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.676745927978748 | State: (-176.60118103027344, 2.115377187728882, 182.4818115234375) | Terminated: False | Episode Length: 5.469999999999928 | C_L: 1.0\n", + "Action: [1. 
0.048] | Reward: -6.677937656398126 | State: (-176.76914978027344, 2.114532470703125, 182.5090789794922) | Terminated: False | Episode Length: 5.479999999999928 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.679067094576005 | State: (-176.9369354248047, 2.113677978515625, 182.53636169433594) | Terminated: False | Episode Length: 5.489999999999927 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.680134339602814 | State: (-177.10458374023438, 2.11281418800354, 182.56362915039062) | Terminated: False | Episode Length: 5.499999999999927 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.681139490247966 | State: (-177.2720489501953, 2.111940383911133, 182.59091186523438) | Terminated: False | Episode Length: 5.509999999999927 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.682082646951707 | State: (-177.4393310546875, 2.1110572814941406, 182.6181640625) | Terminated: False | Episode Length: 5.519999999999927 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.682963911816901 | State: (-177.60646057128906, 2.1101644039154053, 182.64544677734375) | Terminated: False | Episode Length: 5.5299999999999265 | C_L: 1.0\n", + "Action: [ 1. -0.524] | Reward: -6.683783388600736 | State: (-177.77340698242188, 2.109261989593506, 182.34544372558594) | Terminated: False | Episode Length: 5.539999999999926 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.684541158384494 | State: (-177.9402618408203, 2.1083500385284424, 182.3727264404297) | Terminated: False | Episode Length: 5.549999999999926 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.685237327971843 | State: (-178.10693359375, 2.107428789138794, 182.39999389648438) | Terminated: False | Episode Length: 5.559999999999926 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.685872005797397 | State: (-178.2734375, 2.1064977645874023, 182.42726135253906) | Terminated: False | Episode Length: 5.569999999999926 | C_L: 1.0\n", + "Action: [1. 
0.048] | Reward: -6.686445301918154 | State: (-178.43975830078125, 2.1055572032928467, 182.45452880859375) | Terminated: False | Episode Length: 5.5799999999999255 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.686957328004857 | State: (-178.6059112548828, 2.104607343673706, 182.4818115234375) | Terminated: False | Episode Length: 5.589999999999925 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.687408197333288 | State: (-178.77188110351562, 2.1036479473114014, 182.5090789794922) | Terminated: False | Episode Length: 5.599999999999925 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.6877980247754945 | State: (-178.9376983642578, 2.1026790142059326, 182.53636169433594) | Terminated: False | Episode Length: 5.609999999999925 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.688126926790943 | State: (-179.1033172607422, 2.101700782775879, 182.56362915039062) | Terminated: False | Episode Length: 5.619999999999925 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.688395021417609 | State: (-179.2687530517578, 2.100713014602661, 182.59091186523438) | Terminated: False | Episode Length: 5.629999999999924 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.688602428262999 | State: (-179.43402099609375, 2.0997159481048584, 182.6181640625) | Terminated: False | Episode Length: 5.639999999999924 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.68874926849511 | State: (-179.59910583496094, 2.0987095832824707, 182.64544677734375) | Terminated: False | Episode Length: 5.649999999999924 | C_L: 1.0\n", + "Action: [ 1. -0.524] | Reward: -6.688835664833324 | State: (-179.76400756835938, 2.097693681716919, 182.34544372558594) | Terminated: False | Episode Length: 5.659999999999924 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.688861717469255 | State: (-179.92880249023438, 2.0966687202453613, 182.3727264404297) | Terminated: False | Episode Length: 5.6699999999999235 | C_L: 1.0\n", + "Action: [1. 
0.048] | Reward: -6.688827551971928 | State: (-180.09339904785156, 2.0956342220306396, 182.39999389648438) | Terminated: False | Episode Length: 5.679999999999923 | C_L: 1.0\n", + "Action: [1. 0.048] | Reward: -6.6887933864746 | State: (-180.09339904785156, 2.0956342220306396, 182.39999389648438) | Terminated: True | Episode Length: 5.689999999999923 | C_L: 1.0\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "from utils.utils import get_optimal_action\n", + "\n", + "\n", + "\n", + "# generate a list of random initial states allowing for a wide range of values\n", + "np.random.seed(0)\n", + "initial_states = np.random.rand(5, 3)\n", + "initial_states[:, 0] = initial_states[:, 0] * np.deg2rad(90) - np.deg2rad(90) # Flight path angle\n", + "initial_states[:, 1] = initial_states[:, 1] * 3.3 + 0.7 # Airspeed\n", + "initial_states[:, 2] = initial_states[:, 2] * np.deg2rad(230) - np.deg2rad(30) # Bank angle\n", + "\n", + "initial_states = [np.array([np.deg2rad(-80.), 1.2, np.deg2rad(150)])] # Example state\n", + "\n", + "#initial_states = [np.array([np.deg2rad(-80.), 1.2, np.deg2rad(150)])] # Example state\n", + "fig, ax1 = plt.subplots(figsize=(10, 5)) # Main figure\n", + "\n", + "ax2 = ax1.twinx() # Create secondary y-axis\n", + "\n", + "# Loop through each initial state\n", + "for initial_state in initial_states:\n", + " state = np.array(initial_state) # Convert list to numpy array\n", + " glider.airplane.flight_path_angle = state[0]\n", + " glider.airplane.airspeed_norm = state[1]\n", + " glider.airplane.bank_angle = state[2]\n", + "\n", + " # Storage for plotting\n", + " flight_path_angles = []\n", + " time_steps = []\n", + " cl_values = []\n", + " height_lost_values = []\n", + "\n", + " total_height_lost = 0\n", + " episode_length = 0\n", + " terminated = False\n", + "\n", + " # Run simulation\n", + " while episode_length < 150:\n", + "\n", + " if not terminated:\n", + " try:\n", + " action = get_optimal_action(state, pi)\n", + " #action[0] = 1.0\n", + " state, reward, terminated, _, _ = glider.step(action)\n", + " state = state[0]\n", + " except:\n", + " terminated = True\n", + " \n", + "\n", + " total_height_lost += reward # Update height lost\n", + " episode_length += 0.01\n", + "\n", + " # Convert to readable 
format\n", + " flight_path_angle = float(np.rad2deg(state[0]))\n", + " V_norm = float(state[1])\n", + " bank_angle = float(np.rad2deg(state[2]))\n", + " C_L = float(action[0]) # Extract lift coefficient\n", + "\n", + " # Store values\n", + " flight_path_angles.append(flight_path_angle)\n", + " time_steps.append(episode_length)\n", + " cl_values.append(C_L)\n", + " height_lost_values.append(float(np.rad2deg(state[2])))\n", + "\n", + " print(f\"Action: {np.round(action,3)} | Reward: {total_height_lost} | \\\n", + " State: {flight_path_angle, V_norm, bank_angle} | Terminated: {terminated} |\\\n", + " Episode Length: {episode_length} | C_L: {C_L}\")\n", + "\n", + " if terminated:\n", + " break\n", + "\n", + " # Plot the flight path angle on primary axis\n", + " ax1.plot(time_steps, flight_path_angles,label=f\"V_norm={round(initial_state[1],2)}, Bank_angle={round(np.rad2deg(initial_state[2]),1)}°\") \n", + " # Plot bank angle on secondary axis (height_lost_values holds the bank angle in degrees)\n", + " ax2.plot(time_steps, height_lost_values, linestyle=\"dashed\", alpha=0.7)\n", + "\n", + " # Select 5 evenly spaced indices for C_L annotations\n", + " num_points = 5\n", + " if len(time_steps) > num_points:\n", + " indices = np.linspace(0, len(time_steps) - 1, num_points, dtype=int)\n", + " else:\n", + " indices = range(len(time_steps)) # If fewer than 5 points exist\n", + "\n", + " # Plot markers and add annotations for C_L\n", + " for i in indices:\n", + " ax1.scatter(time_steps[i], flight_path_angles[i], color=\"black\", marker=\"x\") # Mark point\n", + " ax1.text(time_steps[i], flight_path_angles[i], f\"{cl_values[i]:.2f}\", fontsize=7, \n", + " verticalalignment='bottom', \n", + " horizontalalignment='right')\n", + "\n", + "# Graph settings\n", + "ax1.set_xlabel(\"Time (seconds)\")\n", + "ax1.set_ylabel(\"Flight Path Angle (γ) [°]\", color=\"blue\")\n", + "ax2.set_ylabel(\"Bank Angle (μ) [°])\", color=\"red\")\n", + "ax1.set_title(\"Airplane trajectories\")\n", + "ax1.legend()\n", + "ax1.grid()\n", + "\n", +
"plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "bc4b24f4", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import matplotlib.colors as mcolors\n", + "from utils.utils import get_optimal_action\n", + "\n", + "vel_norm = 1.2\n", + "# Example discretization\n", + "flight_path_bins = np.linspace(np.deg2rad(-100), np.deg2rad(0), 30, dtype=np.float32)\n", + "bank_bins = np.linspace(np.deg2rad(0), np.deg2rad(180), 30, dtype=np.float32)\n", + "\n", + "# Prepare a 2D array to store the policy (CL values)\n", + "policy_values = np.zeros((len(bank_bins), len(flight_path_bins))) # Transpose shape\n", + "\n", + "# Fill in the 2D array by evaluating your policy at each (γ, μ)\n", + "for i, mu_rad in enumerate(bank_bins): # μ is now columns\n", + " for j, gamma_rad in enumerate(flight_path_bins): # γ is now rows\n", + " state = np.array([gamma_rad, vel_norm, mu_rad])\n", + " action = get_optimal_action(state, pi) # Replace with your actual policy call\n", + " cl = float(action[0]) # Assuming first element of `action` is the lift coefficient\n", + " policy_values[i, j] = cl\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "d76a9ff0", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Create the plot\n", + "plt.figure(figsize=(8, 6))\n", + "plt.imshow(policy_values.T, cmap=\"gray\", origin=\"lower\",\n", + " extent=[np.rad2deg(bank_bins[0]), np.rad2deg(bank_bins[-1]), \n", + " np.rad2deg(flight_path_bins[0]), np.rad2deg(flight_path_bins[-1])], interpolation=\"nearest\")\n", + "\n", + "# ValueError: 'nearet' is not a valid value for interpolation; supported values are 'lanczos', \n", + "# 'bilinear', 'bessel', 'spline36', 'catrom', 'none', 'spline16', 'gaussian', 'hamming', 'antialiased', \n", + "# 'hermite', 'mitchell', 'blackman', 'quadric', 'hanning', 'sinc', 'kaiser', 'bicubic', 'nearest'\n", + "\n", + "\n", + "# Add labels and colorbar\n", + "plt.xlabel(\"Bank Angle μ (degrees)\") # Now X-axis\n", + "plt.ylabel(\"Flight Path Angle γ (degrees)\") # Now Y-axis\n", + "plt.colorbar(label=\"Lift Coefficient (C_L)\")\n", + "plt.title(f\"Policy Visualization V/Vs={vel_norm}\")\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "81f11551", + "metadata": {}, + "outputs": [], + "source": [ + "state = np.array([np.deg2rad(-80.), 1.2, np.deg2rad(150)]) # Example state\n", + "get_optimal_action(state, pi)[0] # Replace with your actual policy call" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f3dd56bb", + "metadata": {}, + "outputs": [], + "source": [ + "# Normalize the policy values (0 to 1) for grayscale plotting\n", + "vmin, vmax = policy_values.min(), policy_values.max()\n", + "norm_values = (policy_values - vmin) / (vmax - vmin + 1e-8)\n", + "\n", + "# Create a mesh for plotting\n", + "X, Y = np.meshgrid(bank_angles, flight_path_angles)\n", + "\n", + "# Plot using pcolormesh (grayscale)\n", + "fig, ax = plt.subplots(figsize=(6, 4))\n", + "c = ax.pcolormesh(X, Y, norm_values, cmap='gray', shading='auto', norm=mcolors.Normalize(vmin=0, vmax=1))\n", + "\n", + "# Labels and title\n", + 
"ax.set_xlabel(\"Bank Angle (deg)\")\n", + "ax.set_ylabel(\"Flight Path Angle (deg)\")\n", + "ax.set_title(\"Optimal Policy (CL)\")\n", + "\n", + "# Optional: colorbar\n", + "plt.colorbar(c, ax=ax, label=\"Normalized CL\")\n", + "\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f8323f6d", + "metadata": {}, + "outputs": [], + "source": [ + "-192769.51 + -125086.89" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e954bd11", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "from utils.utils import get_optimal_action\n", + "\n", + "# initial state\n", + "state = np.array([np.deg2rad(-60), 1.2, np.deg2rad(30)])\n", + "glider.airplane.flight_path_angle = state[0]\n", + "glider.airplane.airspeed_norm = state[1]\n", + "glider.airplane.bank_angle = state[2]\n", + "\n", + "total_height_lost = 0\n", + "episode_length = 0\n", + "terminated = False\n", + "\n", + "time_history = []\n", + "height_lost_history = []\n", + "\n", + "while episode_length < 20:\n", + " action = get_optimal_action(state, pi)\n", + " prev_state = state.copy()\n", + " state, reward, terminated, _, _ = glider.step(action)\n", + " state = state[0] \n", + " total_height_lost += reward\n", + " episode_length += 0.01\n", + "\n", + " time_history.append(episode_length) # for plotting!\n", + " height_lost_history.append(total_height_lost)\n", + "\n", + " #print(f\"Action: {np.round(action,3)} | Lost Height: {total_height_lost:.3f} | State: {state} | Terminated: {terminated} | Time: {episode_length:.2f}\")\n", + " if terminated:\n", + " break\n", + "\n", + "# Plotting the results\n", + "plt.figure(figsize=(10, 6))\n", + "plt.plot(time_history, height_lost_history, label=\"Height Lost\")\n", + "plt.xlabel(\"Time (s)\")\n", + "plt.ylabel(\"Height Lost (m)\")\n", + "plt.title(\"Height Lost Over Time\")\n", + "plt.legend()\n", + "plt.grid(True)\n", + "plt.show()\n" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "5cd13daf", + "metadata": {}, + "outputs": [], + "source": [ + "STALL_AIRSPEED = 27.331231856346\n", + "from utils.utils import get_optimal_action\n", + "from tqdm import tqdm\n", + "with open(glider.__class__.__name__ + \".pkl\", \"rb\") as f:\n", + " pi: PolicyIteration = pickle.load(f)\n", + "\n", + "prom_episode_lenght = 0\n", + "dict_result = {}\n", + "dict_episode_length = {}\n", + "state_spaces = [v for v in pi.states_space if v[0] > -np.pi/2]\n", + "for state in tqdm(state_spaces):\n", + " initial_state = state.copy()\n", + " prev_state = state.copy()\n", + " glider.reset()\n", + " glider.airplane.flight_path_angle = state[0]\n", + " glider.airplane.airspeed_norm = state[1]\n", + " glider.airplane.bank_angle = state[2]\n", + " done = False\n", + " episode_length = 0\n", + " total_reward = 0\n", + " while not done:\n", + " action = get_optimal_action(state, pi)\n", + " prev_state = state.copy()\n", + " state, reward, done, _, _ = glider.step(action)\n", + " done = bool(done)\n", + " if done:\n", + " break\n", + " state = state[0]\n", + " # check if our state is in the state space\n", + " index = pi.triangulation.find_simplex(state)\n", + " if not done:\n", + " total_reward -= reward#*STALL_AIRSPEED\n", + " if (index ==-1) or episode_length > 70: \n", + " done = True\n", + " episode_length += 1\n", + " \n", + " dict_result[tuple(initial_state)] = total_reward\n", + " dict_episode_length[tuple(initial_state)] = episode_length\n", + " prom_episode_lenght += episode_length / len(pi.states_space)\n", + " #print(f\"Initial state: {initial_state} - Total reward: {total_reward}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ab055601", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "epsilon = 1*1e-1\n", + "#dict_result = dict_episode_length\n", + "# Your dictionary with keys as coordinates (x, y) and values as the 
color intensity (z)\n", + "# Extracting the x, y and z values\n", + "x = [np.degrees(coord[0]) for coord in dict_result.keys() if 0.05 >= coord[0] >= -np.pi/2 - epsilon and coord[1] > 0.7]\n", + "# x is the flight path angle converted to degrees\n", + "y = [coord[1] for coord in dict_result.keys() if 0.05 >= coord[0] >= -np.pi/2 - epsilon and coord[1] > 0.7]\n", + "# y is the normalized airspeed (V/Vs)\n", + "keys_list = list(dict_result.keys())\n", + "values_list = list(dict_result.values())\n", + "z = [v for e,v in zip(keys_list, values_list) if 0.05 >= e[0] >= -np.pi/2 - epsilon and e[1] > 0.7]\n", + "\n", + "# Creating a filled contour plot from the scattered (y, x, z) points\n", + "cmap = plt.get_cmap('viridis', 2048)\n", + "plt.tricontourf(y, x, z, cmap=cmap, levels=18) # Change 'viridis' to any other colormap you like\n", + "plt.colorbar(label='', shrink=0.8, ) # Add color bar for the z values\n", + "\n", + "# Add axis labels\n", + "plt.ylabel('Flight Path Angle (γ) [deg]')\n", + "plt.xlabel('V/Vs')\n", + "\n", + "x_min, x_max = min(x), max(x)\n", + "y_min, y_max = min(y), max(y)\n", + "\n", + "# Set the minor ticks for the grid, keeping the dense grid with smaller squares\n", + "ax = plt.gca() # Get current axes\n", + "ax.set_yticks(np.linspace(x_min, x_max, 60), minor=True) # 60 minor ticks for grid\n", + "ax.set_xticks(np.linspace(y_min, y_max, 60), minor=True) # 60 minor ticks for grid\n", + "# Draw a vertical line at x = 1\n", + "plt.axvline(x=1, color='r', linestyle='--', linewidth=0.5)\n", + "# Draw a horizontal line at y = 0 (disabled)\n", + "#plt.axhline(y=0, color='k', linestyle='--')\n", + "\n", + "# Set the major ticks\n", + "vals = np.round(np.linspace(-90, 0,6))\n", + "plt.yticks(vals) # 6 labels on y-axis\n", + "plt.xticks(np.linspace(round(y_min), round(y_max), 6)) # 6 labels on x-axis\n", + "# Enable the minor grid lines\n", + "ax.grid(which='minor', color='gray', linestyle='-', linewidth=0.5)\n", + "\n", + "# Enable the grid on the minor ticks\n", + "ax.grid(which='minor', color='gray', linestyle='-', linewidth=0.5)\n", + "\n", + "# Save the image
without a white border\n", + "plt.savefig('output.png', bbox_inches='tight')\n", + "\n", + "# set size of the plot\n", + "plt.gcf().set_size_inches(9, 5)\n", + "\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c8416867", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "from scipy.interpolate import griddata\n", + "\n", + "# Your dictionary with keys as coordinates (x, y) and values as the color intensity (z)\n", + "# Extracting the x, y and z values\n", + "x = [np.degrees(coord[0]) for coord in dict_result.keys() if 5 >= coord[0] >= -np.pi/2 and coord[1] > 0.7]\n", + "y = [coord[1] for coord in dict_result.keys() if 5 >= coord[0] >= -np.pi/2 and coord[1] > 0.7]\n", + "keys_list = list(dict_result.keys())\n", + "values_list = list(dict_result.values())\n", + "z = [v for e,v in zip(keys_list, values_list) if 5 >= e[0] >= -np.pi/2 and e[1] > 0.7]\n", + "\n", + "# Create a grid for interpolation\n", + "x_grid = np.linspace(min(x), max(x), 100)\n", + "y_grid = np.linspace(min(y), max(y), 100)\n", + "X, Y = np.meshgrid(x_grid, y_grid)\n", + "\n", + "# Interpolate the scattered data into the grid\n", + "Z = griddata((y, x), z, (Y, X), method='linear')\n", + "\n", + "# Create the scatter plot\n", + "cmap = plt.get_cmap('viridis', 2048)\n", + "plt.scatter(y, x, c=z, cmap=cmap)\n", + "\n", + "# Add color bar\n", + "plt.colorbar(label='', shrink=0.8)\n", + "contour_levels = np.linspace(np.min(z), np.max(z), 10) # 10 levels\n", + "contour_levels = contour_levels[contour_levels != 0] # Remove zero from levels\n", + "\n", + "# Add contour lines on top of the scatter plot\n", + "contour = plt.contour(Y, X, Z, levels=contour_levels, colors='black', linewidths=0.75)\n", + "plt.clabel(contour, inline=True, fontsize=8) # Optional: add labels to the contours\n", + "\n", + "# Add axis labels\n", + "plt.ylabel('Flight Path Angle (γ) [deg]')\n", + "plt.xlabel('V/Vs')\n", +
"\n", + "# Plot vertical line at x=1\n", + "plt.axvline(x=1, color='k', linestyle='--')\n", + "\n", + "# Configure the minor ticks and grid\n", + "x_min, x_max = min(x), max(x)\n", + "y_min, y_max = min(y), max(y)\n", + "ax = plt.gca() # Get current axes\n", + "ax.set_yticks(np.linspace(x_min, x_max, 60), minor=True)\n", + "ax.set_xticks(np.linspace(y_min, y_max, 60), minor=True)\n", + "plt.yticks(np.linspace(x_min, x_max, 6)) # Major ticks on y-axis\n", + "plt.xticks(np.linspace(y_min, y_max, 4)) # Major ticks on x-axis\n", + "\n", + "# Set size of the plot\n", + "plt.gcf().set_size_inches(9, 5)\n", + "\n", + "# Enable the minor grid lines\n", + "ax.grid(which='minor', color='white', linestyle='-', linewidth=0.25)\n", + "\n", + "# Enable the grid on the minor ticks\n", + "ax.grid(which='minor', color='white', linestyle='-', linewidth=0.25)\n", + "\n", + "# Save the image\n", + "plt.savefig('output_with_contours.png', bbox_inches='tight')\n", + "\n", + "# Show the plot\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "204e0318", + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "markdown", "id": "3c8fe4a4", @@ -172,7 +1403,7 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "DynamicProgramming", "language": "python", "name": "python3" },