From e3b17f5eb21c289983c9e97be25a99f8a8532817 Mon Sep 17 00:00:00 2001
From: fly6516
Date: Tue, 27 May 2025 21:55:49 +0800
Subject: [PATCH] refactor(env): optimize the DOA_SAC_sim2real script
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Changed the URDF file path
- Removed redundant code and comments
- Adjusted code formatting and indentation for readability
- Updated the model save path
---
 DOA_SAC_sim2real.py | 55 +++++++++++++++++++++++----------------------
 1 file changed, 28 insertions(+), 27 deletions(-)

diff --git a/DOA_SAC_sim2real.py b/DOA_SAC_sim2real.py
index 7e2627c..f87df9f 100644
--- a/DOA_SAC_sim2real.py
+++ b/DOA_SAC_sim2real.py
@@ -17,13 +17,16 @@ import matplotlib as mpl
 from mpl_toolkits.mplot3d import Axes3D
 import matplotlib.pyplot as plt
 import pandas as pd
-os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
+
+os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
 MAX_EPISODE_LEN = 20 * 100
 x = []
 y = []
 z = []
-#运动模式
+
+
+# 运动模式
 # PI=3.1415926
 # ABS = 0 # 绝对运动
 # INCR = 1 # 增量运动
@@ -122,12 +125,9 @@ class jakaEnv(gym.Env):
             p.resetBaseVelocity(self.blockId, linearVelocity=[0, random.choice(self.speed), 0])
         p.stepSimulation()
         # robot.servo_move_enable(True)
-
+
         # robot.servo_p(cartesian_pose=[dx*100, dy*100, dz*50, 0, 0, 0], move_mode=1)
         # time.sleep(0.008)
-
-
-
         state_object, state_object_orienation = p.getBasePositionAndOrientation(self.objectId)
         twist_object, twist_object_orienation = p.getBaseVelocity(self.objectId)
@@ -282,7 +282,7 @@ class jakaEnv(gym.Env):
         reset_realposes = [0.8, 2.4, 1.3, 1, -1.57, 0]
         # 改变路径为你机械臂的URDF文件路径
         self.jaka_id = p.loadURDF(
-            r"D:\Python\robot_DRL\env\lib64\urdf\jaka_description\urdf\jaka_description.urdf",
+            "urdf/jaka_description/urdf/jaka_description.urdf",
             basePosition=[0, 0.5, 0.65],
             baseOrientation=p.getQuaternionFromEuler([0, 0, 3.14]),
             useFixedBase=True,
@@ -308,7 +308,7 @@ class jakaEnv(gym.Env):
         block_vel_range = np.array(self.speed)
         block_loc = np.array([0, 0, 0])
         block_rel_pos = np.array([0, 0, 0])
-        block2_pos = np.array([0.6, 0.66, 0.43])
+        block2_pos = np.array([0.6, 0.66, 0.43])
 
         obs = np.concatenate(
             [
@@ -425,6 +425,7 @@ if __name__ == "__main__":
           It must contains the file created by the ``Monitor`` wrapper.
         :param verbose: Verbosity level.
""" + def __init__(self, check_freq: int, log_dir: str, verbose: int = 1): super(SaveOnBestTrainingRewardCallback, self).__init__(verbose) self.check_freq = check_freq @@ -440,32 +441,34 @@ if __name__ == "__main__": def _on_step(self) -> bool: if self.n_calls % self.check_freq == 0: - # Retrieve training reward - x, y = ts2xy(load_results(self.log_dir), 'timesteps') - if len(x) > 0: - # Mean training reward over the last 100 episodes - mean_reward = np.mean(y[-100:]) - if self.verbose > 0: - print(f"Num timesteps: {self.num_timesteps}") - print(f"Best mean reward: {self.best_mean_reward:.2f} - Last mean reward per episode: {mean_reward:.2f}") + # Retrieve training reward + x, y = ts2xy(load_results(self.log_dir), 'timesteps') + if len(x) > 0: + # Mean training reward over the last 100 episodes + mean_reward = np.mean(y[-100:]) + if self.verbose > 0: + print(f"Num timesteps: {self.num_timesteps}") + print( + f"Best mean reward: {self.best_mean_reward:.2f} - Last mean reward per episode: {mean_reward:.2f}") - # New best model, you could save the agent here - if mean_reward > self.best_mean_reward: - self.best_mean_reward = mean_reward - # Example for saving best model - if self.verbose > 0: - print(f"Saving new best model to {self.save_path}") - self.model.save(self.save_path) + # New best model, you could save the agent here + if mean_reward > self.best_mean_reward: + self.best_mean_reward = mean_reward + # Example for saving best model + if self.verbose > 0: + print(f"Saving new best model to {self.save_path}") + self.model.save(self.save_path) return True + tempt = 1 log_dir = './tensorboard/DOA_SAC_callback/' os.makedirs(log_dir, exist_ok=True) env = jakaEnv() env = Monitor(env, log_dir) if tempt: - model = SAC('MlpPolicy', env=env, verbose=1, tensorboard_log=log_dir, + model = SAC('MlpPolicy', env=env, verbose=1, tensorboard_log=log_dir, device="cuda" ) callback = SaveOnBestTrainingRewardCallback(check_freq=1000, log_dir=log_dir) @@ -476,8 +479,7 @@ if __name__ == "__main__": else: obs = env.reset() # 改变路径为你保存模型的路径 - model = SAC.load(r'C:\Users\fly\PycharmProjects\RL-PowerTracking-new\best_model.zip', env=env) - + model = SAC.load(r'best_model.zip', env=env) for j in range(50): for i in range(2000): @@ -498,7 +500,6 @@ if __name__ == "__main__": break break - # 三维 # fig1 = plt.figure("机械臂运行轨迹") # ax = fig1.add_subplot(projection="3d") # 三维图形