""" 模型评估示例 """ import os import numpy as np import gym from stable_baselines3 import PPO # 添加项目根目录到Python路径 import sys sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from src.rl_env.cartesian_env import CartesianSpaceEnv def evaluate_model(model_path, num_episodes=5): """ 评估训练好的模型 参数: model_path: 模型文件路径 num_episodes: 要运行的测试回合数 """ # 创建环境 env = CartesianSpaceEnv() # 加载模型 model = PPO.load(model_path) print(f"开始评估,共 {num_episodes} 个episode") for episode in range(num_episodes): obs, _ = env.reset() done = False total_reward = 0.0 steps = 0 print(f"\nEpisode {episode + 1}/{num_episodes}") print(f"目标位置: {env._target_pos}") while not done: action, _states = model.predict(obs, deterministic=True) obs, reward, done, truncated, info = env.step(action) total_reward += reward steps += 1 # 显示中间步骤信息 if steps % 10 == 0: distance = np.linalg.norm(env._target_pos - env._current_pos) print(f"Step {steps}: 距离={distance:.4f}, 累计奖励={total_reward:.4f}") # 渲染最后一步 if done or truncated: env.render() distance = np.linalg.norm(env._target_pos - env._current_pos) print(f"\n最终结果:") print(f"总步数: {steps}") print(f"最终距离: {distance:.4f}") print(f"总奖励: {total_reward:.4f}") env.close() if __name__ == "__main__": # 使用最佳模型进行评估 MODEL_PATH = "models/best_model/best_model" # 运行评估 evaluate_model(MODEL_PATH)