2025-05-25 11:25:02 +00:00
|
|
|
"""
|
|
|
|
模型评估示例
|
|
|
|
"""
|
|
|
|
import os
|
|
|
|
import numpy as np
|
|
|
|
import gym
|
|
|
|
from stable_baselines3 import PPO
|
|
|
|
|
|
|
|
# 添加项目根目录到Python路径
|
|
|
|
import sys
|
|
|
|
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
|
|
|
|
|
|
from src.rl_env.cartesian_env import CartesianSpaceEnv
|
|
|
|
|
|
|
|
def _distance_to_target(env):
    """Return the Euclidean norm of ``env._target_pos - env._current_pos``."""
    return np.linalg.norm(env._target_pos - env._current_pos)


def evaluate_model(model_path, num_episodes=5):
    """
    Evaluate a trained PPO model on ``CartesianSpaceEnv``.

    Runs ``num_episodes`` episodes using deterministic actions. Every 10 steps
    the distance to the target and the cumulative reward are printed; when an
    episode ends the environment is rendered once and a summary (step count,
    final distance, total reward) is printed.

    Args:
        model_path: Path of the saved model, passed to ``PPO.load``.
        num_episodes: Number of evaluation episodes to run.
    """
    # Create the environment
    env = CartesianSpaceEnv()

    try:
        # Load the model
        model = PPO.load(model_path)

        print(f"开始评估,共 {num_episodes} 个episode")

        for episode in range(num_episodes):
            obs, _ = env.reset()
            total_reward = 0.0
            steps = 0

            print(f"\nEpisode {episode + 1}/{num_episodes}")
            print(f"目标位置: {env._target_pos}")

            while True:
                action, _ = model.predict(obs, deterministic=True)
                obs, reward, terminated, truncated, _ = env.step(action)
                total_reward += reward
                steps += 1

                # Show intermediate progress
                if steps % 10 == 0:
                    distance = _distance_to_target(env)
                    print(f"Step {steps}: 距离={distance:.4f}, 累计奖励={total_reward:.4f}")

                # An episode is over when it is terminated OR truncated.
                # Looping on the termination flag alone would keep stepping
                # an already-finished environment after a truncation.
                if terminated or truncated:
                    break

            # Render the last step and report the episode summary
            env.render()
            distance = _distance_to_target(env)
            print("\n最终结果:")
            print(f"总步数: {steps}")
            print(f"最终距离: {distance:.4f}")
            print(f"总奖励: {total_reward:.4f}")
    finally:
        # Release environment resources even if loading or evaluation fails
        env.close()
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Location of the best saved model, used for this evaluation run.
    MODEL_PATH = "models/best_model/best_model"

    # Kick off the evaluation with the default number of episodes.
    evaluate_model(MODEL_PATH)
|