RL-PowerTracking/examples/evaluate_model.py

67 lines
1.9 KiB
Python
Raw Normal View History

"""
模型评估示例
"""
import os
import numpy as np
import gym
from stable_baselines3 import PPO
# 添加项目根目录到Python路径
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from src.rl_env.cartesian_env import CartesianSpaceEnv
def evaluate_model(model_path, num_episodes=5):
    """
    Evaluate a trained PPO model on ``CartesianSpaceEnv``.

    Runs ``num_episodes`` episodes using deterministic actions. Every 10
    steps the current distance to the target and the cumulative reward are
    printed; at the end of each episode the environment is rendered and a
    summary (step count, final distance, total reward) is printed.

    Args:
        model_path: Path of the saved model file, passed to ``PPO.load``.
        num_episodes: Number of test episodes to run.
    """
    # Create the environment.
    env = CartesianSpaceEnv()
    try:
        # Load the model.
        model = PPO.load(model_path)
        print(f"开始评估,共 {num_episodes} 个episode")

        for episode in range(num_episodes):
            obs, _ = env.reset()
            terminated = False
            truncated = False
            total_reward = 0.0
            steps = 0
            print(f"\nEpisode {episode + 1}/{num_episodes}")
            print(f"目标位置: {env._target_pos}")

            # An episode ends when it is terminated OR truncated. Checking
            # only the termination flag would keep stepping a truncated
            # episode forever.
            while not (terminated or truncated):
                action, _ = model.predict(obs, deterministic=True)
                obs, reward, terminated, truncated, _ = env.step(action)
                total_reward += reward
                steps += 1

                # Show intermediate progress every 10 steps.
                if steps % 10 == 0:
                    distance = np.linalg.norm(env._target_pos - env._current_pos)
                    print(f"Step {steps}: 距离={distance:.4f}, 累计奖励={total_reward:.4f}")

            # Render the final step of the episode.
            env.render()

            distance = np.linalg.norm(env._target_pos - env._current_pos)
            print("\n最终结果:")
            print(f"总步数: {steps}")
            print(f"最终距离: {distance:.4f}")
            print(f"总奖励: {total_reward:.4f}")
    finally:
        # Always release the environment, even if loading or stepping fails.
        env.close()
if __name__ == "__main__":
    # Evaluate the best saved model.
    MODEL_PATH = "models/best_model"

    # Run the evaluation with the default number of episodes.
    evaluate_model(model_path=MODEL_PATH)