diff --git a/DOA_SAC_sim2real.py b/DOA_SAC_sim2real.py
index 9a9bd07..b0bfa9d 100644
--- a/DOA_SAC_sim2real.py
+++ b/DOA_SAC_sim2real.py
@@ -1,3 +1,6 @@
+import torch
+import torch.nn as nn
+from torch.multiprocessing import Process, Queue, set_start_method
 import time
 import gym
 from gym import error, spaces, utils
@@ -406,67 +409,6 @@ class jakaEnv(gym.Env):
 
 
 if __name__ == "__main__":
-    from torch.multiprocessing import Pool, Process, set_start_method
-    try:
-        set_start_method('spawn')
-    except RuntimeError:
-        pass
-
-    def train_sac(gpu_id):
-        os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)
-
-        # Number of environments per GPU
-        num_envs = 4
-
-        def make_env():
-            env = jakaEnv()
-            return Monitor(env, log_dir)
-
-        # Create vectorized environments
-        vec_env = DummyVecEnv([make_env for _ in range(num_envs)])
-
-        # Normalize observations and rewards
-        vec_env = VecNormalize(vec_env, norm_obs=True, norm_reward=True)
-
-        # Dynamic batch size based on number of environments
-        batch_size = 512 * num_envs
-
-        model = SAC(
-            'MlpPolicy',
-            env=vec_env,
-            verbose=0,
-            tensorboard_log=log_dir,
-            device="cuda",
-            batch_size=batch_size,
-            gradient_steps=4,
-            ent_coef='auto',
-            learning_rate=3e-4,
-            use_tensorboard=True
-        )
-
-        callback = SaveOnBestTrainingRewardCallback(check_freq=1000, log_dir=log_dir)
-
-        # Train with dynamic total timesteps based on environment complexity
-        total_timesteps = 4000000 * num_envs
-
-        model.learn(
-            total_timesteps=total_timesteps,
-            callback=callback,
-            tb_log_name=f"SAC_GPU{gpu_id}_ENV"
-        )
-        model.save(os.path.join(log_dir, f'best_model_gpu{gpu_id}'))
-
-    # Number of GPUs to use (adjust based on your system)
-    num_gpus = 2
-    processes = []
-
-    for gpu_id in range(num_gpus):
-        p = Process(target=train_sac, args=(gpu_id,))
-        p.start()
-        processes.append(p)
-
-    for p in processes:
-        p.join()
 
     from stable_baselines3 import SAC
     from stable_baselines3.common import results_plotter
@@ -474,7 +416,7 @@ if __name__ == "__main__":
     from stable_baselines3.common.results_plotter import load_results, ts2xy, plot_results
     from stable_baselines3.common.noise import NormalActionNoise
    from stable_baselines3.common.callbacks import BaseCallback
-    from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
+
 
     class SaveOnBestTrainingRewardCallback(BaseCallback):
         """
@@ -591,4 +533,55 @@ if __name__ == "__main__":
 
     # plt.show()
 
 
+def train_parallel(num_processes):
+    """Run several training processes that share one policy's parameters."""
+    # 'spawn' is required for CUDA; force=True tolerates an earlier call
+    set_start_method('spawn', force=True)
+
+    # Build the shared model once (Stable Baselines3 SAC on the GPU)
+    env = jakaEnv()
+    model = SAC('MlpPolicy', env=env, verbose=1, device="cuda")
+    shared_model = model.policy.to(torch.device('cuda'))
+    shared_model.share_memory()  # expose the parameters to child processes
+
+    # Spawn the worker processes
+    processes = []
+    for rank in range(num_processes):
+        p = Process(target=train_process, args=(rank, shared_model))
+        p.start()
+        processes.append(p)
+
+    # Wait for every worker to finish
+    for p in processes:
+        p.join()
+
+def train_process(rank, shared_model):
+    """A single training worker."""
+    # Assign each worker its own GPU, wrapping around if necessary
+    device = torch.device(f'cuda:{rank % torch.cuda.device_count()}')
+
+    # Independent environment instance per worker
+    env = create_arm_environment()
+
+    # Local model, initialised from the shared policy
+    local_model = SAC('MlpPolicy', env=env, verbose=0, device=device)
+    local_model.policy.load_state_dict(shared_model.state_dict())
+
+    # Alternate short training bursts with parameter synchronisation
+    for _ in range(100):
+        local_model.learn(total_timesteps=10_000, reset_num_timesteps=False)
+
+        # Publish the freshly trained parameters to the shared model
+        with torch.no_grad():
+            for param, shared_param in zip(local_model.policy.parameters(),
+                                           shared_model.parameters()):
+                shared_param.copy_(param)
+
+def create_arm_environment():
+    """Create a robot-arm environment instance."""
+    return jakaEnv()
+
+if __name__ == '__main__':
+    # Launch parallel training with four worker processes
+    train_parallel(4)
\ No newline at end of file
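
Note on the synchronisation scheme: each worker overwrites the shared policy
wholesale via `shared_param.copy_(param)`, so with several workers the last
writer wins, and optimizer state never leaves the process. The snippet below
is a minimal, self-contained sketch of the same share-then-sync mechanics,
using a plain `nn.Linear` in place of the SAC policy; `toy_worker`, the layer
sizes, and the four-process count are illustrative assumptions, not part of
this patch.

import torch
import torch.nn as nn
from torch.multiprocessing import Process, set_start_method

def toy_worker(rank, shared_net):
    # Train a private copy, then publish its weights (hypothetical stand-in
    # for train_process; last writer wins, exactly as in the patch above)
    local_net = nn.Linear(4, 2)
    local_net.load_state_dict(shared_net.state_dict())
    opt = torch.optim.SGD(local_net.parameters(), lr=0.1)
    for _ in range(10):
        loss = local_net(torch.randn(8, 4)).pow(2).mean()
        opt.zero_grad()
        loss.backward()
        opt.step()
    # Copy local weights into the shared-memory parameters
    with torch.no_grad():
        for param, shared_param in zip(local_net.parameters(),
                                       shared_net.parameters()):
            shared_param.copy_(param)

if __name__ == '__main__':
    set_start_method('spawn', force=True)
    shared_net = nn.Linear(4, 2)
    shared_net.share_memory()  # move parameters into shared memory
    workers = [Process(target=toy_worker, args=(r, shared_net))
               for r in range(4)]
    for p in workers:
        p.start()
    for p in workers:
        p.join()
    print(shared_net.weight)  # reflects whichever worker synced last

If the workers are meant to cooperate rather than overwrite each other, the
copy step would need a lock or an averaging rule; the patch as written relies
on last-writer-wins.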