import numpy as np  # Import NumPy for numerical operations
import random  # Import random module for exploration
import multiprocessing as mp  # Import multiprocessing for parallel computing

try:
    import cupy as cp  # Import CuPy for GPU acceleration
    GPU_AVAILABLE = True  # Set flag if CuPy is available
except ImportError:
    GPU_AVAILABLE = False  # Set flag if CuPy is not available

# Define the environment
grid_size = (4, 4)  # Grid size (4x4)
start = (0, 0)  # Start position
end = (3, 3)  # End position
obstacles = {(1, 0), (2, 1), (1, 2), (0, 3), (3, 2)}  # Set of obstacles

# Define possible actions and their effects on position
actions = {'up': (-1, 0), 'down': (1, 0), 'left': (0, -1), 'right': (0, 1)}


def is_valid(state):
    """Check if a state is within the grid and not an obstacle."""
    return (0 <= state[0] < grid_size[0]) and (0 <= state[1] < grid_size[1]) and (state not in obstacles)


def get_next_state(state, action):
    """Get the next state based on the current state and action."""
    new_state = (state[0] + actions[action][0], state[1] + actions[action][1])
    return new_state if is_valid(new_state) else state


# Q-Learning parameters
alpha = 0.5  # Learning rate
gamma = 0.9  # Discount factor
epsilon = 0.1  # Exploration rate for the ε-greedy policy
episodes = 5000  # Total training episodes
# Cap on steps per episode; with the obstacle layout above, (3, 3) is not reachable
# from (0, 0), so episodes must be truncated to keep training from looping forever.
max_steps_per_episode = 100

np.random.seed(42)  # Set random seed for reproducibility
random.seed(42)

# Initialize the Q-table with all states and actions
grid_states = [(i, j) for i in range(grid_size[0]) for j in range(grid_size[1]) if (i, j) not in obstacles]
Q = {state: {action: 0 for action in actions} for state in grid_states}

# GPU acceleration setup (if available)
if GPU_AVAILABLE:
    # Re-initialize the Q-values as floats. Note: this is still a plain Python dict,
    # so no data is actually placed on the GPU here.
    Q = {state: {action: 0.0 for action in actions} for state in grid_states}

actions_list = list(actions.keys())  # Store actions as a list


def train_q_learning(worker_id):
    """Train Q-learning in one worker process on a local copy of the Q-table."""
    random.seed(42 + worker_id)  # Distinct seed per worker so processes explore differently but reproducibly
    local_Q = {state: Q[state].copy() for state in grid_states}  # Create a local copy of the Q-table
    for _ in range(episodes // mp.cpu_count()):  # Each process handles a fraction of the episodes
        state = start  # Start at the initial position
        for _ in range(max_steps_per_episode):  # Run until reaching the goal or hitting the step cap
            if state == end:
                break  # Goal reached
            # Choose an action using the ε-greedy policy
            if random.uniform(0, 1) > epsilon:
                action = max(local_Q[state], key=local_Q[state].get)  # Exploit: best known action
            else:
                action = random.choice(actions_list)  # Explore: random action
            next_state = get_next_state(state, action)  # Get the next state
            reward = 1 if next_state == end else -0.1  # Define rewards
            # Update the Q-value using the Bellman equation
            local_Q[state][action] += alpha * (
                reward + gamma * max(local_Q[next_state].values()) - local_Q[state][action])
            state = next_state  # Move to the next state
    return local_Q  # Return the updated local Q-table


# Compute the optimal path from start to end
def get_best_path():
    """Find the best path from start to end using the learned Q-values."""
    state = start  # Start at the initial position
    path = [state]  # Initialize the path
    visited = set()  # Track visited states to avoid loops
    while state != end:
        if state in visited:
            break  # Avoid infinite loops
        visited.add(state)  # Mark the state as visited
        action = max(Q[state], key=Q[state].get)  # Choose the best action based on Q-values
        state = get_next_state(state, action)  # Move to the next state
        path.append(state)  # Append to the path
    return path  # Return the computed path


# Parallel Q-learning training
if __name__ == "__main__":
    num_processes = max(1, mp.cpu_count() // 2)  # Use half the available CPU cores
    with mp.Pool(num_processes) as pool:  # Create a process pool
        results = pool.map(train_q_learning, range(num_processes))  # Distribute training across processes

    # Merge Q-tables from all processes by averaging the Q-values
    for state in grid_states:
        for action in actions:
            Q[state][action] = sum(r[state][action] for r in results) / len(results)

    print(get_best_path())  # Print the learned path
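

# --- Optional sketch: an array-based Q-table that could live on the GPU ---
# The GPU_AVAILABLE flag above is set but never used to move data onto the GPU,
# since the dict-of-dicts Q-table cannot be transferred as-is. The sketch below is
# illustrative only, not part of the training pipeline: the helper names
# build_q_array and greedy_action are hypothetical, and it assumes the Q-values
# would be stored as a dense (rows, cols, n_actions) array instead of a nested dict.

def build_q_array():
    """Allocate a zero-initialized Q-array on the GPU if CuPy is available, else with NumPy."""
    xp = cp if GPU_AVAILABLE else np  # Select the array backend (the unused branch is never evaluated)
    return xp.zeros((grid_size[0], grid_size[1], len(actions_list)), dtype=xp.float32)


def greedy_action(q_array, state):
    """Return the highest-valued action for a state from the array-based Q-table."""
    idx = int(q_array[state[0], state[1]].argmax())  # argmax works on both NumPy and CuPy arrays
    return actions_list[idx]


# Example usage (values are all zero until the array has been trained):
# q_array = build_q_array()
# print(greedy_action(q_array, start))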