import gymnasium as gym
from gymnasium import spaces
import numpy as np

# Indices into the 5-element observation/state vector.
CMD_AVANCER = 0
CMD_TOURNER = 1
ETAT_FAIM = 2
ETAT_SOMMEIL = 3
ETAT_HUMEUR = 4

# Discrete action identifiers.
ACTION_ARRETER = 0
ACTION_AVANCER = 1
ACTION_TOURNE_G = 2
ACTION_TOURNE_D = 3


class MiRobotEnv(gym.Env):
    """Simulation environment for MiRobot, a robot puppy.

    The agent learns to react to its owner's commands and to its own
    internal state.

    Observation (``Box`` of 5 ``float32`` values, indexed by the module-level
    ``CMD_*`` / ``ETAT_*`` constants):
        * ``CMD_AVANCER``  -- 1.0 while a "move forward" command is active.
        * ``CMD_TOURNER``  -- 1.0 while a "turn" command is active.
        * ``ETAT_FAIM``    -- hunger, in [0, 1].
        * ``ETAT_SOMMEIL`` -- sleepiness, in [0, 1].
        * ``ETAT_HUMEUR``  -- mood, in [-1, 1].

    Actions (``Discrete(4)``): stop, move forward, turn left, turn right
    (see the ``ACTION_*`` constants).

    Episodes never terminate or truncate on their own; wrap the environment
    (e.g. with a time limit) if bounded episodes are needed.
    """

    metadata = {"render_modes": ["human"], "render_fps": 30}

    def __init__(self, render_mode=None):
        """Create the environment.

        Args:
            render_mode: ``None`` or one of ``metadata["render_modes"]``.
                Accepted so the environment can be built the standard
                Gymnasium way (``gym.make(..., render_mode="human")``).

        Raises:
            ValueError: If ``render_mode`` is not a supported mode.
        """
        super().__init__()

        if render_mode is not None and render_mode not in self.metadata["render_modes"]:
            raise ValueError(
                f"Unsupported render_mode {render_mode!r}; "
                f"expected one of {self.metadata['render_modes']}"
            )
        self.render_mode = render_mode

        self.action_space = spaces.Discrete(4)

        low = np.array([0.0, 0.0, 0.0, 0.0, -1.0], dtype=np.float32)
        high = np.array([1.0, 1.0, 1.0, 1.0, 1.0], dtype=np.float32)
        self.observation_space = spaces.Box(low=low, high=high, dtype=np.float32)

        # Populated by reset(); None until the first reset.
        self.state = None

    def reset(self, seed=None, options=None):
        """Start a new episode.

        Commands start inactive; hunger and sleepiness start in [0, 0.2) and
        mood in [-0.1, 0.1), drawn from the environment's seeded RNG.

        Returns:
            tuple: ``(observation, info)`` where ``observation`` is a copy of
            the internal state and ``info`` is an empty dict.
        """
        super().reset(seed=seed)

        self.state = np.array(
            [
                0.0,
                0.0,
                self.np_random.uniform(low=0.0, high=0.2),
                self.np_random.uniform(low=0.0, high=0.2),
                self.np_random.uniform(low=-0.1, high=0.1),
            ],
            dtype=np.float32,
        )

        info = {}
        # Return a copy: the internal array is mutated in place by step(), so
        # handing it out would silently rewrite observations the caller kept.
        return self.state.copy(), info

    def _update_internal_states(self):
        """Advance hunger, sleepiness and mood by one time step (in place)."""
        self.state[ETAT_FAIM] = np.clip(self.state[ETAT_FAIM] + 0.005, 0.0, 1.0)
        self.state[ETAT_SOMMEIL] = np.clip(self.state[ETAT_SOMMEIL] + 0.003, 0.0, 1.0)

        # Mood drops while the puppy is hungry or sleepy, otherwise it slowly
        # recovers.
        needs_unmet = self.state[ETAT_FAIM] > 0.5 or self.state[ETAT_SOMMEIL] > 0.5
        humeur_decay = -0.005 if needs_unmet else 0.001
        self.state[ETAT_HUMEUR] = np.clip(
            self.state[ETAT_HUMEUR] + humeur_decay, -1.0, 1.0
        )

    def _calculate_reward(self, action):
        """Score ``action`` against the active command and the internal state.

        Obeying a command earns +1.0; stopping during a "forward" command or
        moving forward during a "turn" command costs 0.5. Hunger and
        sleepiness each subtract 10% of their value, mood adds 10% of its
        value.

        Returns:
            float: The reward as a plain Python float.
        """
        reward = 0.0

        if self.state[CMD_AVANCER] > 0.5:
            if action == ACTION_AVANCER:
                reward += 1.0
            elif action == ACTION_ARRETER:
                reward -= 0.5

        if self.state[CMD_TOURNER] > 0.5:
            if action in (ACTION_TOURNE_G, ACTION_TOURNE_D):
                reward += 1.0
            elif action == ACTION_AVANCER:
                reward -= 0.5

        reward -= self.state[ETAT_FAIM] * 0.1
        reward -= self.state[ETAT_SOMMEIL] * 0.1
        reward += self.state[ETAT_HUMEUR] * 0.1

        # The state elements are NumPy scalars; convert so callers get the
        # plain float the Gymnasium step() contract expects.
        return float(reward)

    def _simulate_user_command(self):
        """Randomly replace the owner's command.

        With probability 0.2 both commands are cleared and then, with equal
        chance, either none, "forward" or "turn" is activated. Otherwise the
        current command is left untouched.
        """
        if self.np_random.random() < 0.2:
            self.state[CMD_AVANCER] = 0.0
            self.state[CMD_TOURNER] = 0.0

            choice = self.np_random.integers(0, 3)
            if choice == 1:
                self.state[CMD_AVANCER] = 1.0
            elif choice == 2:
                self.state[CMD_TOURNER] = 1.0

    def step(self, action):
        """Apply ``action`` and advance the simulation by one step.

        The reward is computed against the command that was visible in the
        previous observation; a new command is drawn afterwards.

        Returns:
            tuple: ``(observation, reward, terminated, truncated, info)``.
            ``terminated`` and ``truncated`` are always ``False`` and
            ``info`` is an empty dict. ``observation`` is a copy of the
            internal state.
        """
        self._update_internal_states()
        reward = self._calculate_reward(action)
        self._simulate_user_command()

        terminated = False
        truncated = False
        info = {}

        # Copy for the same reason as in reset(): later steps mutate
        # self.state in place.
        return self.state.copy(), reward, terminated, truncated, info

    def render(self, mode='human'):
        """Print a textual summary of the current state to stdout.

        ``mode`` is kept for backward compatibility and is not consulted.
        Does nothing when called before the first ``reset()``.
        """
        if self.state is None:
            # Nothing to display yet.
            return

        faim = self.state[ETAT_FAIM] * 100
        sommeil = self.state[ETAT_SOMMEIL] * 100
        humeur = self.state[ETAT_HUMEUR]

        cmd_a = "OUI" if self.state[CMD_AVANCER] > 0.5 else "NON"
        cmd_t = "OUI" if self.state[CMD_TOURNER] > 0.5 else "NON"

        print("--- État MiRobot ---")
        print(f" > Commande Avancer: {cmd_a}, Commande Tourner: {cmd_t}")
        print(f" > Faim: {faim:.0f}%, Sommeil: {sommeil:.0f}%")
        print(f" > Humeur: {humeur:.2f} (entre -1.0 et 1.0)")

    def close(self):
        """Release resources; this environment holds none."""
        pass