Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,13 @@ repos:
(?x)^(
.*cs.meta|
.*.css|
.*.meta
.*.meta|
.*.asset|
.*.prefab|
.*.unity|
.*.json
)$
args: [--fix=lf]
args: [--fix=crlf]

- id: trailing-whitespace
name: trailing-whitespace-markdown
Expand Down
19 changes: 12 additions & 7 deletions docs/Python-Gym-API-Documentation.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,18 +59,22 @@ Environment initialization
#### reset

```python
| reset() -> Union[List[np.ndarray], np.ndarray]
| reset(*, seed: int | None = None, options: dict[str, Any] | None = None) -> Tuple[np.ndarray, Dict]
```

Resets the state of the environment and returns an initial observation.
Returns: observation (object/list): the initial observation of the
space.
Resets the state of the environment and returns an initial observation and info.

**Returns**:

- `observation` _object/list_ - the initial observation of the
space.
- `info` _dict_ - contains auxiliary diagnostic information.

<a name="mlagents_envs.envs.unity_gym_env.UnityToGymWrapper.step"></a>
#### step

```python
| step(action: List[Any]) -> GymStepResult
| step(action: Any) -> GymStepResult
```

Run one timestep of the environment's dynamics. When end of
Expand All @@ -86,14 +90,15 @@ Accepts an action and returns a tuple (observation, reward, done, info).

- `observation` _object/list_ - agent's observation of the current environment
- `reward` _float/list_ - amount of reward returned after previous action
- `done` _boolean/list_ - whether the episode has ended.
- `terminated` _boolean/list_ - whether the episode has ended by termination.
- `truncated` _boolean/list_ - whether the episode has ended by truncation.
- `info` _dict_ - contains auxiliary diagnostic information.

<a name="mlagents_envs.envs.unity_gym_env.UnityToGymWrapper.render"></a>
#### render

```python
| render(mode="rgb_array")
| render()
```

Return the latest visual observations.
Expand Down
44 changes: 30 additions & 14 deletions ml-agents-envs/mlagents_envs/envs/unity_gym_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
import numpy as np
from typing import Any, Dict, List, Optional, Tuple, Union

import gym
from gym import error, spaces
import gymnasium as gym
from gymnasium import error, spaces

from mlagents_envs.base_env import ActionTuple, BaseEnv
from mlagents_envs.base_env import DecisionSteps, TerminalSteps
Expand All @@ -20,7 +20,7 @@ class UnityGymException(error.Error):


logger = logging_util.get_logger(__name__)
GymStepResult = Tuple[np.ndarray, float, bool, Dict]
GymStepResult = Tuple[np.ndarray, float, bool, bool, Dict]


class UnityToGymWrapper(gym.Env):
Expand Down Expand Up @@ -151,21 +151,26 @@ def __init__(
else:
self._observation_space = list_spaces[0] # only return the first one

def reset(
    self,
    *,
    seed: Optional[int] = None,
    options: Optional[Dict[str, Any]] = None,
) -> Tuple[np.ndarray, Dict]:
    """Reset the environment and return an initial observation and info.

    The annotations use ``Optional[...]`` rather than ``X | None``: they are
    evaluated when the method is defined, and PEP 604 unions on builtin
    types raise ``TypeError`` on Python 3.9.

    Args:
        seed: forwarded to the base-class ``reset``; it is not passed to the
            wrapped Unity environment's own ``reset()``.
        options: forwarded to the base-class ``reset``.

    Returns:
        observation (object/list): the initial observation of the space.
        info (dict): contains auxiliary diagnostic information.
    """
    # Let the gym base class process seed/options before touching Unity.
    super().reset(seed=seed, options=options)
    self._env.reset()
    decision_step, _ = self._env.get_steps(self.name)
    n_agents = len(decision_step)
    self._check_agents(n_agents)
    # A fresh episode has started, so step() may be called again.
    self.game_over = False

    res: GymStepResult = self._single_step(decision_step)
    # GymStepResult is (observation, reward, terminated, truncated, info);
    # reset only exposes the observation and the info dict.
    return res[0], res[4]

def step(self, action: List[Any]) -> GymStepResult:
def step(self, action: Any) -> GymStepResult:
"""Run one timestep of the environment's dynamics. When end of
episode is reached, you are responsible for calling `reset()`
to reset this environment's state.
Expand All @@ -175,14 +180,15 @@ def step(self, action: List[Any]) -> GymStepResult:
Returns:
observation (object/list): agent's observation of the current environment
reward (float/list) : amount of reward returned after previous action
done (boolean/list): whether the episode has ended.
terminated (boolean/list): whether the episode has ended by termination.
truncated (boolean/list): whether the episode has ended by truncation.
info (dict): contains auxiliary diagnostic information.
"""
if self.game_over:
raise UnityGymException(
"You are calling 'step()' even though this environment has already "
"returned done = True. You must always call 'reset()' once you "
"receive 'done = True'."
"returned `terminated` or `truncated` as True. You must always call 'reset()' once you "
"receive `terminated` or `truncated` as True."
)
if self._flattener is not None:
# Translate action into list
Expand Down Expand Up @@ -227,9 +233,19 @@ def _single_step(self, info: Union[DecisionSteps, TerminalSteps]) -> GymStepResu
visual_obs = self._get_vis_obs_list(info)
self.visual_obs = self._preprocess_single(visual_obs[0][0])

done = isinstance(info, TerminalSteps)
if isinstance(info, TerminalSteps):
interrupted = info.interrupted
terminated, truncated = not interrupted, interrupted
else:
terminated, truncated = False, False

return (default_observation, info.reward[0], done, {"step": info})
return (
default_observation,
info.reward[0],
terminated,
truncated,
{"step": info},
)

def _preprocess_single(self, single_visual_obs: np.ndarray) -> np.ndarray:
if self.uint8_visual:
Expand Down Expand Up @@ -276,7 +292,7 @@ def _get_vec_obs_size(self) -> int:
result += obs_spec.shape[0]
return result

def render(self, mode="rgb_array"):
def render(self):
"""
Return the latest visual observations.
Note that it will not render a new frame of the environment.
Expand Down
38 changes: 38 additions & 0 deletions ml-agents-envs/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Poetry packaging metadata for the mlagents_envs distribution.
[tool.poetry]
name = "mlagents_envs"
version = "1.2.0.dev0"
description = "Unity Machine Learning Agents Interface"
homepage = "https://github.com/Unity-Technologies/ml-agents"
authors = ["Unity Technologies <ML-Agents@unity3d.com>"]
classifiers=[
"Intended Audience :: Developers",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
"License :: OSI Approved :: Apache Software License",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
]
readme = "README.md"

# Package only the mlagents_envs sources; tests, colabs and notebooks are excluded.
packages = [
{ include = "mlagents_envs", from = "." },
]
include = ["mlagents_envs/*"]
exclude = ["*.tests", "*.tests.*", "tests.*", "tests", "colabs", "*.ipynb"]

# Runtime requirements.
# NOTE(review): setup.py declares its own install_requires / python_requires;
# presumably the two lists must be kept in sync by hand - confirm.
[tool.poetry.dependencies]
python = "^3.9"
grpcio = ">=1.11.0,<=1.48.2"
Pillow = ">=4.2.1"
protobuf = ">=3.6,<3.21"
pyyaml = ">=3.1.0"
gymnasium = ">=0.25.0"
pettingzoo = ">=1.15.0"
numpy = ">=1.23.5,<2.0"
filelock = ">=3.4.0"
cloudpickle = "*"

# Build backend (PEP 517) provided by poetry-core.
[build-system]
requires = ["poetry-core>=1.9.0"]
build-backend = "poetry.core.masonry.api"
8 changes: 4 additions & 4 deletions ml-agents-envs/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,12 @@ def run(self):
"Pillow>=4.2.1",
"protobuf>=3.6,<3.21",
"pyyaml>=3.1.0",
"gym>=0.21.0",
"pettingzoo==1.15.0",
"numpy>=1.23.5,<1.24.0",
"gymnasium>=0.25.0",
"pettingzoo>=1.15.0",
"numpy>=1.23.5,<2.0",
"filelock>=3.4.0",
],
python_requires=">=3.10.1,<=3.10.12",
python_requires=">=3.9,<4",
# TODO: Remove this once mypy stops having spurious setuptools issues.
cmdclass={"verify": VerifyVersionCommand}, # type: ignore
)
4 changes: 2 additions & 2 deletions ml-agents/mlagents/trainers/subprocess_env_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
UnityCommunicatorStoppedException,
)
from multiprocessing import Process, Pipe, Queue
from multiprocessing.connection import Connection
from multiprocessing.connection import Connection, PipeConnection
from queue import Empty as EmptyQueueException
from mlagents_envs.base_env import BaseEnv, BehaviorName, BehaviorSpec
from mlagents_envs import logging_util
Expand Down Expand Up @@ -77,7 +77,7 @@ class StepResponse(NamedTuple):


class UnityEnvWorker:
def __init__(self, process: Process, worker_id: int, conn: Connection):
def __init__(self, process: Process, worker_id: int, conn: PipeConnection):
self.process = process
self.worker_id = worker_id
self.conn = conn
Expand Down
7 changes: 3 additions & 4 deletions utils/generate_markdown_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import argparse
import hashlib


# pydoc-markdown -I . -m module_name --render_toc > doc.md


Expand Down Expand Up @@ -52,8 +51,8 @@ def remove_trailing_whitespace(filename):
# compare source and destination and write only if changed
if source_file != destination_file:
num_changed += 1
with open(filename, "wb") as f:
f.write(destination_file.encode())
with open(filename, "w", newline="\r\n") as f:
f.write(destination_file)


if __name__ == "__main__":
Expand Down Expand Up @@ -84,7 +83,7 @@ def remove_trailing_whitespace(filename):
for submodule in submodules:
module_args.append("-m")
module_args.append(f"{module_name}.{submodule}")
with open(output_file_name, "w") as output_file:
with open(output_file_name, "wb") as output_file:
subprocess_args = [
"pydoc-markdown",
"-I",
Expand Down