-
Notifications
You must be signed in to change notification settings - Fork 20
Better monitoring #336
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Better monitoring #336
Changes from 6 commits
45c8a76
6d21081
b1b2021
08ef79e
3bbc2c9
d23c172
2f157c7
f57c879
f867167
919b62d
c1058a9
6fb36b1
4a84e76
d1d1a32
4e07f29
b54c0e3
154f29b
1366ede
0b1d965
9a65742
1f9af1a
b7cccee
bcead64
935fc58
8c9a8d3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -25,6 +25,8 @@ | |
| ProfileEvent, ProfileEventType, ProfileTimestamp) | ||
| from libmuscle.snapshot import SnapshotMetadata | ||
| from libmuscle.timestamp import Timestamp | ||
| from libmuscle.manager.instance_manager import InstanceManager | ||
| from libmuscle.manager.instantiator import MonitorRequest | ||
|
|
||
|
|
||
| _logger = logging.getLogger(__name__) | ||
|
|
@@ -79,7 +81,8 @@ def __init__( | |
| topology_store: TopologyStore, | ||
| snapshot_registry: SnapshotRegistry, | ||
| deadlock_detector: DeadlockDetector, | ||
| run_dir: Optional[RunDir] | ||
| run_dir: Optional[RunDir], | ||
| instance_manager: Optional[InstanceManager] = None, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This shouldn't be optional, there's always an InstanceManager, so we can always pass it. I really want to have the runtime structure as inflexible (and therefore comprehensible) as possible.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As discussed: the InstanceManager is Optional, as muscle can run without it. If you want to make the InstanceManager mandatory, I think that should be another PR |
||
| ) -> None: | ||
| """Create an MMPRequestHandler. | ||
|
|
||
|
|
@@ -98,6 +101,7 @@ def __init__( | |
| self._deadlock_detector = deadlock_detector | ||
| self._run_dir = run_dir | ||
| self._reference_time = time.monotonic() | ||
| self._instance_manager = instance_manager | ||
|
|
||
| def handle_request(self, request: bytes) -> bytes: | ||
| """Handles a manager request. | ||
|
|
@@ -146,13 +150,15 @@ def close(self) -> None: | |
|
|
||
| def _register_instance( | ||
| self, instance_id: str, locations: List[str], | ||
| ports: List[List[str]], version: str = '') -> Any: | ||
| ports: List[List[str]], pid: int, hostname: str, version: str = '') -> Any: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Instances that use MPI will have a hostname/pid for each rank. How would that be passed here?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Only the process calling "register_instance" will be monitored, if we want to also monitor all the other MPI processes, then I think that the following needs to be done:
I think the backend is flexible enough to have Many PIDs for a single Instance ID |
||
| """Handle a register instance request. | ||
|
|
||
| Args: | ||
| instance_id: ID of the instance to register | ||
| locations: Locations where it can be reached | ||
| ports: Ports of this instance | ||
| pid: PID of the instance | ||
| hostname: Hostname of the instance | ||
| version: Version of libmuscle that this instance uses | ||
|
|
||
| Returns: | ||
|
|
@@ -169,11 +175,14 @@ def _register_instance( | |
| f' manager libmuscle version ({libmuscle.__version__}).' | ||
| ' Please ensure that the instance and the manager use the' | ||
| ' same version of libmuscle.'] | ||
|
|
||
| port_objs = [decode_port(p) for p in ports] | ||
| instance = Reference(instance_id) | ||
|
|
||
| if self._instance_manager: | ||
| self._instance_manager._requests_out.put(MonitorRequest(instance_id, hostname, pid)) | ||
|
v1kko marked this conversation as resolved.
Outdated
|
||
|
|
||
| try: | ||
| self._instance_registry.add(instance, locations, port_objs) | ||
| self._instance_registry.add(instance, locations, port_objs, pid, hostname) | ||
|
|
||
| _logger.info(f'Registered instance {instance_id}') | ||
| return [ResponseType.SUCCESS.value] | ||
|
|
@@ -417,7 +426,8 @@ def __init__( | |
| topology_store: TopologyStore, | ||
| snapshot_registry: SnapshotRegistry, | ||
| deadlock_detector: DeadlockDetector, | ||
| run_dir: Optional[RunDir] | ||
| run_dir: Optional[RunDir], | ||
| instance_manager: Optional[InstanceManager] | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same here about optionality, and before run_dir with the other system components makes more sense. |
||
| ) -> None: | ||
| """Create an MMPServer. | ||
|
|
||
|
|
@@ -439,7 +449,8 @@ def __init__( | |
| """ | ||
| self._handler = MMPRequestHandler( | ||
| logger, profile_store, configuration, instance_registry, | ||
| topology_store, snapshot_registry, deadlock_detector, run_dir) | ||
| topology_store, snapshot_registry, deadlock_detector, run_dir, | ||
| instance_manager=instance_manager) | ||
| try: | ||
| self._server = TcpTransportServer(self._handler, 9000) | ||
| except OSError as e: | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.