Skip to content

Commit 8b69285

Browse files
awaelchli, EtayLivne, Etay Livne, dependabot[bot], tchaton
authored
Patch release 2.3.1 (#20021)
Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: Etay Livne <53942171+EtayLivne@users.noreply.github.com> Co-authored-by: Etay Livne <etay.livne@mobileye.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: thomas chaton <thomas@grid.ai>
1 parent a42484c commit 8b69285

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+757
-445
lines changed

.github/checkgroup.yml

+6-6
Original file line numberDiff line numberDiff line change
@@ -242,9 +242,9 @@ subprojects:
242242
- "!*.md"
243243
- "!**/*.md"
244244
checks:
245-
- "app-pytest (macOS-11, lightning, 3.8, latest)"
246-
- "app-pytest (macOS-11, lightning, 3.8, oldest)"
247-
- "app-pytest (macOS-11, app, 3.9, latest)"
245+
- "app-pytest (macOS-12, lightning, 3.8, latest)"
246+
- "app-pytest (macOS-12, lightning, 3.8, oldest)"
247+
- "app-pytest (macOS-12, app, 3.9, latest)"
248248
- "app-pytest (macOS-12, app, 3.11, latest)"
249249
- "app-pytest (ubuntu-20.04, lightning, 3.8, latest)"
250250
- "app-pytest (ubuntu-20.04, lightning, 3.8, oldest)"
@@ -270,9 +270,9 @@ subprojects:
270270
- "!*.md"
271271
- "!**/*.md"
272272
checks:
273-
- "app-examples (macOS-11, lightning, 3.9, latest)"
274-
- "app-examples (macOS-11, lightning, 3.9, oldest)"
275-
- "app-examples (macOS-11, app, 3.9, latest)"
273+
- "app-examples (macOS-12, lightning, 3.9, latest)"
274+
- "app-examples (macOS-12, lightning, 3.9, oldest)"
275+
- "app-examples (macOS-12, app, 3.9, latest)"
276276
- "app-examples (ubuntu-20.04, lightning, 3.9, latest)"
277277
- "app-examples (ubuntu-20.04, lightning, 3.9, oldest)"
278278
- "app-examples (ubuntu-20.04, app, 3.9, latest)"

.github/workflows/ci-examples-app.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -36,13 +36,13 @@ jobs:
3636
strategy:
3737
fail-fast: false
3838
matrix:
39-
os: [ubuntu-20.04, macOS-11, windows-2022]
39+
os: [ubuntu-20.04, macOS-12, windows-2022]
4040
pkg-name: ["lightning"]
4141
python-version: ["3.9"]
4242
requires: ["oldest", "latest"]
4343
include:
4444
# "app" installs the standalone package
45-
- { os: "macOS-11", pkg-name: "app", python-version: "3.9", requires: "latest" }
45+
- { os: "macOS-12", pkg-name: "app", python-version: "3.9", requires: "latest" }
4646
- { os: "ubuntu-20.04", pkg-name: "app", python-version: "3.9", requires: "latest" }
4747
- { os: "windows-2022", pkg-name: "app", python-version: "3.9", requires: "latest" }
4848
# Timeout: https://stackoverflow.com/a/59076067/4521646

.github/workflows/ci-tests-app.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ jobs:
3838
strategy:
3939
fail-fast: false
4040
matrix:
41-
os: ["ubuntu-20.04", "macOS-11", "windows-2022"]
41+
os: ["ubuntu-20.04", "macOS-12", "windows-2022"]
4242
pkg-name: ["lightning"]
4343
python-version: ["3.8"]
4444
requires: ["oldest", "latest"]
@@ -48,7 +48,7 @@ jobs:
4848
- { os: "ubuntu-22.04", pkg-name: "app", python-version: "3.11", requires: "latest" }
4949
- { os: "windows-2022", pkg-name: "app", python-version: "3.11", requires: "latest" }
5050
# "app" installs the standalone package
51-
- { os: "macOS-11", pkg-name: "app", python-version: "3.9", requires: "latest" }
51+
- { os: "macOS-12", pkg-name: "app", python-version: "3.9", requires: "latest" }
5252
- { os: "ubuntu-20.04", pkg-name: "app", python-version: "3.9", requires: "latest" }
5353
- { os: "windows-2022", pkg-name: "app", python-version: "3.8", requires: "latest" }
5454
# Timeout: https://stackoverflow.com/a/59076067/4521646

.github/workflows/docker-build.yml

+3-3
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ jobs:
8383
gh_env.write("DOCKER_TAGS=" + ",".join(tags))
8484
shell: python
8585

86-
- uses: docker/build-push-action@v5
86+
- uses: docker/build-push-action@v6
8787
with:
8888
build-args: |
8989
PYTHON_VERSION=${{ matrix.python_version }}
@@ -119,7 +119,7 @@ jobs:
119119
with:
120120
username: ${{ secrets.DOCKER_USERNAME }}
121121
password: ${{ secrets.DOCKER_PASSWORD }}
122-
- uses: docker/build-push-action@v5
122+
- uses: docker/build-push-action@v6
123123
with:
124124
build-args: |
125125
PYTHON_VERSION=${{ matrix.python_version }}
@@ -151,7 +151,7 @@ jobs:
151151
- name: Build Conda Docker
152152
# publish master/release
153153
continue-on-error: true
154-
uses: docker/build-push-action@v5
154+
uses: docker/build-push-action@v6
155155
with:
156156
file: dockers/nvidia/Dockerfile
157157
push: false

pyproject.toml

+1
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ exclude = [
140140
"src/lightning/app/cli/component-template",
141141
"src/lightning/app/cli/pl-app-template",
142142
"src/lightning/app/cli/react-ui-template",
143+
"src/lightning/app/launcher/utils.py",
143144
]
144145
install_types = "True"
145146
non_interactive = "True"

requirements/app/app.txt

+4-2
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
1-
lightning-cloud == 0.5.69 # Must be pinned to ensure compatibility
1+
lightning-cloud == 0.5.70 # Must be pinned to ensure compatibility
22
packaging
33
typing-extensions >=4.4.0, <4.10.0
44
deepdiff >=5.7.0, <6.6.0
55
fsspec[http] >=2022.5.0, <2023.11.0
66
croniter >=1.3.0, <1.5.0 # strict; TODO: for now until we find something more robust.
77
traitlets >=5.3.0, <5.12.0
88
arrow >=1.2.0, <1.3.0
9-
lightning-utilities >=0.8.0, <0.12.0
9+
lightning-utilities >=0.10.0, <0.12.0
1010
beautifulsoup4 >=4.8.0, <4.13.0
1111
inquirer >=2.10.0, <3.2.0
1212
psutil <5.9.6
@@ -27,3 +27,5 @@ urllib3 <2.0.0
2727
uvicorn <0.24.0
2828
websocket-client <1.7.0
2929
websockets <11.1.0
30+
numpy >=1.17.2, <2.0
31+
msgpack

requirements/ci.txt

+4-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1-
setuptools
2-
wheel
1+
setuptools <70.1.1
2+
wheel <0.44.0
33
awscli >=1.30.0, <1.31.0
44
twine ==4.0.1
5+
importlib-metadata <8.0.0
56
wget
6-
packaging
7+
packaging <24.2

requirements/fabric/base.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,4 @@ torch >=2.0.0, <2.4.0
66
fsspec[http] >=2022.5.0, <2024.4.0
77
packaging >=20.0, <=23.1
88
typing-extensions >=4.4.0, <4.10.0
9-
lightning-utilities >=0.8.0, <0.12.0
9+
lightning-utilities >=0.10.0, <0.12.0

requirements/pytorch/base.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,4 @@ fsspec[http] >=2022.5.0, <2024.4.0
99
torchmetrics >=0.7.0, <1.3.0 # needed for using fixed compare_version
1010
packaging >=20.0, <=23.1
1111
typing-extensions >=4.4.0, <4.10.0
12-
lightning-utilities >=0.8.0, <0.12.0
12+
lightning-utilities >=0.10.0, <0.12.0

requirements/pytorch/extra.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33

44
# extended list of package dependencies to reach full functionality
55
matplotlib>3.1, <3.9.0
6-
omegaconf >=2.0.5, <2.4.0
7-
hydra-core >=1.0.5, <1.4.0
6+
omegaconf >=2.2.3, <2.4.0
7+
hydra-core >=1.2.0, <1.4.0
88
jsonargparse[signatures] >=4.27.7, <4.28.0
99
rich >=12.3.0, <13.6.0
1010
tensorboardX >=2.2, <2.7.0 # min version is set by torch.onnx missing attribute

src/lightning/app/cli/lightning_cli.py

+109-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,19 @@
4040
from lightning.app.cli.lightning_cli_delete import delete
4141
from lightning.app.cli.lightning_cli_launch import launch
4242
from lightning.app.cli.lightning_cli_list import get_list
43-
from lightning.app.core.constants import ENABLE_APP_COMMENT_COMMAND_EXECUTION, get_lightning_cloud_url
43+
from lightning.app.core.constants import (
44+
APP_SERVER_HOST,
45+
APP_SERVER_PORT,
46+
ENABLE_APP_COMMENT_COMMAND_EXECUTION,
47+
get_lightning_cloud_url,
48+
)
49+
from lightning.app.launcher.launcher import (
50+
run_lightning_flow,
51+
run_lightning_work,
52+
serve_frontend,
53+
start_application_server,
54+
start_flow_and_servers,
55+
)
4456
from lightning.app.runners.cloud import CloudRuntime
4557
from lightning.app.runners.runtime import dispatch
4658
from lightning.app.runners.runtime_type import RuntimeType
@@ -393,3 +405,99 @@ def _prepare_file(file: str) -> str:
393405
return file
394406

395407
raise FileNotFoundError(f"The provided file {file} hasn't been found.")
408+
409+
@run.command("server")
@click.argument("file", type=click.Path(exists=True))
@click.option("--queue-id", help="ID for identifying queue", default="", type=str)
@click.option("--host", help="Application running host", default=APP_SERVER_HOST, type=str)
@click.option("--port", help="Application running port", default=APP_SERVER_PORT, type=int)
def run_server(file: str, queue_id: str, host: str, port: int) -> None:
    """It takes the application file as input, build the application object and then use that to run the application
    server.

    This is used by the cloud runners to start the status server for the application

    """
    # NOTE: the docstring above doubles as the `--help` text of this click command, so it is kept verbatim.
    debug_message = f"Run Server: {file} {queue_id} {host} {port}"
    logger.debug(debug_message)

    # Hand the entrypoint file and bind address over to the launcher; the queue id is passed by keyword.
    start_application_server(file, host, port, queue_id=queue_id)
424+
425+
@run.command("flow")
@click.argument("file", type=click.Path(exists=True))
@click.option("--queue-id", help="ID for identifying queue", default="", type=str)
@click.option("--base-url", help="Base url at which the app server is hosted", default="")
def run_flow(file: str, queue_id: str, base_url: str) -> None:
    """It takes the application file as input, build the application object, proxy all the work components and then run
    the application flow defined in the root component.

    It does exactly what a singleprocess dispatcher would do but with proxied work components.

    """
    # NOTE: the docstring above doubles as the `--help` text of this click command, so it is kept verbatim.
    debug_message = f"Run Flow: {file} {queue_id} {base_url}"
    logger.debug(debug_message)

    # Delegate to the launcher with the parsed CLI values.
    run_lightning_flow(file, queue_id=queue_id, base_url=base_url)
439+
440+
@run.command("work")
@click.argument("file", type=click.Path(exists=True))
@click.option("--work-name", type=str)
@click.option("--queue-id", help="ID for identifying queue", default="", type=str)
def run_work(file: str, work_name: str, queue_id: str) -> None:
    """Unlike other entrypoints, this command will take the file path or module details for a work component and run
    that by fetching the states from the queues."""
    # NOTE: the docstring above doubles as the `--help` text of this click command, so it is kept verbatim.
    debug_message = f"Run Work: {file} {work_name} {queue_id}"
    logger.debug(debug_message)

    # All values are forwarded by keyword to the launcher.
    run_lightning_work(file=file, work_name=work_name, queue_id=queue_id)
454+
455+
@run.command("frontend")
@click.argument("file", type=click.Path(exists=True))
@click.option("--flow-name")
@click.option("--host")
@click.option("--port", type=int)
def run_frontend(file: str, flow_name: str, host: str, port: int) -> None:
    """Serve the frontend specified by the given flow."""
    # NOTE: the docstring above doubles as the `--help` text of this click command.
    # The port is logged too, so the debug line records every argument handed to `serve_frontend`
    # (the sibling commands already log all of their arguments).
    logger.debug(f"Run Frontend: {file} {flow_name} {host} {port}")
    serve_frontend(file=file, flow_name=flow_name, host=host, port=port)
465+
466+
@run.command("flow-and-servers")
@click.argument("file", type=click.Path(exists=True))
@click.option("--queue-id", help="ID for identifying queue", default="", type=str)
@click.option("--base-url", help="Base url at which the app server is hosted", default="")
@click.option("--host", help="Application running host", default=APP_SERVER_HOST, type=str)
@click.option("--port", help="Application running port", default=APP_SERVER_PORT, type=int)
@click.option(
    "--flow-port",
    help="Pair of flow name and frontend port",
    type=(str, int),
    multiple=True,
)
def run_flow_and_servers(
    file: str,
    base_url: str,
    queue_id: str,
    host: str,
    port: int,
    # `multiple=True` combined with a 2-tuple type yields zero or more (flow name, port) pairs,
    # hence the variadic tuple annotation.
    flow_port: Tuple[Tuple[str, int], ...],
) -> None:
    """It takes the application file as input, build the application object and then use that to run the application
    flow defined in the root component, the application server and all the flow frontends.

    This is used by the cloud runners to start the flow, the status server and all frontends for the application

    """
    # NOTE: the docstring above doubles as the `--help` text of this click command, so it is kept verbatim.
    logger.debug(f"Run Flow: {file} {queue_id} {base_url}")
    logger.debug(f"Run Server: {file} {queue_id} {host} {port}.")
    logger.debug(f"Run Frontend's: {flow_port}")

    # Everything is forwarded by keyword; `--flow-port` pairs are passed through as-is.
    start_flow_and_servers(
        entrypoint_file=file,
        base_url=base_url,
        queue_id=queue_id,
        host=host,
        port=port,
        flow_names_and_ports=flow_port,
    )

src/lightning/app/core/app.py

+5
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
FLOW_DURATION_SAMPLES,
3636
FLOW_DURATION_THRESHOLD,
3737
FRONTEND_DIR,
38+
SHOULD_START_WORKS_WITH_FLOW,
3839
STATE_ACCUMULATE_WAIT,
3940
)
4041
from lightning.app.core.queues import BaseQueue
@@ -144,6 +145,7 @@ def __init__(
144145
self.threads: List[threading.Thread] = []
145146
self.exception = None
146147
self.collect_changes: bool = True
148+
self._should_start_works_with_flow: bool = SHOULD_START_WORKS_WITH_FLOW
147149

148150
self.status: Optional[AppStatus] = None
149151
# TODO: Enable ready locally for opening the UI.
@@ -733,6 +735,9 @@ def _send_flow_to_work_deltas(self, state: dict) -> None:
733735
self.flow_to_work_delta_queues[w.name].put(deep_diff)
734736

735737
def _start_with_flow_works(self) -> None:
738+
if not self._should_start_works_with_flow:
739+
return
740+
736741
for w in self.works:
737742
if w._start_with_flow:
738743
parallel = w.parallel

src/lightning/app/core/constants.py

+33-1
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,10 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
import json
1516
import os
1617
from pathlib import Path
17-
from typing import Optional
18+
from typing import Any, Optional
1819

1920
import lightning_cloud.env
2021

@@ -101,6 +102,37 @@ def get_lightning_cloud_url() -> str:
101102

102103
BATCH_DELTA_COUNT = int(os.getenv("BATCH_DELTA_COUNT", "128"))
103104
CHECK_ERROR_QUEUE_INTERVAL = float(os.getenv("CHECK_ERROR_QUEUE_INTERVAL", "30"))
105+
SHOULD_START_WORKS_WITH_FLOW = bool(int(os.getenv("SHOULD_START_WORKS_WITH_FLOW", "1")))
106+
IS_RUNNING_IN_FLOW = os.getenv("LIGHTNING_CLOUD_WORK_NAME", None) is None
107+
108+
class DistributedPluginChecker:
    """Snapshot of the ``DISTRIBUTED_ARGUMENTS`` / ``LIGHTNING_CLOUD_WORK_NAME`` environment variables.

    Both variables are read once, when the instance is created. The instance is truthy only when
    ``DISTRIBUTED_ARGUMENTS`` decodes to a truthy JSON value *and* ``LIGHTNING_CLOUD_WORK_NAME`` is set to a
    non-empty value.

    Raises:
        json.JSONDecodeError: At construction time, if ``DISTRIBUTED_ARGUMENTS`` is set but is not valid JSON.

    """

    def __init__(self) -> None:
        raw_arguments = os.getenv("DISTRIBUTED_ARGUMENTS")
        # Decode into the attribute in one step so it only ever holds the decoded payload (or ``None``),
        # never the raw environment string.
        # NOTE(review): presumably the payload is a JSON object - `should_create_work` calls `.get` on it.
        self.distributed_arguments: Optional[dict] = json.loads(raw_arguments) if raw_arguments else None

        self.running_distributed_plugin = bool(
            self.distributed_arguments and os.getenv("LIGHTNING_CLOUD_WORK_NAME")
        )

    def __bool__(self) -> bool:
        return self.running_distributed_plugin

    def should_create_work(self, work: Any) -> bool:
        """Return whether ``work`` should be created.

        Always ``True`` when no distributed arguments are available. Otherwise the last dot-separated
        component of ``work.name`` is parsed as an integer rank and compared against the ``num_instances``
        entry of the distributed arguments (``0`` when the entry is missing).

        Raises:
            ValueError: If distributed arguments are set and the last component of ``work.name`` is not an integer.

        """
        if not self.distributed_arguments:
            return True

        num_nodes = self.distributed_arguments.get("num_instances", 0)
        node_rank = int(work.name.rsplit(".", 1)[-1])

        # Only the start with flow works are skipped for performance purposes
        return node_rank >= num_nodes
132+
133+
134+
# TODO (tchaton): Add LitData and JobPlugin optimizations
135+
PLUGIN_CHECKER = IS_DISTRIBUTED_PLUGIN = DistributedPluginChecker()
104136

105137

106138
def enable_multiple_works_in_default_container() -> bool:

src/lightning/app/core/flow.py

+5
Original file line numberDiff line numberDiff line change
@@ -836,6 +836,11 @@ def load_state_dict(self, flow_state, children_states, strict) -> None:
836836
elif strict:
837837
raise ValueError(f"The component {child_name} wasn't instantiated for the component {self.name}")
838838

def stop_works(self, works: List[Any]) -> None:
    """Forward a stop request for ``works`` to the backend attached to this flow.

    Raises:
        RuntimeError: If no backend is attached (``self._backend`` is ``None``).

    """
    backend = self._backend
    if backend is not None:
        backend.stop_works(works)
        return
    raise RuntimeError("Your flow should have a backend attached. Found None.")
843+
839844

840845
class _RootFlow(LightningFlow):
841846
def __init__(self, work: LightningWork) -> None:

0 commit comments

Comments
 (0)