diff --git a/plugins/kuberay/workspace/install.yml b/plugins/kuberay/workspace/install.yml index e3bf2fc884cb3c7a8ae22be0abdc86d251c6c10c..82d55d8e41c115fe16ed3ddbc1296cc2b78d3db9 100644 --- a/plugins/kuberay/workspace/install.yml +++ b/plugins/kuberay/workspace/install.yml @@ -100,15 +100,15 @@ msg: "Error: KubeRay installation failed" when: head_pod.stdout == "" - - name: Check if Head Pod is running + - name: Check if Head Pod is ready shell: | kubectl get pods -n {{ namespace }} \ --selector=ray.io/node-type=head \ - -o jsonpath='{.items[0].status.phase}' - register: head_pod_status - retries: 20 + -o jsonpath='{.items[0].status.conditions[?(@.type=="Ready")].status}' + register: head_pod_ready + retries: 30 delay: 6 - until: head_pod_status.stdout == "Running" + until: head_pod_ready.stdout == "True" - name: Get Worker Pods info shell: | @@ -117,7 +117,7 @@ -o json register: worker_pods_info - - name: Check if every Worker Pod is running + - name: Check if every Worker Pod is ready shell: | kubectl get pods -n {{ namespace }} \ --selector=ray.io/node-type=worker \ @@ -127,7 +127,7 @@ delay: 6 until: > ( - (worker_pods_status.stdout | from_json | json_query('items[*].status.phase') | select('equalto', 'Running') | list | length) + (worker_pods_status.stdout | from_json | json_query('items[*].status.conditions[?(@.type==`Ready`)].status') | flatten | select('equalto', 'True') | list | length) == (worker_pods_status.stdout | from_json | json_query('items') | length) ) diff --git a/plugins/kuberay/workspace/train.yaml b/plugins/kuberay/workspace/train.yaml index a60cd9c3c7f4939fd64e4f2f2750f0cd1abb6abe..4274372b5e82ff5b24c15a5c17b0fc6d84412c6b 100644 --- a/plugins/kuberay/workspace/train.yaml +++ b/plugins/kuberay/workspace/train.yaml @@ -27,15 +27,15 @@ dest: "/tmp/{{ test_picture_name }}" mode: '0744' - - name: Check if Head Pod is running + - name: Check if Head Pod is ready shell: | kubectl get pods -n {{ namespace }} \ --selector=ray.io/node-type=head \ - -o jsonpath='{.items[0].status.phase}' - register: head_pod_status + -o jsonpath='{.items[0].status.conditions[?(@.type=="Ready")].status}' + register: head_pod_ready retries: 10 delay: 5 - until: head_pod_status.stdout == "Running" + until: head_pod_ready.stdout == "True" - name: Get Worker Pods info shell: | @@ -44,7 +44,7 @@ -o json register: worker_pods_info - - name: Check if every Worker Pod is running + - name: Check if every Worker Pod is ready shell: | kubectl get pods -n {{ namespace }} \ --selector=ray.io/node-type=worker \ @@ -54,7 +54,7 @@ delay: 5 until: > ( - (worker_pods_status.stdout | from_json | json_query('items[*].status.phase') | select('equalto', 'Running') | list | length) + (worker_pods_status.stdout | from_json | json_query('items[*].status.conditions[?(@.type==`Ready`)].status') | flatten | select('equalto', 'True') | list | length) == (worker_pods_status.stdout | from_json | json_query('items') | length) )