Hamza Ahmed
09/01/2021, 12:37 AM
A Lazarus process attempted to reschedule this run 3 times without success. Marking as failed.
The only other errors are in the prefect towel logs [see comments]
Any clue as to what could be wrong?
Hamza Ahmed
09/01/2021, 12:37 AM
{"severity": "ERROR", "name": "prefect-server.Lazarus", "message": "Unexpected error: ConnectError(gaierror(-3, 'Temporary failure in name resolution'))", "exc_info": "Traceback (most recent call last):
File "/prefect-server/src/prefect_server/services/loop_service.py", line 60, in run
await self.run_once()
File "/prefect-server/src/prefect_server/services/towel/lazarus.py", line 37, in run_once
return await self.reschedule_flow_runs()
File "/prefect-server/src/prefect_server/services/towel/lazarus.py", line 89, in reschedule_flow_runs
limit=5000,
File "/prefect-server/src/prefect_server/database/orm.py", line 501, in get
as_box=not apply_schema,
File "/prefect-server/src/prefect_server/database/hasura.py", line 85, in execute
as_box=as_box,
File "/prefect-server/src/prefect_server/utilities/graphql.py", line 64, in execute
timeout=30,
File "/usr/local/lib/python3.7/site-packages/httpx/_client.py", line 1385, in post
timeout=timeout,
File "/usr/local/lib/python3.7/site-packages/httpx/_client.py", line 1148, in request
request, auth=auth, allow_redirects=allow_redirects, timeout=timeout,
File "/usr/local/lib/python3.7/site-packages/httpx/_client.py", line 1169, in send
request, auth=auth, timeout=timeout, allow_redirects=allow_redirects,
File "/usr/local/lib/python3.7/site-packages/httpx/_client.py", line 1196, in send_handling_redirects
request, auth=auth, timeout=timeout, history=history
File "/usr/local/lib/python3.7/site-packages/httpx/_client.py", line 1232, in send_handling_auth
response = await self.send_single_request(request, timeout)
File "/usr/local/lib/python3.7/site-packages/httpx/_client.py", line 1269, in send_single_request
timeout=timeout.as_dict(),
File "/usr/local/lib/python3.7/site-packages/httpcore/_async/connection_pool.py", line 153, in request
method, url, headers=headers, stream=stream, timeout=timeout
File "/usr/local/lib/python3.7/site-packages/httpcore/_async/connection.py", line 65, in request
self.socket = await self._open_socket(timeout)
File "/usr/local/lib/python3.7/site-packages/httpcore/_async/connection.py", line 86, in _open_socket
hostname, port, ssl_context, timeout
File "/usr/local/lib/python3.7/site-packages/httpcore/_backends/auto.py", line 38, in open_tcp_stream
return await self.backend.open_tcp_stream(hostname, port, ssl_context, timeout)
File "/usr/local/lib/python3.7/site-packages/httpcore/_backends/asyncio.py", line 234, in open_tcp_stream
stream_reader=stream_reader, stream_writer=stream_writer
File "/usr/local/lib/python3.7/contextlib.py", line 130, in __exit__
self.gen.throw(type, value, traceback)
File "/usr/local/lib/python3.7/site-packages/httpcore/_exceptions.py", line 12, in map_exceptions
raise to_exc(exc) from None
httpcore._exceptions.ConnectError: [Errno -3] Temporary failure in name resolution"}
{"severity": "ERROR", "name": "prefect-server.Scheduler", "message": "Unexpected error: ConnectError(gaierror(-3, 'Temporary failure in name resolution'))", "exc_info": "Traceback (most recent call last):
File "/prefect-server/src/prefect_server/services/loop_service.py", line 60, in run
await self.run_once()
File "/prefect-server/src/prefect_server/services/towel/scheduler.py", line 46, in run_once
offset=500 * iterations,
File "/prefect-server/src/prefect_server/database/orm.py", line 501, in get
as_box=not apply_schema,
File "/prefect-server/src/prefect_server/database/hasura.py", line 85, in execute
as_box=as_box,
File "/prefect-server/src/prefect_server/utilities/graphql.py", line 64, in execute
timeout=30,
File "/usr/local/lib/python3.7/site-packages/httpx/_client.py", line 1385, in post
timeout=timeout,
File "/usr/local/lib/python3.7/site-packages/httpx/_client.py", line 1148, in request
request, auth=auth, allow_redirects=allow_redirects, timeout=timeout,
File "/usr/local/lib/python3.7/site-packages/httpx/_client.py", line 1169, in send
request, auth=auth, timeout=timeout, allow_redirects=allow_redirects,
File "/usr/local/lib/python3.7/site-packages/httpx/_client.py", line 1196, in send_handling_redirects
request, auth=auth, timeout=timeout, history=history
File "/usr/local/lib/python3.7/site-packages/httpx/_client.py", line 1232, in send_handling_auth
response = await self.send_single_request(request, timeout)
File "/usr/local/lib/python3.7/site-packages/httpx/_client.py", line 1269, in send_single_request
timeout=timeout.as_dict(),
File "/usr/local/lib/python3.7/site-packages/httpcore/_async/connection_pool.py", line 153, in request
method, url, headers=headers, stream=stream, timeout=timeout
File "/usr/local/lib/python3.7/site-packages/httpcore/_async/connection.py", line 65, in request
self.socket = await self._open_socket(timeout)
File "/usr/local/lib/python3.7/site-packages/httpcore/_async/connection.py", line 86, in _open_socket
hostname, port, ssl_context, timeout
File "/usr/local/lib/python3.7/site-packages/httpcore/_backends/auto.py", line 38, in open_tcp_stream
return await self.backend.open_tcp_stream(hostname, port, ssl_context, timeout)
File "/usr/local/lib/python3.7/site-packages/httpcore/_backends/asyncio.py", line 234, in open_tcp_stream
stream_reader=stream_reader, stream_writer=stream_writer
File "/usr/local/lib/python3.7/contextlib.py", line 130, in __exit__
self.gen.throw(type, value, traceback)
File "/usr/local/lib/python3.7/site-packages/httpcore/_exceptions.py", line 12, in map_exceptions
raise to_exc(exc) from None
httpcore._exceptions.ConnectError: [Errno -3] Temporary failure in name resolution"}
{"severity": "ERROR", "name": "prefect-server.ZombieKiller", "message": "Unexpected error: ConnectError(gaierror(-3, 'Temporary failure in name resolution'))", "exc_info": "Traceback (most recent call last):
File "/prefect-server/src/prefect_server/services/loop_service.py", line 60, in run
await self.run_once()
File "/prefect-server/src/prefect_server/services/towel/zombie_killer.py", line 216, in run_once
await self.reap_zombie_task_runs()
File "/prefect-server/src/prefect_server/services/towel/zombie_killer.py", line 153, in reap_zombie_task_runs
apply_schema=False,
File "/prefect-server/src/prefect_server/database/orm.py", line 501, in get
as_box=not apply_schema,
File "/prefect-server/src/prefect_server/database/hasura.py", line 85, in execute
as_box=as_box,
File "/prefect-server/src/prefect_server/utilities/graphql.py", line 64, in execute
timeout=30,
File "/usr/local/lib/python3.7/site-packages/httpx/_client.py", line 1385, in post
timeout=timeout,
File "/usr/local/lib/python3.7/site-packages/httpx/_client.py", line 1148, in request
request, auth=auth, allow_redirects=allow_redirects, timeout=timeout,
File "/usr/local/lib/python3.7/site-packages/httpx/_client.py", line 1169, in send
request, auth=auth, timeout=timeout, allow_redirects=allow_redirects,
File "/usr/local/lib/python3.7/site-packages/httpx/_client.py", line 1196, in send_handling_redirects
request, auth=auth, timeout=timeout, history=history
File "/usr/local/lib/python3.7/site-packages/httpx/_client.py", line 1232, in send_handling_auth
response = await self.send_single_request(request, timeout)
File "/usr/local/lib/python3.7/site-packages/httpx/_client.py", line 1269, in send_single_request
timeout=timeout.as_dict(),
File "/usr/local/lib/python3.7/site-packages/httpcore/_async/connection_pool.py", line 153, in request
method, url, headers=headers, stream=stream, timeout=timeout
File "/usr/local/lib/python3.7/site-packages/httpcore/_async/connection.py", line 65, in request
self.socket = await self._open_socket(timeout)
File "/usr/local/lib/python3.7/site-packages/httpcore/_async/connection.py", line 86, in _open_socket
hostname, port, ssl_context, timeout
File "/usr/local/lib/python3.7/site-packages/httpcore/_backends/auto.py", line 38, in open_tcp_stream
return await self.backend.open_tcp_stream(hostname, port, ssl_context, timeout)
File "/usr/local/lib/python3.7/site-packages/httpcore/_backends/asyncio.py", line 234, in open_tcp_stream
stream_reader=stream_reader, stream_writer=stream_writer
File "/usr/local/lib/python3.7/contextlib.py", line 130, in __exit__
self.gen.throw(type, value, traceback)
File "/usr/local/lib/python3.7/site-packages/httpcore/_exceptions.py", line 12, in map_exceptions
raise to_exc(exc) from None
httpcore._exceptions.ConnectError: [Errno -3] Temporary failure in name resolution"}
{"severity": "INFO", "name": "prefect-server.ZombieKiller", "message": "Zombie killer found 1 flow runs."}
{"severity": "INFO", "name": "prefect-server.ZombieKiller", "message": "Addressed 1 zombie flow runs."}
{"severity": "INFO", "name": "prefect-server.Scheduler", "message": "Scheduled 0 flow runs."}
{"severity": "INFO", "name": "prefect-server.Scheduler", "message": "Scheduled 0 flow runs."}
{"severity": "DEBUG", "name": "prefect-server.Scheduler", "message": "Heartbeat from Scheduler: next run at 2021-09-01T00:02:17+00:00"}
{"severity": "DEBUG", "name": "prefect-server.ZombieKiller", "message": "Heartbeat from ZombieKiller: next run at 2021-09-01T00:02:47+00:00"}
{"severity": "INFO", "name": "prefect-server.Scheduler", "message": "Scheduled 0 flow runs."}
{"severity": "INFO", "name": "prefect-server.Lazarus", "message": "Found 1 flow runs to reschedule with a Lazarus process"}
{"severity": "DEBUG", "name": "prefect-server.Lazarus", "message": "Heartbeat from Lazarus: next run at 2021-09-01T00:14:47+00:00"}
{"severity": "INFO", "name": "prefect-server.Scheduler", "message": "Scheduled 0 flow runs."}
{"severity": "DEBUG", "name": "prefect-server.Scheduler", "message": "Heartbeat from Scheduler: next run at 2021-09-01T00:07:17+00:00"}
{"severity": "DEBUG", "name": "prefect-server.ZombieKiller", "message": "Heartbeat from ZombieKiller: next run at 2021-09-01T00:08:47+00:00"}
{"severity": "INFO", "name": "prefect-server.Scheduler", "message": "Scheduled 0 flow runs."}
{"severity": "INFO", "name": "prefect-server.ZombieKiller", "message": "Zombie killer found 1 flow runs."}
{"severity": "INFO", "name": "prefect-server.ZombieKiller", "message": "Addressed 1 zombie flow runs."}
{"severity": "INFO", "name": "prefect-server.Scheduler", "message": "Scheduled 0 flow runs."}
{"severity": "DEBUG", "name": "prefect-server.Scheduler", "message": "Heartbeat from Scheduler: next run at 2021-09-01T00:12:17+00:00"}
{"severity": "INFO", "name": "prefect-server.Scheduler", "message": "Scheduled 0 flow runs."}
{"severity": "DEBUG", "name": "prefect-server.ZombieKiller", "message": "Heartbeat from ZombieKiller: next run at 2021-09-01T00:14:47+00:00"}
{"severity": "INFO", "name": "prefect-server.Lazarus", "message": "Found 2 flow runs to reschedule with a Lazarus process"}
{"severity": "INFO", "name": "prefect-server.Lazarus", "message": "Lazarus process rescheduled 2 flow runs."}
{"severity": "DEBUG", "name": "prefect-server.Lazarus", "message": "Heartbeat from Lazarus: next run at 2021-09-01T00:24:47+00:00"}
{"severity": "INFO", "name": "prefect-server.Scheduler", "message": "Scheduled 0 flow runs."}
{"severity": "DEBUG", "name": "prefect-server.Scheduler", "message": "Heartbeat from Scheduler: next run at 2021-09-01T00:17:17+00:00"}
Kevin Kho
Hamza Ahmed
09/01/2021, 3:14 AM
helm install prefect prefecthq/prefect-server \
--set agent.enabled=true \
--set serverVersionTag=core-0.15.4 \
--set prefectVersionTag=0.15.4-python3.8 \
--set uiVersionTag=core-0.15.4 \
--set postgresql.postgresqlDatabase=prefect \
--set postgresql.postgresqlUsername=<database username> \
--set postgresql.existingSecret=<pre-existing postgresql password secret> \
--set postgresql.externalHostname=<rds database hostname> \
--set postgresql.useSubChart=false
The postgresql database had data from the prior version of prefect (including flow run history)
Kevin Kho
Hamza Ahmed
09/01/2021, 3:41 AM
Hamza Ahmed
09/01/2021, 3:41 AM
[2021-09-01 03:38:07+0000] INFO - prefect.CloudFlowRunner | Beginning Flow run for 'Pre-flight checks'
[2021-09-01 03:38:07+0000] INFO - prefect.DaskExecutor | Creating a new Dask cluster with `pypline.lib.deployment.get_dask_executor.<locals>.<lambda>`...
distributed.http.proxy - INFO - To route to workers diagnostics web server please install jupyter-server-proxy: python -m pip install jupyter-server-proxy
distributed.scheduler - INFO - Clear task state
distributed.scheduler - INFO - Scheduler at: tcp://10.2.4.166:38257
distributed.scheduler - INFO - dashboard at: :8787
[2021-09-01 03:38:07+0000] INFO - prefect.DaskExecutor | The Dask dashboard is available at http://10.2.4.166:8787/status
distributed.deploy.adaptive - INFO - Adaptive scaling started: minimum=1 maximum=50
distributed.scheduler - INFO - Receive client connection: Client-05e0b61e-0ad6-11ec-8001-3e55b53415d8
distributed.core - INFO - Starting established connection
distributed.scheduler - INFO - Register worker <Worker 'tcp://10.2.4.170:39799', name: 0, memory: 0, processing: 4>
distributed.scheduler - INFO - Starting worker compute stream, tcp://10.2.4.170:39799
distributed.core - INFO - Starting established connection
distributed.scheduler - INFO - Remove client Client-05e0b61e-0ad6-11ec-8001-3e55b53415d8
distributed.scheduler - INFO - Remove client Client-05e0b61e-0ad6-11ec-8001-3e55b53415d8
distributed.scheduler - INFO - Close client connection: Client-05e0b61e-0ad6-11ec-8001-3e55b53415d8
distributed.scheduler - INFO - Scheduler closing...
distributed.scheduler - INFO - Scheduler closing all comms
distributed.scheduler - INFO - Remove worker <Worker 'tcp://10.2.4.170:39799', name: 0, memory: 0, processing: 0>
distributed.core - INFO - Removing comms to tcp://10.2.4.170:39799
distributed.scheduler - INFO - Lost all workers
[2021-09-01 03:38:13+0000] INFO - prefect.CloudFlowRunner | Flow run RUNNING: terminal tasks are incomplete.
Unclosed client session
client_session: <aiohttp.client.ClientSession object at 0x7fbdb37af130>
Unclosed connector
connections: ['[(<aiohttp.client_proto.ResponseHandler object at 0x7fbdb0b42dc0>, 15701.659448984)]']
connector: <aiohttp.connector.TCPConnector object at 0x7fbdb37adb80>
Fatal error on SSL transport
protocol: <asyncio.sslproto.SSLProtocol object at 0x7fbdb0c63460>
transport: <_SelectorSocketTransport closing fd=23>
Traceback (most recent call last):
File "/usr/local/lib/python3.8/asyncio/selector_events.py", line 910, in write
n = self._sock.send(data)
OSError: [Errno 9] Bad file descriptor
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/lib/python3.8/asyncio/sslproto.py", line 685, in _process_write_backlog
self._transport.write(chunk)
File "/usr/local/lib/python3.8/asyncio/selector_events.py", line 916, in write
self._fatal_error(exc, 'Fatal write error on socket transport')
File "/usr/local/lib/python3.8/asyncio/selector_events.py", line 711, in _fatal_error
self._force_close(exc)
File "/usr/local/lib/python3.8/asyncio/selector_events.py", line 723, in _force_close
self._loop.call_soon(self._call_connection_lost, exc)
File "/usr/local/lib/python3.8/asyncio/base_events.py", line 719, in call_soon
self._check_closed()
File "/usr/local/lib/python3.8/asyncio/base_events.py", line 508, in _check_closed
raise RuntimeError('Event loop is closed')
RuntimeError: Event loop is closed
Task was destroyed but it is pending!
task: <Task pending name='Task-135' coro=<AdaptiveCore.adapt() running at /usr/local/lib/python3.8/site-packages/distributed/deploy/adaptive_core.py:191> wait_for=<Future pending cb=[<TaskWakeupMethWrapper object at 0x7fbdb0b9de50>()]> cb=[IOLoop.add_future.<locals>.<lambda>() at /usr/local/lib/python3.8/site-packages/tornado/ioloop.py:688]>
Hamza Ahmed
09/01/2021, 3:42 AM
Kevin Kho
Kevin Kho
Hamza Ahmed
09/01/2021, 3:46 AM
Hamza Ahmed
09/01/2021, 3:46 AM
Kevin Kho
Hamza Ahmed
09/01/2021, 3:50 AM
Zanie
Hamza Ahmed
09/01/2021, 6:27 PM
ERROR - prefect.CloudTaskRunner | Failed to retrieve task state with error: ConnectionError(MaxRetryError("HTTPConnectionPool(host='localhost', port=4200): Max retries exceeded with url: /graphql (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7f5249fea820>: Failed to establish a new connection: [Errno 111] Connection refused'))"))
Hamza Ahmed
09/01/2021, 6:31 PM
localhost
Kevin Kho
Zanie
Zanie
Hamza Ahmed
09/01/2021, 7:20 PM
Hamza Ahmed
09/01/2021, 7:21 PM
Kevin Kho
Kevin Kho
Hamza Ahmed
09/01/2021, 7:32 PM
Hamza Ahmed
09/01/2021, 7:33 PM
Zanie
master image it should have this fix in now
Hamza Ahmed
09/01/2021, 7:43 PM
["aws", "viz", "templates","kubernetes"] be included if I just use git+https://github.com/PrefectHQ/prefect.git to add the master package?
Zanie
Hamza Ahmed
09/01/2021, 7:50 PM
Kevin Kho
Hamza Ahmed
09/01/2021, 10:25 PM
Hamza Ahmed
09/01/2021, 10:25 PM