Daryl
02/28/2024, 6:16 AMNate
02/28/2024, 1:50 PMdocker logs MYCONTAINERID
if you're still using docker compose
โข I have not yet noticed crashes, but as mentioned I haven't really pushed the envelope yetDaryl
02/28/2024, 2:58 PMDaryl
02/28/2024, 3:28 PM00:57:31.866 | ERROR | prefect.server.services.flowrunnotifications - Unexpected error in: OperationalError('(sqlite3.OperationalError) database is locked')
Traceback (most recent call last):
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/engine/base.py", line 1960, in _exec_single_context
self.dialect.do_execute(
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/engine/default.py", line 924, in do_execute
cursor.execute(statement, parameters)
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/dialects/sqlite/aiosqlite.py", line 146, in execute
self._adapt_connection._handle_exception(error)
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/dialects/sqlite/aiosqlite.py", line 298, in _handle_exception
raise error
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/dialects/sqlite/aiosqlite.py", line 128, in execute
self.await_(_cursor.execute(operation, parameters))
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/util/_concurrency_py3k.py", line 127, in await_only
return current.driver.switch(awaitable) # type: ignore[no-any-return]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/util/_concurrency_py3k.py", line 192, in greenlet_spawn
value = await result
^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/aiosqlite/cursor.py", line 48, in execute
await self._execute(self._cursor.execute, sql, parameters)
File "/usr/local/lib/python3.11/site-packages/aiosqlite/cursor.py", line 40, in _execute
return await self._conn._execute(fn, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/aiosqlite/core.py", line 132, in _execute
return await future
^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/aiosqlite/core.py", line 115, in run
result = function()
^^^^^^^^^^
sqlite3.OperationalError: database is locked
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/usr/local/lib/python3.11/site-packages/prefect/server/services/loop_service.py", line 79, in start
await self.run_once()
File "/usr/local/lib/python3.11/site-packages/prefect/server/database/dependencies.py", line 119, in async_wrapper
return await fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/prefect/server/services/flow_run_notifications.py", line 38, in run_once
notifications = await db.get_flow_run_notifications_from_queue(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/prefect/server/database/interface.py", line 365, in get_flow_run_notifications_from_queue
return await self.queries.get_flow_run_notifications_from_queue(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/prefect/server/database/query_components.py", line 1030, in get_flow_run_notifications_from_queue
await session.execute(delete_stmt)
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/ext/asyncio/session.py", line 452, in execute
result = await greenlet_spawn(
^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/util/_concurrency_py3k.py", line 197, in greenlet_spawn
result = context.throw(*sys.exc_info())
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/orm/session.py", line 2306, in execute
return self._execute_internal(
^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/orm/session.py", line 2191, in _execute_internal
result: Result[Any] = compile_state_cls.orm_execute_statement(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/orm/bulk_persistence.py", line 1946, in orm_execute_statement
return super().orm_execute_statement(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/orm/context.py", line 293, in orm_execute_statement
result = conn.execute(
^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/engine/base.py", line 1408, in execute
return meth(
^^^^^
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/sql/elements.py", line 513, in _execute_on_connection
return connection._execute_clauseelement(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/engine/base.py", line 1630, in _execute_clauseelement
ret = self._execute_context(
^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/engine/base.py", line 1839, in _execute_context
return self._exec_single_context(
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/engine/base.py", line 1979, in _exec_single_context
self._handle_dbapi_exception(
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/engine/base.py", line 2335, in _handle_dbapi_exception
raise sqlalchemy_exception.with_traceback(exc_info[2]) from e
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/engine/base.py", line 1960, in _exec_single_context
self.dialect.do_execute(
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/engine/default.py", line 924, in do_execute
cursor.execute(statement, parameters)
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/dialects/sqlite/aiosqlite.py", line 146, in execute
self._adapt_connection._handle_exception(error)
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/dialects/sqlite/aiosqlite.py", line 298, in _handle_exception
raise error
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/dialects/sqlite/aiosqlite.py", line 128, in execute
self.await_(_cursor.execute(operation, parameters))
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/util/_concurrency_py3k.py", line 127, in await_only
return current.driver.switch(awaitable) # type: ignore[no-any-return]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/util/_concurrency_py3k.py", line 192, in greenlet_spawn
value = await result
^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/aiosqlite/cursor.py", line 48, in execute
await self._execute(self._cursor.execute, sql, parameters)
File "/usr/local/lib/python3.11/site-packages/aiosqlite/cursor.py", line 40, in _execute
return await self._conn._execute(fn, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/aiosqlite/core.py", line 132, in _execute
return await future
^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/aiosqlite/core.py", line 115, in run
result = function()
^^^^^^^^^^
Nate
02/28/2024, 3:28 PMNate
02/28/2024, 3:28 PMNate
02/28/2024, 3:29 PMDaryl
02/28/2024, 3:29 PMDaryl
02/28/2024, 3:30 PMNate
02/28/2024, 3:30 PMNate
02/28/2024, 3:30 PMDaryl
02/28/2024, 3:31 PMDaryl
02/28/2024, 3:31 PMDaryl
02/28/2024, 3:34 PMNate
02/28/2024, 3:34 PMNate
02/28/2024, 3:34 PMDaryl
03/01/2024, 2:47 PMDaryl
03/01/2024, 2:47 PM03:14:57.169 | WARNING | prefect.server.services.cancellationcleanup - CancellationCleanup took 20.986676 seconds to run, which is longer than its loop interval of 20.0 seconds.
03:17:40.882 | WARNING | prefect.server.services.cancellationcleanup - CancellationCleanup took 23.537173 seconds to run, which is longer than its loop interval of 20.0 seconds.
03:20:41.330 | WARNING | prefect.server.services.cancellationcleanup - CancellationCleanup took 20.137331 seconds to run, which is longer than its loop interval of 20.0 seconds.
03:23:48.898 | WARNING | prefect.server.services.cancellationcleanup - CancellationCleanup took 27.209079 seconds to run, which is longer than its loop interval of 20.0 seconds.
03:26:36.752 | WARNING | prefect.server.services.cancellationcleanup - CancellationCleanup took 27.380853 seconds to run, which is longer than its loop interval of 20.0 seconds.
03:29:39.883 | WARNING | prefect.server.services.cancellationcleanup - CancellationCleanup took 22.671117 seconds to run, which is longer than its loop interval of 20.0 seconds.
03:32:24.949 | WARNING | prefect.server.services.cancellationcleanup - CancellationCleanup took 24.604528 seconds to run, which is longer than its loop interval of 20.0 seconds.
03:34:49.689 | WARNING | prefect.server.services.cancellationcleanup - CancellationCleanup took 24.256178 seconds to run, which is longer than its loop interval of 20.0 seconds.
03:35
Daryl
03/01/2024, 2:47 PMDaryl
03/02/2024, 12:43 AMNate
03/02/2024, 1:01 AMHappy to DM you the codeid love to stress test and see if we can update our handling
Nate
03/02/2024, 1:02 AMDaryl
03/02/2024, 2:08 AMDaryl
03/02/2024, 8:48 AMDaryl
03/02/2024, 8:49 AMDaryl
03/12/2024, 12:50 AMdocker logs
(since that seemed to be indicative of the crashed server flow controlling the subflow.).
Is there a way to have a backup server worker to fail back to in the web runner to take over for the other workers? (realizing that that may be getting a bit advanced.).Matthew Bell
03/14/2024, 9:53 PMMatthew Bell
03/14/2024, 10:11 PMmemory_profiler
module (found here)
from memory_profiler import profile
from prefect import flow, task
@task
@profile
def with_task():
return [{"abc": "123"} for _ in range(10_000)]
@profile
def without_task():
return [{"abc": "123"} for _ in range(10_000)]
@flow
@profile
def main():
for i in range(10):
# with_task()
without_task()
print("DONE")
main()
Run that with_task
and without_task
and watch how memory grows exponentially when we use tasks, but does not grow when we don't.Alexander Azzam
03/14/2024, 10:16 PMMatthew Bell
03/14/2024, 10:17 PMAlexander Azzam
03/14/2024, 10:21 PMMatthew Bell
03/14/2024, 10:22 PMAndrew Brookins
03/14/2024, 10:24 PMMatthew Bell
03/14/2024, 10:26 PMAndrew Brookins
03/14/2024, 10:27 PMAlexander Azzam
03/14/2024, 10:27 PMMatthew Bell
03/15/2024, 2:52 AMMatthew Bell
03/15/2024, 5:10 AMAndrew Brookins
03/15/2024, 5:07 PMMatthew Bell
03/15/2024, 5:12 PMMatthew Bell
03/15/2024, 5:14 PMMatthew Bell
03/15/2024, 5:22 PMtracemalloc
Andrew Brookins
03/15/2024, 5:22 PMprefect version
It should look like this, with extended output, not just the version #:
prefect version
Version: 2.16.3
API version: 0.8.4
Python version: 3.12.2
Git commit: e3f02c00
Built: Thu, Mar 7, 2024 4:56 PM
OS/Arch: darwin/arm64
Profile: default
Server type: cloud
Also, Python version and OS version? I suppose Python will be in that output, so just OS would help.Matthew Bell
03/15/2024, 5:24 PMPython 3.12.1
Apple M3, Sonoma 14.2.1
prefect version
Version: 2.16.4
API version: 0.8.4
Python version: 3.12.1
Git commit: e3e7df9d
Built: Thu, Mar 14, 2024 5:11 PM
OS/Arch: darwin/arm64
Profile: production
Server type: cloud
Andrew Brookins
03/15/2024, 5:24 PMMatthew Bell
03/15/2024, 5:25 PMAndrew Brookins
03/15/2024, 5:26 PMAndrew Brookins
03/15/2024, 5:27 PMMatthew Bell
03/15/2024, 5:29 PMfrom memory_profiler import profile
from prefect import flow, task
@task
@profile
def with_task():
return [{"abc": "123"} for _ in range(10_000)]
@profile
def without_task():
return [{"abc": "123"} for _ in range(10_000)]
@flow
@profile
def main():
for i in range(5):
with_task()
# without_task()
print("DONE")
main()
13:27:57.390 | INFO | prefect.engine - Created flow run 'tomato-civet' for flow 'main'
13:27:57.393 | INFO | Flow run 'tomato-civet' - View at <https://app.prefect.cloud/account/9a597790-3884-4982-99c8-7f9f55834ae7/workspace/38e23fa4-ca5f-4a46-9bcf-7b9ec03077ef/flow-runs/flow-run/6a95fe30-0b63-4ac6-aafd-063663325c45>
13:27:57.989 | INFO | Flow run 'tomato-civet' - Created task run 'with_task-0' for task 'with_task'
13:27:57.991 | INFO | Flow run 'tomato-civet' - Executing 'with_task-0' immediately...
Filename: /Users/matthewbell/github/kpi-pipes/src/cdk_flow/test_flow.py
Line # Mem usage Increment Occurrences Line Contents
=============================================================
5 188.4 MiB 188.4 MiB 1 @task
6 @profile
7 def with_task():
8 190.4 MiB 1.9 MiB 10001 return [{"abc": "123"} for _ in range(10_000)]
13:27:58.662 | INFO | Task run 'with_task-0' - Finished in state Completed()
13:27:58.803 | INFO | Flow run 'tomato-civet' - Created task run 'with_task-1' for task 'with_task'
13:27:58.805 | INFO | Flow run 'tomato-civet' - Executing 'with_task-1' immediately...
Filename: /Users/matthewbell/github/kpi-pipes/src/cdk_flow/test_flow.py
Line # Mem usage Increment Occurrences Line Contents
=============================================================
5 193.0 MiB 193.0 MiB 1 @task
6 @profile
7 def with_task():
8 194.8 MiB 1.8 MiB 10001 return [{"abc": "123"} for _ in range(10_000)]
13:27:59.607 | INFO | Task run 'with_task-1' - Finished in state Completed()
13:27:59.737 | INFO | Flow run 'tomato-civet' - Created task run 'with_task-2' for task 'with_task'
13:27:59.739 | INFO | Flow run 'tomato-civet' - Executing 'with_task-2' immediately...
Filename: /Users/matthewbell/github/kpi-pipes/src/cdk_flow/test_flow.py
Line # Mem usage Increment Occurrences Line Contents
=============================================================
5 195.7 MiB 195.7 MiB 1 @task
6 @profile
7 def with_task():
8 197.5 MiB 1.9 MiB 10001 return [{"abc": "123"} for _ in range(10_000)]
13:28:00.372 | INFO | Task run 'with_task-2' - Finished in state Completed()
13:28:00.526 | INFO | Flow run 'tomato-civet' - Created task run 'with_task-3' for task 'with_task'
13:28:00.528 | INFO | Flow run 'tomato-civet' - Executing 'with_task-3' immediately...
Filename: /Users/matthewbell/github/kpi-pipes/src/cdk_flow/test_flow.py
Line # Mem usage Increment Occurrences Line Contents
=============================================================
5 198.6 MiB 198.6 MiB 1 @task
6 @profile
7 def with_task():
8 200.4 MiB 1.8 MiB 10001 return [{"abc": "123"} for _ in range(10_000)]
13:28:01.269 | INFO | Task run 'with_task-3' - Finished in state Completed()
13:28:01.424 | INFO | Flow run 'tomato-civet' - Created task run 'with_task-4' for task 'with_task'
13:28:01.425 | INFO | Flow run 'tomato-civet' - Executing 'with_task-4' immediately...
Filename: /Users/matthewbell/github/kpi-pipes/src/cdk_flow/test_flow.py
Line # Mem usage Increment Occurrences Line Contents
=============================================================
5 200.9 MiB 200.9 MiB 1 @task
6 @profile
7 def with_task():
8 202.7 MiB 1.8 MiB 10001 return [{"abc": "123"} for _ in range(10_000)]
13:28:02.108 | INFO | Task run 'with_task-4' - Finished in state Completed()
DONE
Filename: /Users/matthewbell/github/kpi-pipes/src/cdk_flow/test_flow.py
Line # Mem usage Increment Occurrences Line Contents
=============================================================
16 185.1 MiB 185.1 MiB 1 @flow
17 @profile
18 def main():
19 205.5 MiB 0.0 MiB 6 for i in range(5):
20 205.5 MiB 20.4 MiB 5 with_task()
21 # without_task()
22 205.5 MiB 0.0 MiB 1 print("DONE")
13:28:02.291 | INFO | Flow run 'tomato-civet' - Finished in state Completed('All states completed.')
Andrew Brookins
03/15/2024, 5:32 PMMatthew Bell
03/15/2024, 5:47 PMflow_run_context.task_run_results
isn't getting cleared as we loop through our tasks. Commenting out line 2688 in prefect/engine.py
reduces the leak substantially, albeit not entirely.Matthew Bell
03/15/2024, 6:06 PMMatthew Bell
03/15/2024, 10:30 PMcache_results_in_memory=False
. This helps a ton with memory consumption and I believe addresses my above concern. But the leak still persists even with this.Matthew Bell
03/25/2024, 3:24 PMAlexander Azzam
03/25/2024, 3:26 PMMatthew Bell
03/25/2024, 3:27 PMAlexander Azzam
03/25/2024, 3:47 PM