Moe
12/02/2024, 6:04 PM
Bianca Hoch
12/02/2024, 10:23 PM
`DataContext` was deprecated and removed after v0.20. For the prefect-great-expectations library, the `DataContext` is referenced here for running validations.
from prefect import flow
from prefect_great_expectations import run_checkpoint_validation
import great_expectations as gx
def create_expectation_suite_and_checkpoint():
    """Build a Great Expectations checkpoint for the sample taxi data.

    Obtains a data context with ``gx.get_context()``, reads the sample CSV
    through the context's default pandas data source to get a validator,
    registers two expectations on that validator, and wraps it in a
    ``SimpleCheckpoint`` named ``"taxi_check"``.

    Returns:
        The ``SimpleCheckpoint`` created from the context and validator.
    """
    context = gx.get_context()

    # The address must be a bare URL: wrapping it in angle brackets
    # (chat-link markup) yields a string that is not a valid URL.
    validator = context.sources.pandas_default.read_csv(
        "https://raw.githubusercontent.com/great-expectations/gx_tutorials/main/data/yellow_tripdata_sample_2019-01.csv"
    )

    validator.expect_column_values_to_not_be_null("pickup_datetime")

    # this expectation will fail
    validator.expect_column_values_to_be_between(
        "passenger_count", min_value=1, max_value=5
    )

    # checkpoints are reusable and only need to be created once
    checkpoint = gx.checkpoint.SimpleCheckpoint(
        name="taxi_check",
        data_context=context,
        validator=validator,
    )
    return checkpoint
@flow
def validation_flow(checkpoint):
    """Run the given Great Expectations checkpoint as a validation task.

    Args:
        checkpoint: The checkpoint object handed to
            ``run_checkpoint_validation``.

    Returns:
        None. The validation result is not passed back to the caller.
    """
    # The task's return value is intentionally discarded.
    run_checkpoint_validation(checkpoint=checkpoint)
    return None
if __name__ == "__main__":
    # Build the checkpoint first, then hand it to the flow for validation.
    taxi_checkpoint = create_expectation_suite_and_checkpoint()
    validation_flow(checkpoint=taxi_checkpoint)
Bianca Hoch
12/02/2024, 10:24 PM