https://prefect.io logo
Title
m

Mohammad Kaif Rizvi

03/16/2023, 4:38 AM
from prefect import task
from google.cloud import dataproc_v1beta2 as dataproc
@task
def submit_pyspark_job() -> str:
    """Submit a PySpark job to a Dataproc cluster and return its job ID.

    The project, region, cluster and script URI are placeholders that must be
    replaced with real values before the task is run.

    Returns:
        The ID of the submitted Dataproc job. The task does not wait for the
        job to finish.
    """
    # Set the Dataproc cluster and job parameters
    project_id = "<project-id>"
    region = "<region>"
    cluster_name = "<cluster-name>"
    main_python_file_uri = "<main-python-file-uri>"
    args = ["<args>"]

    # Create a client to interact with the Dataproc API. Requests that name a
    # specific region have to go to that region's endpoint; the default
    # endpoint only serves the "global" region.
    client_options = None
    if region != "global":
        client_options = {"api_endpoint": f"{region}-dataproc.googleapis.com:443"}
    client = dataproc.JobControllerClient(client_options=client_options)

    # Create the job request
    job = {
        "placement": {"cluster_name": cluster_name},
        "pyspark_job": {
            "main_python_file_uri": main_python_file_uri,
            "args": args,
        },
    }

    # Submit the job to the Dataproc cluster
    operation = client.submit_job_as_operation(
        request={
            "project_id": project_id,
            "region": region,
            "job": job,
        }
    )

    # Get the ID of the submitted job. The returned long-running operation
    # wrapper has no ``name`` attribute of its own; the job ID is carried in
    # the operation metadata. Fall back to the trailing segment of the
    # operation name if no metadata was attached.
    metadata = operation.metadata
    if metadata is not None:
        job_id = metadata.job_id
    else:
        job_id = operation.operation.name.split("/")[-1]
    print(f"Submitted PySpark job with ID: {job_id}")
    return job_id
Is this a valid way to submit Dataproc jobs in Prefect?