Hi, I am trying to use the Prefect kubernetes work...
# ask-community
w
Hi, I am trying to use the Prefect kubernetes worker and it seems all set up. It creates the jobs but it seems to fail at
git clone
. I connected to the job from a remote shell, and it seems the user doesn't have write permissions anywhere in the file system. Is there any solution that doesn't involve giving root permissions to the job?
m
This will be due to the docker image the job is using. Can you provide some more information about the image you are using? Here is one I developed which works for me:
Copy code
# Image for Prefect Kubernetes jobs: Prefect base image plus git, the
# Microsoft ODBC driver / mssql-tools, a few Python packages, and Poetry.

# Start with the base image
FROM prefecthq/prefect:2-python3.11

# Set the POETRY_HOME environment variable (exported before the Poetry
# installer runs in the RUN step below)
ENV POETRY_HOME="/opt/poetry"

# Update PATH to include the Poetry bin directory
ENV PATH="$POETRY_HOME/bin:$PATH"

# Install necessary tools.
# NOTE: the URLs passed to curl must be bare. Wrapping them in <...> (as chat
# clients do when auto-linking) makes the shell treat them as I/O redirections
# and the build fails.
RUN apt-get update && \
    apt-get install -y g++ unixodbc-dev curl gnupg2 git && \
    pip install adlfs pyodbc sqlalchemy requests_toolbelt && \
    curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - && \
    curl https://packages.microsoft.com/config/debian/11/prod.list > /etc/apt/sources.list.d/mssql-release.list && \
    apt-get update && \
    ACCEPT_EULA=Y apt-get install -y msodbcsql17 && \
    ACCEPT_EULA=Y apt-get install -y mssql-tools && \
    apt-get install -y libgssapi-krb5-2 && \
    curl -sSL https://install.python-poetry.org | python -

# Append the mssql-tools bin directory to PATH
ENV PATH="${PATH}:/opt/mssql-tools/bin"

# prevent poetry from creating venv, so everything is installed globally
RUN poetry config virtualenvs.create false

# Use our custom entrypoint (entrypoint.sh must exist in the build context);
# it is launched through tini, as set in the ENTRYPOINT below
COPY entrypoint.sh /opt/prefect_k8_job/entrypoint.sh
RUN chmod +x /opt/prefect_k8_job/entrypoint.sh
ENTRYPOINT ["/usr/bin/tini", "-g", "--", "/opt/prefect_k8_job/entrypoint.sh"]
In order to use Prefect features such as task caching, you also need to configure the work pool to use a PVC so that the cache is persistent across transient job runs:
Copy code
{
  "variables": {
    "type": "object",
    "properties": {
      "env": {
        "type": "object",
        "title": "Environment Variables",
        "description": "Environment variables to set when starting a flow run.",
        "additionalProperties": {
          "type": "string"
        }
      },
      "name": {
        "type": "string",
        "title": "Name",
        "description": "Name given to infrastructure created by a worker."
      },
      "image": {
        "type": "string",
        "title": "Image",
        "example": "<http://docker.io/prefecthq/prefect:2-latest|docker.io/prefecthq/prefect:2-latest>",
        "description": "The image reference of a container image to use for created jobs. If not set, the latest Prefect image will be used."
      },
      "labels": {
        "type": "object",
        "title": "Labels",
        "description": "Labels applied to infrastructure created by a worker.",
        "additionalProperties": {
          "type": "string"
        }
      },
      "command": {
        "type": "string",
        "title": "Command",
        "description": "The command to use when starting a flow run. In most cases, this should be left blank and the command will be automatically generated by the worker."
      },
      "namespace": {
        "type": "string",
        "title": "Namespace",
        "default": "prefect",
        "description": "The Kubernetes namespace to create jobs within."
      },
      "stream_output": {
        "type": "boolean",
        "title": "Stream Output",
        "default": true,
        "description": "If set, output will be streamed from the job to local standard output."
      },
      "cluster_config": {
        "allOf": [
          {
            "$ref": "#/definitions/KubernetesClusterConfig"
          }
        ],
        "title": "Cluster Config",
        "description": "The Kubernetes cluster config to use for job creation."
      },
      "finished_job_ttl": {
        "type": "integer",
        "title": "Finished Job TTL",
        "default": 6200,
        "description": "The number of seconds to retain jobs after completion. If set, finished jobs will be cleaned up by Kubernetes after the given delay. If not set, jobs will be retained indefinitely."
      },
      "image_pull_policy": {
        "enum": [
          "IfNotPresent",
          "Always",
          "Never"
        ],
        "type": "string",
        "title": "Image Pull Policy",
        "default": "IfNotPresent",
        "description": "The Kubernetes image pull policy to use for job containers."
      },
      "service_account_name": {
        "type": "string",
        "title": "Service Account Name",
        "description": "The Kubernetes service account to use for job creation."
      },
      "job_watch_timeout_seconds": {
        "type": "integer",
        "title": "Job Watch Timeout Seconds",
        "description": "Number of seconds to wait for each event emitted by a job before timing out. If not set, the worker will wait for each event indefinitely."
      },
      "pod_watch_timeout_seconds": {
        "type": "integer",
        "title": "Pod Watch Timeout Seconds",
        "default": 60,
        "description": "Number of seconds to watch for pod creation before timing out."
      }
    },
    "definitions": {
      "KubernetesClusterConfig": {
        "type": "object",
        "title": "KubernetesClusterConfig",
        "required": [
          "config",
          "context_name"
        ],
        "properties": {
          "config": {
            "type": "object",
            "title": "Config",
            "description": "The entire contents of a kubectl config file."
          },
          "context_name": {
            "type": "string",
            "title": "Context Name",
            "description": "The name of the kubectl context to use."
          }
        },
        "description": "Stores configuration for interaction with Kubernetes clusters.\n\nSee `from_file` for creation.",
        "secret_fields": [],
        "block_type_slug": "kubernetes-cluster-config",
        "block_schema_references": {}
      }
    },
    "description": "Default variables for the Kubernetes worker.\n\nThe schema for this class is used to populate the `variables` section of the default\nbase job template."
  },
  "job_configuration": {
    "env": "{{ env }}",
    "name": "{{ name }}",
    "labels": "{{ labels }}",
    "command": "{{ command }}",
    "namespace": "{{ namespace }}",
    "job_manifest": {
      "kind": "Job",
      "spec": {
        "template": {
          "spec": {
            "volumes": [
              {
                "name": "data",
                "persistentVolumeClaim": {
                  "claimName": "prefect-pvc"
                }
              }
            ],
            "containers": [
              {
                "env": "{{ env }}",
                "args": "{{ command }}",
                "name": "prefect-job",
                "image": "{{ image }}",
                "volumeMounts": [
                  {
                    "name": "data",
                    "mountPath": "/root/.prefect"
                  }
                ],
                "imagePullPolicy": "{{ image_pull_policy }}"
              }
            ],
            "completions": 1,
            "parallelism": 1,
            "restartPolicy": "Never",
            "serviceAccountName": "{{ service_account_name }}"
          }
        },
        "backoffLimit": 0,
        "ttlSecondsAfterFinished": "{{ finished_job_ttl }}"
      },
      "metadata": {
        "labels": "{{ labels }}",
        "namespace": "{{ namespace }}",
        "generateName": "{{ name }}-"
      },
      "apiVersion": "batch/v1"
    },
    "stream_output": "{{ stream_output }}",
    "cluster_config": "{{ cluster_config }}",
    "job_watch_timeout_seconds": "{{ job_watch_timeout_seconds }}",
    "pod_watch_timeout_seconds": "{{ pod_watch_timeout_seconds }}"
  }
}