如何解决:在 Minikube 中通过 sparkapplication.yaml 部署并运行 Spark 作业时,Spark driver 不会挂载 hostPath
我是 spark 和 minikube 的新手。我通过 sparkapplication.yaml 运行 spark job 时遇到了这个问题:spark driver 和 executor 都已成功创建,但它们都没有挂载 hostPath。我参考了 Tom Louis 的 minikube-spark 示例。如果我直接在 Dockerfile 中用 COPY 指令把数据放进 sparkjob 镜像里,一切运行正常。
目前,数据 (*.csv) 的路径是:本地文件夹 →(已挂载)→ minikube →(未挂载)→ spark driver Pod。
我不知道为什么 hostPath 没有被挂载,可能是我哪里做错了 ^^; 有人可以帮我看看这个问题吗?非常感谢!
templates/sparkapplication.yaml
{{- /*
SparkApplication manifest for the spark-on-k8s-operator (v1beta2 API).

All tunables come from values.yaml. Release and chart metadata are copied into
the labels of the application, the driver and the executors.

Volumes: `.Values.volumes` is rendered verbatim into spec.volumes, and every
entry of `.Values.mounts` (volume name -> mount path) becomes a volumeMount on
both the driver and the executors, so each mount name must match a volume name.

NOTE(review): per the spark-on-k8s-operator user guide, volumes/volumeMounts
are applied to the pods by the operator's mutating admission webhook. If the
pods start without the mounts, confirm the operator was installed with the
webhook enabled.
*/ -}}
apiVersion: sparkoperator.k8s.io/v1beta2
kind: SparkApplication
metadata:
  name: {{ .Release.Name | trunc 63 | quote }}
  labels:
    chartname: {{ .Chart.Name | trunc 63 | quote }}
    release: {{ .Release.Name | trunc 63 | quote }}
    revision: {{ .Release.Revision | quote }}
    sparkVersion: {{ .Values.sparkVersion | quote }}
    version: {{ .Chart.Version | quote }}
spec:
  type: Scala
  mode: cluster
  image: {{ list .Values.imageRegistry .Values.image | join "/" | quote }}
  imagePullPolicy: {{ .Values.imagePullPolicy }}
  {{- if .Values.imagePullSecrets }}
  imagePullSecrets:
    {{- range .Values.imagePullSecrets }}
    - {{ . | quote }}
    {{- end }}
  {{- end }}
  mainClass: {{ .Values.mainClass | quote }}
  mainApplicationFile: {{ .Values.jar | quote }}
  {{- if .Values.arguments }}
  arguments:
    {{- range .Values.arguments }}
    - {{ . | quote }}
    {{- end }}
  {{- end }}
  sparkVersion: {{ .Values.sparkVersion | quote }}
  restartPolicy:
    type: Never
  {{- /* deps only holds jars/files; emit it only when one of them is set. */}}
  {{- if or .Values.jarDependencies .Values.fileDependencies }}
  deps:
    {{- if .Values.jarDependencies }}
    jars:
      {{- range .Values.jarDependencies }}
      - {{ . | quote }}
      {{- end }}
    {{- end }}
    {{- if .Values.fileDependencies }}
    files:
      {{- range .Values.fileDependencies }}
      - {{ . | quote }}
      {{- end }}
    {{- end }}
  {{- end }}
  {{- /* sparkConf / hadoopConf are fields of spec, not of deps. */}}
  {{- if .Values.sparkConf }}
  sparkConf:
    {{- range $conf, $value := .Values.sparkConf }}
    {{ $conf | quote }}: {{ $value | quote }}
    {{- end }}
  {{- end }}
  {{- if .Values.hadoopConf }}
  hadoopConf:
    {{- range $conf, $value := .Values.hadoopConf }}
    {{ $conf | quote }}: {{ $value | quote }}
    {{- end }}
  {{- end }}
  driver:
    {{- if .Values.envSecretKeyRefs }}
    envSecretKeyRefs:
      {{- range $name, $value := .Values.envSecretKeyRefs }}
      {{ $name }}:
        name: {{ $value.name }}
        key: {{ $value.key }}
      {{- end }}
    {{- end }}
    {{- if .Values.envVars }}
    envVars:
      {{- range $name, $value := .Values.envVars }}
      {{ $name | quote }}: {{ $value | quote }}
      {{- end }}
    {{- end }}
    securityContext:
      runAsUser: {{ .Values.userId }}
    cores: {{ .Values.driver.cores }}
    coreLimit: {{ .Values.driver.coreLimit | default .Values.driver.cores | quote }}
    memory: {{ .Values.driver.memory }}
    hostNetwork: {{ .Values.hostNetwork }}
    labels:
      release: {{ .Release.Name | trunc 63 | quote }}
      revision: {{ .Release.Revision | quote }}
      sparkVersion: {{ .Values.sparkVersion | quote }}
      version: {{ .Chart.Version | quote }}
    serviceAccount: {{ .Values.serviceAccount }}
    {{- if .Values.javaOptions }}
    javaOptions: {{ .Values.javaOptions | quote }}
    {{- end }}
    {{- if .Values.mounts }}
    volumeMounts:
      {{- range $name, $path := .Values.mounts }}
      - name: {{ $name }}
        mountPath: {{ $path }}
      {{- end }}
    {{- end }}
    {{- if .Values.tolerations }}
    tolerations:
      {{- toYaml .Values.tolerations | nindent 6 }}
    {{- end }}
  executor:
    {{- if .Values.envVars }}
    envVars:
      {{- range $name, $value := .Values.envVars }}
      {{ $name | quote }}: {{ $value | quote }}
      {{- end }}
    {{- end }}
    securityContext:
      runAsUser: {{ .Values.userId }}
    cores: {{ .Values.executor.cores }}
    coreLimit: {{ .Values.executor.coreLimit | default .Values.executor.cores | quote }}
    instances: {{ .Values.executor.instances }}
    memory: {{ .Values.executor.memory }}
    labels:
      release: {{ .Release.Name | trunc 63 | quote }}
      revision: {{ .Release.Revision | quote }}
      sparkVersion: {{ .Values.sparkVersion | quote }}
      version: {{ .Chart.Version | quote }}
    serviceAccount: {{ .Values.serviceAccount }}
    {{- if .Values.javaOptions }}
    {{- /* Quoted like the driver's javaOptions so values containing ": " or " #" stay one string. */}}
    javaOptions: {{ .Values.javaOptions | quote }}
    {{- end }}
    {{- if .Values.mounts }}
    volumeMounts:
      {{- range $name, $path := .Values.mounts }}
      - name: {{ $name }}
        mountPath: {{ $path }}
      {{- end }}
    {{- end }}
    {{- if .Values.tolerations }}
    tolerations:
      {{- toYaml .Values.tolerations | nindent 6 }}
    {{- end }}
  {{- if .Values.jmxExporterJar }}
  monitoring:
    exposeDriverMetrics: true
    exposeExecutorMetrics: true
    prometheus:
      port: {{ .Values.jmxPort | default 8090 }}
      jmxExporterJar: {{ .Values.jmxExporterJar }}
  {{- end }}
  {{- /* Render the volumes declared in values.yaml instead of a hard-coded copy of them. */}}
  {{- if .Values.volumes }}
  volumes:
    {{- toYaml .Values.volumes | nindent 4 }}
  {{- end }}
  {{- if .Values.nodeSelector }}
  nodeSelector:
    {{- toYaml .Values.nodeSelector | nindent 4 }}
  {{- end }}
values.yaml
# Generated by build.sbt. Please don't manually update
version: 0.1
sparkVersion: 3.0.2
image: kaspi/kaspi-sparkjob:0.1
jar: local:///opt/spark/jars/kaspi-kaspi-sparkjob.jar
mainClass: kaspi.sparkjob
fileDependencies: []
environment: minikube
serviceAccount: spark-spark
imageRegistry: localhost:5000
# Passed to the Spark main class; these paths match the mount paths below.
arguments:
  - "/mnt/data-in/"
  - "/mnt/data-out/"
# Pod volumes, backed by directories on the (minikube) node.
volumes:
  - name: input-data
    hostPath:
      path: /input-data
  - name: output-data
    hostPath:
      path: /output-data
# volume name -> mount path inside the driver and executor containers.
mounts:
  input-data: /mnt/data-in
  output-data: /mnt/data-out
driver:
  cores: 1
  memory: "2g"
executor:
  instances: 2
  cores: 1
  memory: "1g"
hadoopConf:
sparkConf:
hostNetwork: false
imagePullPolicy: Never
userId: 0
build.sbt
// Spark release all Spark modules below are pinned to.
val sparkVersion = "3.0.2"

// Spark modules the job compiles against, one per line.
val sparkLibs = Seq(
  "org.apache.spark" %% "spark-core"      % sparkVersion,
  "org.apache.spark" %% "spark-sql"       % sparkVersion,
  "org.apache.spark" %% "spark-streaming" % sparkVersion,
  "org.apache.spark" %% "spark-mllib"     % sparkVersion
)
// Settings shared by the build: coordinates, Scala version and the Spark libraries.
lazy val commonSettings = Seq(
  organization := "kaspi",
  scalaVersion := "2.12.13",
  version := "0.1",
  libraryDependencies ++= sparkLibs
)
// Prefix used for the jar name, the docker image name and the chart name.
val domain = "kaspi"

// for building FAT jar
// Uses the sbt slash syntax on both sides (the original mixed it with the
// deprecated `assemblyOption in assembly` form).
lazy val assemblySettings = Seq(
  // The Scala library is left out of the fat jar.
  assembly / assemblyOption := (assembly / assemblyOption).value.copy(includeScala = false),
  // Write the jar to <project>/output/<domain>-<name>.jar.
  assembly / assemblyOutputPath := baseDirectory.value / "output" / s"${domain}-${name.value}.jar"
)
// Directory inside the image where the fat jar is placed.
val targetDockerJarPath = "/opt/spark/jars"

// Registry of the base image; override with -DbaseRegistry=...
val baseRegistry = sys.props.getOrElse("baseRegistry", default = "localhost:5000")

// for building docker image
lazy val dockerSettings = Seq(
  // Tag the image both as :latest and with the project version.
  // (No trailing comma: `,)` on a single line does not compile.)
  docker / imageNames := Seq(
    ImageName(s"$domain/${name.value}:latest"),
    ImageName(s"$domain/${name.value}:${version.value}")
  ),
  docker / buildOptions := BuildOptions(
    cache = false,
    removeIntermediateContainers = BuildOptions.Remove.Always,
    pullBaseImage = BuildOptions.Pull.Always
  ),
  docker / dockerfile := {
    // The assembly task generates a fat JAR file
    val artifact: File = assembly.value
    val artifactTargetPath = s"$targetDockerJarPath/$domain-${name.value}.jar"
    new Dockerfile {
      from(s"$baseRegistry/spark-runner:0.1")
    }.add(artifact, artifactTargetPath)
  }
)
// Include "provided" dependencies back to default run task
lazy val runLocalSettings = Seq(
  // https://stackoverflow.com/questions/18838944/how-to-add-provided-dependencies-back-to-run-test-tasks-classpath/21803413#21803413
  // Scoped with the slash syntax, consistent with the left-hand side
  // (replaces the deprecated `key in (Compile, run)` form).
  Compile / run := Defaults
    .runTask(
      Compile / fullClasspath,
      Compile / run / mainClass,
      Compile / run / runner
    )
    .evaluated
)
// Root project: wires the setting groups together and registers the Helm
// chart generator as a resource generator of the Compile configuration.
lazy val root = (project in file("."))
  .enablePlugins(sbtdocker.DockerPlugin, AshScriptPlugin)
  .settings(
    commonSettings,
    assemblySettings,
    dockerSettings,
    runLocalSettings,
    name := "kaspi-sparkjob",
    Compile / mainClass := Some("kaspi.sparkjob"),
    Compile / resourceGenerators += createImporterHelmChart.taskValue
  )
// Task to create helm chart
// Writes helm/Chart.yaml and helm/values.yaml under the project base directory
// from the build's own settings (name, version, Spark version, main class) and
// returns both files. Nested YAML entries inside the string templates are
// indented so that the written files parse with the intended structure
// (list items under their key, hostPath/path under each volume, and so on).
lazy val createImporterHelmChart: Def.Initialize[Task[Seq[File]]] = Def.task {
  val chartFile = baseDirectory.value / "helm" / "Chart.yaml"
  val valuesFile = baseDirectory.value / "helm" / "values.yaml"

  val chartContents =
    s"""# Generated by build.sbt. Please don't manually update
       |apiVersion: v1
       |name: $domain-${name.value}
       |version: ${version.value}
       |appVersion: ${version.value}
       |description: ETL Job
       |home: https://github.com/jyyoo0530/kaspi
       |sources:
       |  - https://github.com/jyyoo0530/kaspi
       |maintainers:
       |  - name: Jeremy Yoo
       |    email: jyyoo0530@gmail.com
       |    url: https://www.linkedin.com/in/jeeyoungyoo
       |""".stripMargin

  // `mounts` maps a volume name to its mount path; the names must match the
  // entries under `volumes`.
  val valuesContents =
    s"""# Generated by build.sbt. Please don't manually update
       |version: ${version.value}
       |sparkVersion: ${sparkVersion}
       |image: $domain/${name.value}:${version.value}
       |jar: local://$targetDockerJarPath/$domain-${name.value}.jar
       |mainClass: ${(Compile / run / mainClass).value.getOrElse("__MAIN_CLASS__")}
       |fileDependencies: []
       |environment: minikube
       |serviceAccount: spark-spark
       |imageRegistry: localhost:5000
       |arguments:
       |  - "/mnt/data-in/"
       |  - "/mnt/data-out/"
       |volumes:
       |  - name: input-data
       |    hostPath:
       |      path: /input-data
       |  - name: output-data
       |    hostPath:
       |      path: /output-data
       |mounts:
       |  input-data: /mnt/data-in
       |  output-data: /mnt/data-out
       |driver:
       |  cores: 1
       |  memory: "2g"
       |executor:
       |  instances: 2
       |  cores: 1
       |  memory: "1g"
       |hadoopConf:
       |sparkConf:
       |hostNetwork: false
       |imagePullPolicy: Never
       |userId: 0
       |""".stripMargin

  IO.write(chartFile, chartContents)
  IO.write(valuesFile, valuesContents)
  Seq(chartFile, valuesFile)
}
// `sbt showVersion` prints the project version to stdout.
lazy val showVersion = taskKey[Unit]("Show version")

showVersion := println(version.value)
// Conflict resolution for the fat jar: drop everything under META-INF and, for
// any other duplicated path, keep the first copy found.
// Scoped with the slash syntax instead of the deprecated `in assembly`.
assembly / assemblyMergeStrategy := {
  case PathList("META-INF", _ @ _*) => MergeStrategy.discard
  case _                            => MergeStrategy.first
}
******2021/2/25 更新 ********
我用下面这个 yaml 做了测试,Pod 中成功挂载了来自 hostPath 的卷。配置上没有区别,只是对象类型不同:一个是普通的“容器 (container)”,另一个是“driver”、“executor”等。(使用 gaffer-hdfs 时也出现了同样的问题,那里的 k8s 对象名称是“namenode”、“datanode”等。)使用自定义的 kubernetes 对象名称会不会有问题?但如果它们仍然继承了容器的属性,就没有理由不挂载。……所以……我还在摸索中……!:)
# Plain Pod used as a control test: mounts the node directory /input-data
# into an nginx container at /mnt/data.
apiVersion: v1
kind: Pod
metadata:
  name: hostpath
  namespace: spark-apps
spec:
  containers:
    - name: nginx
      image: nginx
      volumeMounts:
        # Must match the volume name declared under spec.volumes.
        - name: volumepath
          mountPath: /mnt/data
  volumes:
    - name: volumepath
      hostPath:
        path: /input-data
        type: Directory
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。