Browse Source

Added flink without archives

Fred Damstra (k8s1) 2 years ago
parent
commit
cb1beb02c1

+ 8 - 0
Workloads/flink/1.account.yaml

@@ -0,0 +1,8 @@
+# ServiceAccount referenced by kubernetes.jobmanager.service-account in
+# flink-conf.yaml. This was exported with "kubectl get -o yaml" (see
+# README.nativek8s.md), so the server-generated fields (creationTimestamp,
+# resourceVersion, uid) have been stripped: they are cluster-managed, are
+# ignored or rejected on re-apply, and resourceVersion can trigger
+# optimistic-concurrency conflicts.
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: flink-service-account
+  namespace: default

+ 60 - 0
Workloads/flink/2.flink-configuration-configmap.yaml

@@ -0,0 +1,60 @@
+# ConfigMap holding the two configuration files Flink expects in
+# /opt/flink/conf; both session deployments in this directory mount it
+# via a configMap volume (keys become files flink-conf.yaml and
+# log4j-console.properties).
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: flink-config
+  labels:
+    app: flink
+data:
+  # Literal file contents (the |+ block scalar preserves all newlines);
+  # everything indented under the key is raw text, not YAML parsed here.
+  # Note the comments cannot go inside the scalar without becoming part
+  # of the file contents.
+  flink-conf.yaml: |+
+    jobmanager.rpc.address: flink-jobmanager
+    taskmanager.numberOfTaskSlots: 2
+    blob.server.port: 6124
+    jobmanager.rpc.port: 6123
+    taskmanager.rpc.port: 6122
+    queryable-state.proxy.ports: 6125
+    jobmanager.memory.process.size: 1600m
+    taskmanager.memory.process.size: 1728m
+    parallelism.default: 2
+    kubernetes.jobmanager.service-account: flink-service-account
+    kubernetes.container.image: arm64v8/flink:1.14.6-scala_2.12
+    kubernetes.rest-service.exposed.type: LoadBalancer
+  # Log4j2 config: console appender plus a size-capped rolling file appender.
+  log4j-console.properties: |+
+    # This affects logging for both user code and Flink
+    rootLogger.level = INFO
+    rootLogger.appenderRef.console.ref = ConsoleAppender
+    rootLogger.appenderRef.rolling.ref = RollingFileAppender
+    # Uncomment this if you want to _only_ change Flink's logging
+    #logger.flink.name = org.apache.flink
+    #logger.flink.level = INFO
+    # The following lines keep the log level of common libraries/connectors on
+    # log level INFO. The root logger does not override this. You have to manually
+    # change the log levels here.
+    logger.akka.name = akka
+    logger.akka.level = INFO
+    logger.kafka.name= org.apache.kafka
+    logger.kafka.level = INFO
+    logger.hadoop.name = org.apache.hadoop
+    logger.hadoop.level = INFO
+    logger.zookeeper.name = org.apache.zookeeper
+    logger.zookeeper.level = INFO
+    # Log all infos to the console
+    appender.console.name = ConsoleAppender
+    appender.console.type = CONSOLE
+    appender.console.layout.type = PatternLayout
+    appender.console.layout.pattern = %d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n
+    # Log all infos in the given rolling file
+    appender.rolling.name = RollingFileAppender
+    appender.rolling.type = RollingFile
+    appender.rolling.append = false
+    appender.rolling.fileName = ${sys:log.file}
+    appender.rolling.filePattern = ${sys:log.file}.%i
+    appender.rolling.layout.type = PatternLayout
+    appender.rolling.layout.pattern = %d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n
+    appender.rolling.policies.type = Policies
+    appender.rolling.policies.size.type = SizeBasedTriggeringPolicy
+    appender.rolling.policies.size.size=100MB
+    appender.rolling.strategy.type = DefaultRolloverStrategy
+    appender.rolling.strategy.max = 10
+    # Suppress the irrelevant (wrong) warnings from the Netty channel handler
+    logger.netty.name = org.apache.flink.shaded.akka.org.jboss.netty.channel.DefaultChannelPipeline
+    logger.netty.level = OFF

+ 17 - 0
Workloads/flink/3.jobmanager.yaml

@@ -0,0 +1,17 @@
+# Service in front of the JobManager pod: RPC (6123), blob server (6124)
+# and the web UI / REST endpoint (8081).
+apiVersion: v1
+kind: Service
+metadata:
+  name: flink-jobmanager
+spec:
+  #type: ClusterIP
+  # LoadBalancer puts the web UI on an external VIP — presumably allocated
+  # by MetalLB (see microk8s_notes.md); confirm on the target cluster.
+  type: LoadBalancer
+  ports:
+  - name: rpc
+    port: 6123
+  - name: blob-server
+    port: 6124
+  - name: webui
+    port: 8081
+  # Must match the pod labels in 4.jobmanager-session-deployment.yaml.
+  selector:
+    app: flink
+    component: jobmanager

+ 48 - 0
Workloads/flink/4.jobmanager-session-deployment.yaml

@@ -0,0 +1,48 @@
+# Flink JobManager for a standalone session cluster (single replica).
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: flink-jobmanager
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: flink
+      component: jobmanager
+  template:
+    metadata:
+      labels:
+        app: flink
+        component: jobmanager
+    spec:
+      containers:
+      - name: jobmanager
+        # arm64 build; the upstream apache/flink images below failed with
+        # "exec format error" on this cluster (see README.nativek8s.md).
+        image: arm64v8/flink:1.14.6-scala_2.12
+        #image: apache/flink:1.10.3
+        # image: apache/flink:1.12.0-scala_2.11
+        args: ["jobmanager"]
+        ports:
+        - containerPort: 6123
+          name: rpc
+        - containerPort: 6124
+          name: blob-server
+        - containerPort: 8081
+          name: webui
+        # Restart the pod if the RPC port stops accepting connections.
+        livenessProbe:
+          tcpSocket:
+            port: 6123
+          initialDelaySeconds: 30
+          periodSeconds: 60
+        # Config files come from the flink-config ConfigMap
+        # (2.flink-configuration-configmap.yaml).
+        volumeMounts:
+        - name: flink-config-volume
+          mountPath: /opt/flink/conf
+        securityContext:
+          runAsUser: 9999  # refers to user _flink_ from official flink image, change if necessary
+      volumes:
+      - name: flink-config-volume
+        configMap:
+          name: flink-config
+          items:
+          - key: flink-conf.yaml
+            path: flink-conf.yaml
+          - key: log4j-console.properties
+            path: log4j-console.properties

+ 59 - 0
Workloads/flink/5.taskmanager-session-deployment.yaml

@@ -0,0 +1,59 @@
+# Flink TaskManagers for the standalone session cluster: two workers, each
+# advertising taskmanager.numberOfTaskSlots (2, per the ConfigMap) slots.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: flink-taskmanager
+spec:
+  replicas: 2
+  selector:
+    matchLabels:
+      app: flink
+      component: taskmanager
+  template:
+    metadata:
+      labels:
+        app: flink
+        component: taskmanager
+    spec:
+      containers:
+      - name: taskmanager
+        # arm64 build; the upstream apache/flink images below hit
+        # "exec format error" on this cluster (see README.nativek8s.md).
+        image: arm64v8/flink:1.14.6-scala_2.12
+        # image: apache/flink:1.10.3
+        # image: apache/flink:1.12.0-scala_2.11
+        args: ["taskmanager"]
+        ports:
+        - containerPort: 6122
+          name: rpc
+        - containerPort: 6125
+          name: query-state
+        # Restart the pod if the RPC port stops accepting connections.
+        livenessProbe:
+          tcpSocket:
+            port: 6122
+          initialDelaySeconds: 30
+          periodSeconds: 60
+        volumeMounts:
+        - name: flink-config-volume
+          mountPath: /opt/flink/conf/
+        securityContext:
+          runAsUser: 9999  # refers to user _flink_ from official flink image, change if necessary
+# Disabled Beam worker-pool sidecar kept for reference (EXTERNAL
+# environment experiment — see beam_test.py and the READMEs).
+#      - name: beam-worker-pool
+#        #image: apache/beam_python3.7_sdk
+#        image: arm64v8/flink:1.14.6-scala_2.12
+#        args: ["--worker_pool"]
+#        ports:
+#        - containerPort: 50000
+#          name: pool
+#        livenessProbe:
+#          tcpSocket:
+#            port: 50000
+#          initialDelaySeconds: 30
+#          periodSeconds: 60
+      volumes:
+      - name: flink-config-volume
+        configMap:
+          name: flink-config
+          items:
+          - key: flink-conf.yaml
+            path: flink-conf.yaml
+          - key: log4j-console.properties
+            path: log4j-console.properties
+

+ 116 - 0
Workloads/flink/README.nativek8s.md

@@ -0,0 +1,116 @@
+# Flink is a distributed runner, hoping to use it with beam
+
+What a mess the documentation is.
+
+## RBAC
+
+Trying to avoid using the default service account, so running:
+
+```
+kubectl create serviceaccount flink-service-account
+kubectl create clusterrolebinding flink-role-binding-flink --clusterrole=edit --serviceaccount=default:flink-service-account
+
+# Save for later
+kubectl get serviceaccount flink-service-account -o yaml > 1.account.yaml
+kubectl get clusterrolebinding flink-role-binding-flink -o yaml > 2.role-binding.yaml
+```
+
+Unknown whether those yaml files will work to recreate the objects, but worth a try.
+
+Set the service account in the configuration
+
+```
+cd flink-1.17.1
+echo '#====================================' >> conf/flink-conf.yaml
+echo '# Freds Custom Configuration' >> conf/flink-conf.yaml
+echo '#====================================' >> conf/flink-conf.yaml
+echo '' >> conf/flink-conf.yaml
+echo 'kubernetes.jobmanager.service-account: flink-service-account' >> conf/flink-conf.yaml
+```
+
+## Java
+
+```
+sudo apt install default-jre-headless
+```
+
+## Deployment
+
+https://nightlies.apache.org/flink/flink-docs-release-1.17/docs/deployment/resource-providers/native_kubernetes/
+
+
+```
+# I needed to pull a different image (error: "exec format error").
+echo 'kubernetes.container.image.ref: arm64v8/flink:1.17.1-scala_2.12' >> conf/flink-conf.yaml
+echo 'kubernetes.rest-service.exposed.type: LoadBalancer' >> conf/flink-conf.yaml
+```
+
+
+```
+# Create the cluster
+./bin/kubernetes-session.sh -Dkubernetes.cluster-id=my-first-flink-cluster
+# On 1.14, the image override didn't work, so instead:
+./bin/kubernetes-session.sh \
+    -Dkubernetes.cluster-id=my-first-flink-cluster \
+    -Dkubernetes.container.image.ref=arm64v8/flink:1.13.6-scala_2.12
+
+# If you want to Delete what you just created
+kubectl delete deployment/my-first-flink-cluster
+
+# Run a test job:
+./bin/flink run \
+    --target kubernetes-session \
+    -Dkubernetes.cluster-id=my-first-flink-cluster \
+    ./examples/streaming/TopSpeedWindowing.jar
+
+# To reconnect (wtf?)
+./bin/kubernetes-session.sh \
+    -Dkubernetes.cluster-id=my-first-flink-cluster \
+    -Dexecution.attached=true
+# Type "stop" to stop it.
+```
+
+# Let's beam to it
+
+```
+cd .. # Back out of the flink dir
+
+sudo apt install python3.10-venv
+python -m venv env
+source env/bin/activate
+pip install -r requirements.txt
+```
+
+Issue: Might need an older flink version
+Latest compatible according to: https://beam.apache.org/documentation/runners/flink/ appears to be 1.14.x
+
+# Yup. Need older version
+
+Note: Image ref is different in older version.
+
+from `conf/flink-conf.yaml`:
+
+```
+#====================================
+# Freds Custom Configuration
+#====================================
+
+kubernetes.jobmanager.service-account: flink-service-account
+kubernetes.container.image: arm64v8/flink:1.14.6-scala_2.12
+kubernetes.rest-service.exposed.type: LoadBalancer
+```
+
+## Beam test, take 2
+
+```
+# Start the flink server:
+cd ~/src/monkeybox_kubernetes/Workloads/flink
+source env/bin/activate
+flink-1.14.6/bin/kubernetes-session.sh -Dkubernetes.cluster-id=my-first-flink-cluster
+# Ignore the Job Manager Web Interface - It will be moved to a VIP
+python3 beam_test.py
+```
+
+Still didn't work. Lots of issues. It's like beam really doesn't want to play this way.
+Renaming this readme and trying a different way.
+

+ 15 - 0
Workloads/flink/README.standalone.md

@@ -0,0 +1,15 @@
+# Standalone Version
+
+Going to try following the guide here:
+https://python.plainenglish.io/apache-beam-flink-cluster-kubernetes-python-a1965f37b7cb
+
+## Log
+
+```
+# Added k8s config to this one
+kubectl apply -f 2.flink-configuration-configmap.yaml
+#
+kubectl apply -f 3.jobmanager.yaml
+# Changed the image here
+```

+ 30 - 0
Workloads/flink/beam_test.py

@@ -0,0 +1,30 @@
+#! /usr/bin/env python3
+import apache_beam as beam
+from apache_beam.options.pipeline_options import PipelineOptions
+
+options = PipelineOptions([
+    "--runner=FlinkRunner",
+    "--flink_version=1.14",
+    "--job_endpoint=10.42.42.215:8081",
+#    "--environment_type=LOOPBACK" # loopback for local only?
+    "--environment_type=DOCKER"
+])
+
+# From another example (https://python.plainenglish.io/apache-beam-flink-cluster-kubernetes-python-a1965f37b7cb)
+#    options = PipelineOptions([
+#        "--runner=FlinkRunner",
+#        "--flink_version=1.10",
+#        "--flink_master=localhost:8081",
+#        "--environment_type=EXTERNAL",
+#        "--environment_config=localhost:50000"
+#    ])
+
+
+
+
+with beam.Pipeline(options=options) as pipeline:
+  elements = (
+          pipeline
+          | "Create elements" >> beam.Create(["Hello", "World", "Fred", "was", "here", "twice", "Fred"])
+          | "Print Elements" >> beam.Map(print)
+          )

+ 1 - 0
Workloads/flink/requirements.txt

@@ -0,0 +1 @@
+apache-beam

+ 3 - 3
microk8s_notes.md

@@ -1,7 +1,7 @@
 # Installation
 
 ```
-sudo snap install microk8s --classic --channel=1.25/stable
+sudo snap install microk8s --classic --channel=1.25/stable # 1.27/stable on io
 sudo usermod -a -G microk8s fdamstra
 sudo chown -f -R fdamstra ~/.kube
 
@@ -10,7 +10,7 @@ sudo chown -f -R fdamstra ~/.kube
 microk8s status --wait-ready
 
 # Definitely want coredns:
-microk8s enable dns storage
+microk8s enable dns hostpath-storage
 
 # may want:
 alias kubectl='microk8s kubectl'
@@ -65,7 +65,7 @@ Using the status seems better
 
 ```
 microk8s enable metallb
-# Enter IP range: 10.42.42.211-10.42.42.240
+# Enter IP range: 10.42.42.211-10.42.42.240 # io is 10.42.42.40-10.42.42.49
 ```
 
 ## NFS Provisioning