feat(ansible): update 1n-tx2 config 57/36957/3
author Juraj Linkeš <juraj.linkes@pantheon.tech>
Fri, 19 Aug 2022 09:37:47 +0000 (11:37 +0200)
committer Peter Mikus <peter.mikus@protonmail.ch>
Thu, 25 Aug 2022 07:40:11 +0000 (07:40 +0000)
Arm VPP Device requires newer drivers, otherwise we're frequently seeing
a deadlock when running csit-initialize-vfs.

We've observed server slowdown in VPP Device jobs in the past and
testing revealed that disabling the cppc_cpufreq seemingly resolves the
issue (there was no observed slowdown with a sample of 500 job runs).

Also update Nomad config (server IPs).

Change-Id: I484417f2cdb701239ca7517406147d0f2f7adde4
Signed-off-by: Juraj Linkeš <juraj.linkes@pantheon.tech>
fdio.infra.ansible/inventories/lf_inventory/host_vars/10.30.51.70.yaml
fdio.infra.ansible/inventories/lf_inventory/host_vars/10.30.51.71.yaml
fdio.infra.ansible/roles/vpp_device/tasks/main.yaml
fdio.infra.ansible/roles/vpp_device/tasks/thunderx2.yaml [new file with mode: 0644]

index 66df09b..05e27a1 100644 (file)
@@ -10,6 +10,8 @@ grub:
   iommu.passthrough: "1"
 cpu_microarchitecture: "thunderx2"
 
+intel_700_matrix: "dpdk22.07"
+
 # User management.
 users:
   - username: localadmin
@@ -45,13 +47,13 @@ nomad_options:
   driver.whitelist: "docker,raw_exec,exec"
 nomad_service_mgr: "systemd"
 nomad_retry_servers:
-  - "10.32.8.15"
-  - "10.32.8.16"
-  - "10.32.8.17"
+  - "10.30.51.23"
+  - "10.30.51.24"
+  - "10.30.51.25"
 nomad_servers:
-  - "10.32.8.15:4647"
-  - "10.32.8.16:4647"
-  - "10.32.8.17:4647"
+  - "10.30.51.23:4647"
+  - "10.30.51.24:4647"
+  - "10.30.51.25:4647"
 nomad_cpu_total_compute: "40000"
 
 # Consul settigs.
@@ -64,9 +66,9 @@ consul_encrypt: "Y4T+5JGx1C3l2NFBBvkTWQ=="
 consul_node_name: "{{ hostname }}"
 consul_node_role: "client"
 consul_retry_servers:
-  - "10.32.8.15"
-  - "10.32.8.16"
-  - "10.32.8.17"
+  - "10.30.51.23"
+  - "10.30.51.24"
+  - "10.30.51.25"
 consul_service_mgr: "systemd"
 
 # Docker settings.
index 9642b69..ab71f7f 100644 (file)
@@ -10,6 +10,8 @@ grub:
   iommu.passthrough: "1"
 cpu_microarchitecture: "thunderx2"
 
+intel_700_matrix: "dpdk22.07"
+
 # User management.
 users:
   - username: localadmin
@@ -45,13 +47,13 @@ nomad_options:
   driver.whitelist: "docker,raw_exec,exec"
 nomad_service_mgr: "systemd"
 nomad_retry_servers:
-  - "10.32.8.15"
-  - "10.32.8.16"
-  - "10.32.8.17"
+  - "10.30.51.23"
+  - "10.30.51.24"
+  - "10.30.51.25"
 nomad_servers:
-  - "10.32.8.15:4647"
-  - "10.32.8.16:4647"
-  - "10.32.8.17:4647"
+  - "10.30.51.23:4647"
+  - "10.30.51.24:4647"
+  - "10.30.51.25:4647"
 nomad_cpu_total_compute: "40000"
 
 # Consul settigs.
@@ -64,9 +66,9 @@ consul_encrypt: "Y4T+5JGx1C3l2NFBBvkTWQ=="
 consul_node_name: "{{ hostname }}"
 consul_node_role: "client"
 consul_retry_servers:
-  - "10.32.8.15"
-  - "10.32.8.16"
-  - "10.32.8.17"
+  - "10.30.51.23"
+  - "10.30.51.24"
+  - "10.30.51.25"
 consul_service_mgr: "systemd"
 
 # Docker settings.
index 418217a..ec20d2a 100644 (file)
@@ -1,17 +1,12 @@
 ---
 # file: roles/vpp_device/tasks/main.yaml
 
-- name: Load Kernel Modules By Default
-  lineinfile:
-    path: "/etc/modules"
-    state: "present"
-    line: "{{ item }}"
-  with_items:
-    - "vfio-pci"
+- name: ThunderX2 Kernel Modules Config
+  import_tasks: thunderx2.yaml
   when:
     - cpu_microarchitecture == "thunderx2"
   tags:
-    - load-kernel-modules
+    - conf-kernel-modules
 
 - name: Disable IPv6 Router Advertisement
   sysctl:
diff --git a/fdio.infra.ansible/roles/vpp_device/tasks/thunderx2.yaml b/fdio.infra.ansible/roles/vpp_device/tasks/thunderx2.yaml
new file mode 100644 (file)
index 0000000..438095f
--- /dev/null
@@ -0,0 +1,29 @@
+---
+# file: roles/vpp_device/tasks/thunderx2.yaml
+#
+# Kernel module configuration for ThunderX2 hosts. main.yaml imports these
+# tasks only when cpu_microarchitecture == "thunderx2".
+
+# Append vfio-pci to /etc/modules so that it is loaded on startup.
+- name: Load Kernel Modules On Startup (vfio-pci)
+  ansible.builtin.lineinfile:
+    path: "/etc/modules"
+    state: "present"
+    line: "{{ item }}"
+  with_items:
+    - "vfio-pci"
+  tags:
+    - load-kernel-modules
+
+# Blacklist cppc_cpufreq via a modprobe.d drop-in; the file is created if
+# it does not exist yet.
+- name: Disable Kernel Modules On Startup (cppc_cpufreq)
+  ansible.builtin.lineinfile:
+    path: "/etc/modprobe.d/blacklist-cppc_cpufreq.conf"
+    state: "present"
+    line: "{{ item }}"
+    create: true
+  with_items:
+    - "blacklist cppc_cpufreq"
+  tags:
+    - disable-kernel-modules