- shani_sha256_4_rounds (w[0], 0, s);
- shani_sha256_4_rounds (w[1], 1, s);
- shani_sha256_4_rounds (w[2], 2, s);
- shani_sha256_4_rounds (w[3], 3, s);
-
- shani_sha256_cycle_w (w, 0, 1, 2, 3);
- shani_sha256_4_rounds (w[0], 4, s);
- shani_sha256_cycle_w (w, 1, 2, 3, 0);
- shani_sha256_4_rounds (w[1], 5, s);
- shani_sha256_cycle_w (w, 2, 3, 0, 1);
- shani_sha256_4_rounds (w[2], 6, s);
- shani_sha256_cycle_w (w, 3, 0, 1, 2);
- shani_sha256_4_rounds (w[3], 7, s);
-
- shani_sha256_cycle_w (w, 0, 1, 2, 3);
- shani_sha256_4_rounds (w[0], 8, s);
- shani_sha256_cycle_w (w, 1, 2, 3, 0);
- shani_sha256_4_rounds (w[1], 9, s);
- shani_sha256_cycle_w (w, 2, 3, 0, 1);
- shani_sha256_4_rounds (w[2], 10, s);
- shani_sha256_cycle_w (w, 3, 0, 1, 2);
- shani_sha256_4_rounds (w[3], 11, s);
-
- shani_sha256_cycle_w (w, 0, 1, 2, 3);
- shani_sha256_4_rounds (w[0], 12, s);
- shani_sha256_cycle_w (w, 1, 2, 3, 0);
- shani_sha256_4_rounds (w[1], 13, s);
- shani_sha256_cycle_w (w, 2, 3, 0, 1);
- shani_sha256_4_rounds (w[2], 14, s);
- shani_sha256_cycle_w (w, 3, 0, 1, 2);
- shani_sha256_4_rounds (w[3], 15, s);
+ w[0] = clib_sha256_vec_load (m[0]);
+ w[1] = clib_sha256_vec_load (m[1]);
+ w[2] = clib_sha256_vec_load (m[2]);
+ w[3] = clib_sha256_vec_load (m[3]);
+
+ clib_sha256_vec_4_rounds (w[0], 0, s);
+ clib_sha256_vec_4_rounds (w[1], 1, s);
+ clib_sha256_vec_4_rounds (w[2], 2, s);
+ clib_sha256_vec_4_rounds (w[3], 3, s);
+
+ clib_sha256_vec_cycle_w (w, 0);
+ clib_sha256_vec_4_rounds (w[0], 4, s);
+ clib_sha256_vec_cycle_w (w, 1);
+ clib_sha256_vec_4_rounds (w[1], 5, s);
+ clib_sha256_vec_cycle_w (w, 2);
+ clib_sha256_vec_4_rounds (w[2], 6, s);
+ clib_sha256_vec_cycle_w (w, 3);
+ clib_sha256_vec_4_rounds (w[3], 7, s);
+
+ clib_sha256_vec_cycle_w (w, 0);
+ clib_sha256_vec_4_rounds (w[0], 8, s);
+ clib_sha256_vec_cycle_w (w, 1);
+ clib_sha256_vec_4_rounds (w[1], 9, s);
+ clib_sha256_vec_cycle_w (w, 2);
+ clib_sha256_vec_4_rounds (w[2], 10, s);
+ clib_sha256_vec_cycle_w (w, 3);
+ clib_sha256_vec_4_rounds (w[3], 11, s);
+
+ clib_sha256_vec_cycle_w (w, 0);
+ clib_sha256_vec_4_rounds (w[0], 12, s);
+ clib_sha256_vec_cycle_w (w, 1);
+ clib_sha256_vec_4_rounds (w[1], 13, s);
+ clib_sha256_vec_cycle_w (w, 2);
+ clib_sha256_vec_4_rounds (w[2], 14, s);
+ clib_sha256_vec_cycle_w (w, 3);
+ clib_sha256_vec_4_rounds (w[3], 15, s);