performance tweak - faster checksum
author imarom <[email protected]>
Wed, 28 Sep 2016 11:57:43 +0000 (14:57 +0300)
committer imarom <[email protected]>
Wed, 28 Sep 2016 12:51:18 +0000 (15:51 +0300)
linux/ws_main.py
src/common/Network/Packet/IPHeader.h
src/stateless/cp/trex_stream_vm.h

index b843fcb..a140e17 100755 (executable)
@@ -347,7 +347,7 @@ class build_option:
             result+=['-m32'];
 
         if self.isRelease () :
-            result+=['-O2'];
+            result+=['-O3'];
         else:
             result+=['-O0','-DDEBUG','-D_DEBUG','-DSTILE_CPP_ASSERT','-DSTILE_SHIM_ASSERT'];
 
index b9ef8a2..dd9f509 100755 (executable)
@@ -19,6 +19,10 @@ limitations under the License.
 
 #include "PacketHeaderBase.h"
 
+/* Branch-prediction hint: tells the compiler the condition is expected to be
+ * true.  The double negation collapses any non-zero value to exactly 1, which
+ * is the value given to __builtin_expect as the expectation, so the hint also
+ * holds for conditions that are truthy but not equal to 1 (e.g. a masked
+ * flag).  Only defined when no other header has provided it already. */
+#ifndef likely
+#define likely(x)  __builtin_expect(!!(x),1)
+#endif /* likely */
+
 #define IPV4_HDR_LEN 20
 
 class IPHeader
@@ -140,6 +144,59 @@ public:
 
        inline  void    swapSrcDest                     ();
 
+
+    /* Recompute the header checksum in place and store it in myChecksum.
+     *
+     * A version/length byte of 0x45 is treated as the common case and goes
+     * through the unrolled 20-byte routine; any other value goes through the
+     * generic out-of-line routine. */
+    inline void     updateCheckSumFast()  {
+        /* zero the field before summing - the helpers add up the raw header
+         * words, which presumably include this field (TODO confirm layout) */
+        myChecksum = 0;
+
+        myChecksum = likely(myVer_HeaderLength == 0x45) ? calc_cksum_fixed()
+                                                         : calc_cksum_nonfixed();
+    }
+
+protected:
+
+    /* Fast inline checksum calculation for the fixed 20-byte header.
+     *
+     * Adds up the first ten 16-bit words of this object exactly as they sit
+     * in memory, folds the carries back into the low 16 bits (one's-complement
+     * addition) and returns the bitwise complement of the result.
+     * updateCheckSumFast() zeroes myChecksum before calling this. */
+    inline uint16_t calc_cksum_fixed() {
+        const uint16_t *ipv4 = (const uint16_t *)this;
+
+        /* unsigned so the shifts below are plain logical shifts */
+        uint32_t sum = 0;
+
+        /* calculate 20 bytes - unrolled loop */
+        sum += ipv4[0];
+        sum += ipv4[1];
+        sum += ipv4[2];
+        sum += ipv4[3];
+        sum += ipv4[4];
+        sum += ipv4[5];
+        sum += ipv4[6];
+        sum += ipv4[7];
+        sum += ipv4[8];
+        sum += ipv4[9];
+
+        /* Fold twice: the first fold can itself carry out of bit 15
+         * (e.g. 0x1ffff -> 0x10000); the uint16_t cast would silently drop
+         * that carry and produce a wrong checksum.  After the second fold
+         * the value always fits in 16 bits. */
+        sum = (sum & 0xffff) + (sum >> 16);
+        sum = (sum & 0xffff) + (sum >> 16);
+
+        return (uint16_t)(~sum);
+    }
+
+    /* A slow, non-frequent call - never inline.
+     *
+     * Generic variant of the checksum: adds up hlen / 2 16-bit words of this
+     * object, where hlen is whatever getHeaderLength() reports (presumably
+     * the header length in bytes - TODO confirm), folds the carries back into
+     * the low 16 bits and returns the bitwise complement of the result.
+     * updateCheckSumFast() zeroes myChecksum before calling this. */
+    uint16_t calc_cksum_nonfixed() __attribute__ ((noinline)) {
+        const uint16_t *ipv4 = (const uint16_t *)this;
+
+        /* unsigned so the shifts below are plain logical shifts */
+        uint32_t sum = 0;
+        int hlen = getHeaderLength();
+
+        for (int i = 0; i < (hlen / 2); i++) {
+            sum += ipv4[i];
+        }
+
+        /* Fold twice: the first fold can itself carry out of bit 15
+         * (e.g. 0x1ffff -> 0x10000); the uint16_t cast would silently drop
+         * that carry and produce a wrong checksum.  After the second fold
+         * the value always fits in 16 bits. */
+        sum = (sum & 0xffff) + (sum >> 16);
+        sum = (sum & 0xffff) + (sum >> 16);
+
+        return (uint16_t)(~sum);
+    }
+
 ////////////////////////////////////////////////////////////////////////////////////////
 // Common Header Interface
 ////////////////////////////////////////////////////////////////////////////////////////
index be0c03b..a0de7b2 100644 (file)
@@ -499,16 +499,15 @@ public:
 } __attribute__((packed));
 
 
-
-
 struct StreamDPOpIpv4Fix {
     uint8_t m_op;
     uint16_t  m_offset;
 public:
     void dump(FILE *fd,std::string opt);
-    void run(uint8_t * pkt_base){
-        IPHeader *      ipv4=  (IPHeader *)(pkt_base+m_offset);
-        ipv4->updateCheckSum();
+    void run(uint8_t * pkt_base) {
+        /* the header to fix up starts m_offset bytes into the packet buffer */
+        uint8_t *hdr_start = pkt_base + m_offset;
+
+        /* recompute its checksum in place */
+        ((IPHeader *)hdr_start)->updateCheckSumFast();
     }
 
 } __attribute__((packed));