}
 
       if (hi)
-       hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] =
-         xd->port_conf.rxmode.max_rx_pkt_len - sizeof (ethernet_header_t);
+       hi->max_packet_bytes = xd->port_conf.rxmode.max_rx_pkt_len
+         - sizeof (ethernet_header_t);
       else
        clib_warning ("hi NULL");
 
 
                    /* Init l3 packet size allowed on bonded interface */
                    bhi->max_packet_bytes = ETHERNET_MAX_PACKET_BYTES;
-                   bhi->max_l3_packet_bytes[VLIB_RX] =
-                     bhi->max_l3_packet_bytes[VLIB_TX] =
-                     ETHERNET_MAX_PACKET_BYTES - sizeof (ethernet_header_t);
                    while (nlink >= 1)
                      {         /* for all slave links */
                        int slave = slink[--nlink];
                        clib_memcpy (shi->hw_address, addr, 6);
                        clib_memcpy (sei->address, addr, 6);
                        /* Set l3 packet size allowed as the lowest of slave */
-                       if (bhi->max_l3_packet_bytes[VLIB_RX] >
-                           shi->max_l3_packet_bytes[VLIB_RX])
-                         bhi->max_l3_packet_bytes[VLIB_RX] =
-                           bhi->max_l3_packet_bytes[VLIB_TX] =
-                           shi->max_l3_packet_bytes[VLIB_RX];
+                       /* Clamp the bond's limit to this slave's limit */
+                       if (bhi->max_packet_bytes > shi->max_packet_bytes)
+                         bhi->max_packet_bytes = shi->max_packet_bytes;
+
                        /* Set max packet size allowed as the lowest of slave */
                        if (bhi->max_packet_bytes > shi->max_packet_bytes)
                          bhi->max_packet_bytes = shi->max_packet_bytes;
 
 _(punt_reply)                                           \
 _(feature_enable_disable_reply)                                \
 _(sw_interface_tag_add_del_reply)                      \
-_(sw_interface_set_mtu_reply)                           \
+_(hw_interface_set_mtu_reply)                           \
 _(p2p_ethernet_add_reply)                               \
 _(p2p_ethernet_del_reply)                               \
 _(lldp_config_reply)                                    \
 _(FEATURE_ENABLE_DISABLE_REPLY, feature_enable_disable_reply)           \
 _(SW_INTERFACE_TAG_ADD_DEL_REPLY, sw_interface_tag_add_del_reply)      \
 _(L2_XCONNECT_DETAILS, l2_xconnect_details)                             \
-_(SW_INTERFACE_SET_MTU_REPLY, sw_interface_set_mtu_reply)               \
+_(HW_INTERFACE_SET_MTU_REPLY, hw_interface_set_mtu_reply)               \
 _(IP_NEIGHBOR_DETAILS, ip_neighbor_details)                             \
 _(SW_INTERFACE_GET_TABLE_REPLY, sw_interface_get_table_reply)           \
 _(P2P_ETHERNET_ADD_REPLY, p2p_ethernet_add_reply)                       \
 }
 
 static int
-api_sw_interface_set_mtu (vat_main_t * vam)
+api_hw_interface_set_mtu (vat_main_t * vam)
 {
   unformat_input_t *i = vam->input;
-  vl_api_sw_interface_set_mtu_t *mp;
+  vl_api_hw_interface_set_mtu_t *mp;
   u32 sw_if_index = ~0;
   u32 mtu = 0;
   int ret;
     }
 
   /* Construct the API message */
-  M (SW_INTERFACE_SET_MTU, mp);
+  M (HW_INTERFACE_SET_MTU, mp);
   mp->sw_if_index = ntohl (sw_if_index);
   mp->mtu = ntohs ((u16) mtu);
 
 _(sw_interface_tag_add_del, "<intfc> | sw_if_index <nn> tag <text>"    \
 "[disable]")                                                           \
 _(l2_xconnect_dump, "")                                                \
-_(sw_interface_set_mtu, "<intfc> | sw_if_index <nn> mtu <nn>")        \
+_(hw_interface_set_mtu, "<intfc> | hw_if_index <nn> mtu <nn>")        \
 _(ip_neighbor_dump, "[ip6] <intfc> | sw_if_index <nn>")                 \
 _(sw_interface_get_table, "<intfc> | sw_if_index <id> [ipv6]")          \
 _(p2p_ethernet_add, "<intfc> | sw_if_index <nn> remote_mac <mac-address> sub_id <id>") \
 
--- /dev/null
+# Introduction
+Maximum Transmission Unit (MTU) is a term used to describe the maximum-sized "thingy" that can be sent out an interface. It can refer to the maximum frame size that a NIC can send; on Ethernet that would include the Ethernet header but typically not the IFG (inter-frame gap). It can also refer to the maximum packet size; that is, on Ethernet an MTU of 1500 would allow an IPv4 packet of 1500 bytes, which would result in an Ethernet frame of 1518 bytes.
+
+# MTU in VPP
+VPP allows setting the physical payload MTU, i.e. the MTU not including L2 overhead. Setting the hardware MTU will program the NIC.
+This MTU will be inherited by all software interfaces.
+
+VPP also allows setting the payload MTU for software interfaces, independently of the MTU set on the hardware. If the software payload MTU is set higher than the capability of the NIC, the packet will be dropped.
+
+In addition, VPP supports setting the MTU of individual network layer protocols: IPv4, IPv6 or MPLS. For example, an IPv4 MTU of 1500 (which includes the IPv4 header) will fit in a hardware payload MTU of 1500.
+
+_Note we might consider changing the hardware payload MTU to hardware MTU_. That is, the MTU includes all L2 framing. Then the payload MTU can be calculated based on the interface's configuration. E.g. 802.1q tags etc.
+
+There are currently no checks or warnings if e.g. the user configures a per-protocol MTU larger than the underlying payload MTU. If that happens packets will be fragmented or dropped.
+
+## Data structures
+The hardware payload MTU is stored in the max_packet_bytes variable in the vnet_hw_interface_t structure.
+
+The software MTU (previously max_l3_packet_bytes) is stored in the mtu[VNET_N_MTU] array in the vnet_sw_interface_t structure.
+
+# API
+
+## Set physical MTU
+
+This API message is used to set the physical MTU. It is currently limited to Ethernet interfaces. Note, this programs the NIC.
+
+```
+autoreply define hw_interface_set_mtu
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u16 mtu;
+};
+```
+
+## Set the L2 payload MTU (not including the L2 header) and per-protocol MTUs
+
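+This API message sets the L2 payload MTU (i.e. the maximum L3 packet size) and the per-protocol MTUs. E.g. on Ethernet the L2 payload MTU is the maximum size of the Ethernet payload. If a per-protocol value is left as 0, the VNET_MTU_L3 value (mtu[0]) is used for that protocol.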
+
+```
+autoreply define sw_interface_set_mtu
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ /* $$$$ Replace with enum */
+ u32 mtu[4]; /* 0 - L3, 1 - IP4, 2 - IP6, 3 - MPLS */
+};
+
+```
+
+## Get interface MTU
+
+The various MTUs on an interface can be queried with the sw_interface_dump/sw_interface_details calls.
+
+```
+define sw_interface_details
+{
+  /* MTU */
+  u16 link_mtu;
+
+  /* Per protocol MTUs */
+  u32 mtu[4]; /* 0 - L3, 1 - IP4, 2 - IP6, 3 - MPLS */
+};
+```
+
+# CLI
+
+```
+set interface mtu [packet|ip4|ip6|mpls] <value> <interface>
+```
 
 
     adj = adj_get(ai);
 
-    vnet_rewrite_update_mtu (vnet_get_main(),
+    vnet_rewrite_update_mtu (vnet_get_main(), adj->ia_link,
                              &adj->rewrite_header);
 
     return (ADJ_WALK_RC_CONTINUE);
 }
 
-static walk_rc_t
-adj_sw_mtu_update (vnet_main_t * vnm,
-                   u32 sw_if_index,
-                   void *ctx)
+static void
+adj_mtu_update (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
 {
-    /*
-     * Walk all the adjacencies on the interface to update the cached MTU
-     */
-    adj_walk (sw_if_index, adj_mtu_update_walk_cb, NULL);
-
-    return (WALK_CONTINUE);
+  adj_walk (sw_if_index, adj_mtu_update_walk_cb, NULL);
 }
 
-void
-adj_mtu_update (u32 hw_if_index)
-{
-    /*
-     * Walk all the SW interfaces on the HW interface to update the cached MTU
-     */
-    vnet_hw_interface_walk_sw(vnet_get_main(),
-                              hw_if_index,
-                              adj_sw_mtu_update,
-                              NULL);
-}
+VNET_SW_INTERFACE_MTU_CHANGE_FUNCTION(adj_mtu_update);
 
 /**
  * @brief Walk the Adjacencies on a given interface
 
  */
 extern void adj_feature_update (u32 sw_if_index, u8 arc_index, u8 is_enable);
 
-/**
- * @brief Notify the adjacency subsystem that the MTU settings for
- * an HW interface have changed
- */
-extern void adj_mtu_update (u32 hw_if_index);
-
 /**
  * @brief
  * The global adjacnecy pool. Exposed for fast/inline data-plane access
 
        adj->rewrite_header.sw_if_index = sw_if_index;
        adj->rewrite_header.data_bytes = 0;
         adj->rewrite_header.max_l3_packet_bytes =
-            vnet_sw_interface_get_mtu(vnet_get_main(), sw_if_index, VLIB_TX);
-
+         vnet_sw_interface_get_mtu(vnet_get_main(), sw_if_index,
+                                    vnet_link_to_mtu(linkt));
         adj_lock(adj_get_index(adj));
 
        vnet_update_adjacency_for_sw_interface(vnet_get_main(),
 
        adj_mcasts[proto][sw_if_index] = adj_get_index(adj);
         adj_lock(adj_get_index(adj));
 
-       vnet_rewrite_init(vnm, sw_if_index,
+       vnet_rewrite_init(vnm, sw_if_index, link_type,
                          adj_get_mcast_node(proto),
                          vnet_tx_node_index_for_sw_interface(vnm, sw_if_index),
                          &adj->rewrite_header);
 
        adj_index = adj_get_index(adj);
        adj_lock(adj_index);
 
-       vnet_rewrite_init(vnm, sw_if_index,
+       vnet_rewrite_init(vnm, sw_if_index, link_type,
                          adj_get_nd_node(nh_proto),
                          vnet_tx_node_index_for_sw_interface(vnm, sw_if_index),
                          &adj->rewrite_header);
 
 void
 vnet_rewrite_init (vnet_main_t * vnm,
                   u32 sw_if_index,
+                  vnet_link_t linkt,
                   u32 this_node, u32 next_node, vnet_rewrite_header_t * rw)
 {
   rw->sw_if_index = sw_if_index;
   rw->next_index = vlib_node_add_next (vnm->vlib_main, this_node, next_node);
   rw->max_l3_packet_bytes =
-    vnet_sw_interface_get_mtu (vnm, sw_if_index, VLIB_TX);
+    vnet_sw_interface_get_mtu (vnm, sw_if_index, vnet_link_to_mtu (linkt));
 }
 
 void
-vnet_rewrite_update_mtu (vnet_main_t * vnm, vnet_rewrite_header_t * rw)
+vnet_rewrite_update_mtu (vnet_main_t * vnm, vnet_link_t linkt,
+                        vnet_rewrite_header_t * rw)
 {
   rw->max_l3_packet_bytes =
-    vnet_sw_interface_get_mtu (vnm, rw->sw_if_index, VLIB_TX);
+    vnet_sw_interface_get_mtu (vnm, rw->sw_if_index,
+                              vnet_link_to_mtu (linkt));
 }
 
 void
     vnet_get_hw_interface_class (vnm, hw->hw_class_index);
   u8 *rewrite = NULL;
 
-  vnet_rewrite_init (vnm, sw_if_index, node_index,
+  vnet_rewrite_init (vnm, sw_if_index, link_type, node_index,
                     vnet_tx_node_index_for_sw_interface (vnm, sw_if_index),
                     rw);
 
 
 
 void vnet_rewrite_init (struct vnet_main_t *vnm,
                        u32 sw_if_index,
+                       vnet_link_t linkt,
                        u32 this_node,
                        u32 next_node, vnet_rewrite_header_t * rw);
 
 void vnet_rewrite_update_mtu (struct vnet_main_t *vnm,
-                             vnet_rewrite_header_t * rw);
+                             vnet_link_t linkt, vnet_rewrite_header_t * rw);
 
 u8 *vnet_build_rewrite_for_sw_interface (struct vnet_main_t *vnm,
                                         u32 sw_if_index,
 
   if (error)
     clib_error_report (error);
 
-  vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, vui->hw_if_index);
-  hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 9000;
+  vnet_sw_interface_set_mtu (vnm, vui->sw_if_index, 9000);
 }
 
 /*
 
     ETHERNET_MIN_PACKET_BYTES;
   hi->max_packet_bytes = hi->max_supported_packet_bytes =
     ETHERNET_MAX_PACKET_BYTES;
-  hi->per_packet_overhead_bytes =
-    /* preamble */ 8 + /* inter frame gap */ 12;
 
   /* Standard default ethernet MTU. */
-  hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 9000;
+  vnet_sw_interface_set_mtu (vnm, hi->sw_if_index, 9000);
 
   clib_memcpy (ei->address, address, sizeof (ei->address));
   vec_add (hi->hw_address, address, sizeof (ei->address));
 
        64 + sizeof (gre_header_t) + sizeof (ip6_header_t);
     }
 
-  hi->per_packet_overhead_bytes =
-    /* preamble */ 8 + /* inter frame gap */ 12;
-
   /* Standard default gre MTU. */
-  hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 9000;
+  vnet_sw_interface_set_mtu (vnm, sw_if_index, 9000);
 
   /*
    * source the FIB entry for the tunnel's destination
 
-option version = "1.1.0";
+option version = "2.0.0";
 
 service {
   rpc want_interface_events returns want_interface_events_reply
   u8 admin_up_down;
 };
 
-/** \brief Set interface MTU
+/** \brief Set interface physical MTU
     @param client_index - opaque cookie to identify the sender
     @param context - sender context, to match reply w/ request
     @param sw_if_index - index of the interface to set MTU on
     @param mtu - MTU
 */
-autoreply define sw_interface_set_mtu
+autoreply define hw_interface_set_mtu
 {
   u32 client_index;
   u32 context;
   u16 mtu;
 };
 
+/** \brief Set interface L3 MTU */
+autoreply define sw_interface_set_mtu
+{
+  u32 client_index;
+  u32 context;
+  u32 sw_if_index;
+  /* $$$$ Replace with enum */
+  u32 mtu[4]; /* 0 - L3, 1 - IP4, 2 - IP6, 3 - MPLS */
+};
+
+
 /** \brief Interface Event generated by want_interface_events
     @param client_index - opaque cookie to identify the sender
     @param pid - client pid registered to receive notification
   /* MTU */
   u16 link_mtu;
 
+  /* Per protocol MTUs */
+  u32 mtu[4]; /* 0 - L3, 1 - IP4, 2 - IP6, 3 - MPLS */
+
   /* Subinterface ID. A number 0-N to uniquely identify this subinterface under the super interface */
   u32 sub_id;
 
 
      /* helper_flags no redistribution */ 0);
 }
 
-void
-vnet_hw_interface_set_mtu (vnet_main_t * vnm, u32 hw_if_index, u32 mtu)
-{
-  vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
-
-  if (hi->max_packet_bytes != mtu)
-    {
-      u16 l3_pad = hi->max_packet_bytes - hi->max_l3_packet_bytes[VLIB_TX];
-      hi->max_packet_bytes = mtu;
-      hi->max_l3_packet_bytes[VLIB_TX] =
-       hi->max_l3_packet_bytes[VLIB_RX] = mtu - l3_pad;
-      ethernet_set_flags (vnm, hw_if_index, ETHERNET_INTERFACE_FLAG_MTU);
-      adj_mtu_update (hw_if_index);
-    }
-}
-
 static void
 unserialize_vnet_hw_interface_set_flags (serialize_main_t * m, va_list * va)
 {
   pool_put (im->sw_interfaces, sw);
 }
 
+/* Run the callbacks registered in vnm->sw_interface_mtu_change_functions
+ * for sw_if_index and hand back whatever the dispatcher returns. The third
+ * dispatcher argument is always 0 here. */
+static clib_error_t *
+call_sw_interface_mtu_change_callbacks (vnet_main_t * vnm, u32 sw_if_index)
+{
+  clib_error_t *error;
+
+  error = call_elf_section_interface_callbacks
+    (vnm, sw_if_index, 0, vnm->sw_interface_mtu_change_functions);
+
+  return error;
+}
+
+/* Set the default (VNET_MTU_L3) MTU of a software interface and run the
+ * MTU-change callbacks, but only when the value actually changes. */
+void
+vnet_sw_interface_set_mtu (vnet_main_t * vnm, u32 sw_if_index, u32 mtu)
+{
+  vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, sw_if_index);
+
+  /* Unchanged value: nothing to store and nobody to notify */
+  if (sw->mtu[VNET_MTU_L3] == mtu)
+    return;
+
+  sw->mtu[VNET_MTU_L3] = mtu;
+  call_sw_interface_mtu_change_callbacks (vnm, sw_if_index);
+}
+
+/* Overwrite all VNET_N_MTU MTU slots of a software interface with the
+ * values in mtu[] and run the MTU-change callbacks once if any slot
+ * changed. mtu[] must hold at least VNET_N_MTU entries. */
+void
+vnet_sw_interface_set_protocol_mtu (vnet_main_t * vnm, u32 sw_if_index,
+                                   u32 mtu[])
+{
+  vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, sw_if_index);
+  int n_changed = 0;
+  int slot = 0;
+
+  while (slot < VNET_N_MTU)
+    {
+      if (sw->mtu[slot] != mtu[slot])
+       {
+         sw->mtu[slot] = mtu[slot];
+         n_changed++;
+       }
+      slot++;
+    }
+
+  /* One notification covers every slot that changed */
+  if (n_changed > 0)
+    call_sw_interface_mtu_change_callbacks (vnm, sw_if_index);
+}
+
+/*
+ * Walk callback: ctx points at a u32 holding the new hardware MTU, which is
+ * applied to the visited software interface as its default (L3) MTU.
+ * Always asks the walk to continue.
+ */
+static walk_rc_t
+sw_interface_walk_callback (vnet_main_t * vnm, u32 sw_if_index, void *ctx)
+{
+  u32 new_mtu = *(u32 *) ctx;
+
+  vnet_sw_interface_set_mtu (vnm, sw_if_index, new_mtu);
+
+  return WALK_CONTINUE;
+}
+
+/* Set max_packet_bytes of a hardware interface. When the value changes,
+ * flag the MTU update through ethernet_set_flags() and walk the software
+ * interfaces of this hardware interface so each picks up the new MTU. */
+void
+vnet_hw_interface_set_mtu (vnet_main_t * vnm, u32 hw_if_index, u32 mtu)
+{
+  vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+
+  /* Same value as before: leave the hardware and the walk alone */
+  if (hw->max_packet_bytes == mtu)
+    return;
+
+  hw->max_packet_bytes = mtu;
+  ethernet_set_flags (vnm, hw_if_index, ETHERNET_INTERFACE_FLAG_MTU);
+  vnet_hw_interface_walk_sw (vnm, hw_if_index, sw_interface_walk_callback,
+                            &mtu);
+}
+
 static void
 setup_tx_node (vlib_main_t * vm,
               u32 node_index, vnet_device_class_t * dev_class)
 
   hw->max_rate_bits_per_sec = 0;
   hw->min_packet_bytes = 0;
-  hw->per_packet_overhead_bytes = 0;
-  hw->max_l3_packet_bytes[VLIB_RX] = ~0;
-  hw->max_l3_packet_bytes[VLIB_TX] = ~0;
+  vnet_sw_interface_set_mtu (vnm, hw->sw_if_index, 0);
 
   if (dev_class->tx_function == 0)
     goto no_output_nodes;      /* No output/tx nodes to create */
   return (0);
 }
 
+/* Map a link type to the MTU slot used for it. IP4, IP6 and MPLS have their
+ * own slots; every other link type uses the default VNET_MTU_L3 slot. */
+vnet_mtu_t
+vnet_link_to_mtu (vnet_link_t link)
+{
+  if (link == VNET_LINK_IP4)
+    return (VNET_MTU_IP4);
+  if (link == VNET_LINK_IP6)
+    return (VNET_MTU_IP6);
+  if (link == VNET_LINK_MPLS)
+    return (VNET_MTU_MPLS);
+
+  return (VNET_MTU_L3);
+}
+
 u8 *
 default_build_rewrite (vnet_main_t * vnm,
                       u32 sw_if_index,
 
   _VNET_INTERFACE_FUNCTION_DECL(f,hw_interface_add_del)
 #define VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION(f)             \
   _VNET_INTERFACE_FUNCTION_DECL(f,hw_interface_link_up_down)
-#define VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION_PRIO(f,p)        \
+#define VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION_PRIO(f,p)       \
   _VNET_INTERFACE_FUNCTION_DECL_PRIO(f,hw_interface_link_up_down,p)
+#define VNET_SW_INTERFACE_MTU_CHANGE_FUNCTION(f)                \
+  _VNET_INTERFACE_FUNCTION_DECL(f,sw_interface_mtu_change)
 #define VNET_SW_INTERFACE_ADD_DEL_FUNCTION(f)                  \
   _VNET_INTERFACE_FUNCTION_DECL(f,sw_interface_add_del)
 #define VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION(f)            \
   /* Largest packet size for this interface. */
   u32 max_packet_bytes;
 
-  /* Number of extra bytes that go on the wire.
-     Packet length on wire
-     = max (length + per_packet_overhead_bytes, min_packet_bytes). */
-  u32 per_packet_overhead_bytes;
-
-  /* Receive and transmit layer 3 packet size limits (MRU/MTU). */
-  u32 max_l3_packet_bytes[VLIB_N_RX_TX];
-
   /* Hash table mapping sub interface id to sw_if_index. */
   uword *sub_interface_sw_if_index_by_id;
 
   VNET_FLOOD_CLASS_NO_FLOOD,
 } vnet_flood_class_t;
 
+/* Slots of the per-interface MTU array. A per-protocol slot left at 0 falls
+ * back to the VNET_MTU_L3 slot when the MTU is looked up. */
+typedef enum
+{
+  VNET_MTU_L3 = 0,             /* Default payload MTU (without L2 headers) */
+  VNET_MTU_IP4 = 1,            /* Per-protocol MTUs overriding default */
+  VNET_MTU_IP6 = 2,
+  VNET_MTU_MPLS = 3,
+  VNET_N_MTU                   /* Number of MTU slots */
+} vnet_mtu_t;
+
+extern vnet_mtu_t vnet_link_to_mtu (vnet_link_t link);
+
 /* Software-interface.  This corresponds to a Ethernet VLAN, ATM vc, a
    tunnel, etc.  Configuration (e.g. IP address) gets attached to
    software interface. */
   /* VNET_SW_INTERFACE_TYPE_HARDWARE. */
   u32 hw_if_index;
 
+  /* MTU for network layer (not including L2 headers) */
+  u32 mtu[VNET_N_MTU];
+
   /* VNET_SW_INTERFACE_TYPE_SUB. */
   vnet_sub_interface_t sub;
 
 
 
 #define foreach_vpe_api_msg                                     \
 _(SW_INTERFACE_SET_FLAGS, sw_interface_set_flags)               \
+_(HW_INTERFACE_SET_MTU, hw_interface_set_mtu)                   \
 _(SW_INTERFACE_SET_MTU, sw_interface_set_mtu)                   \
 _(WANT_INTERFACE_EVENTS, want_interface_events)                 \
 _(SW_INTERFACE_DUMP, sw_interface_dump)                         \
 }
 
 static void
-vl_api_sw_interface_set_mtu_t_handler (vl_api_sw_interface_set_mtu_t * mp)
+vl_api_hw_interface_set_mtu_t_handler (vl_api_hw_interface_set_mtu_t * mp)
 {
-  vl_api_sw_interface_set_mtu_reply_t *rmp;
+  vl_api_hw_interface_set_mtu_reply_t *rmp;
   vnet_main_t *vnm = vnet_get_main ();
   u32 sw_if_index = ntohl (mp->sw_if_index);
   u16 mtu = ntohs (mp->mtu);
 
   vnet_hw_interface_set_mtu (vnm, si->hw_if_index, mtu);
 
+  BAD_SW_IF_INDEX_LABEL;
+  REPLY_MACRO (VL_API_HW_INTERFACE_SET_MTU_REPLY);
+}
+
+static void
+vl_api_sw_interface_set_mtu_t_handler (vl_api_sw_interface_set_mtu_t * mp)
+{
+  vl_api_sw_interface_set_mtu_reply_t *rmp;
+  vnet_main_t *vnm = vnet_get_main ();
+  u32 sw_if_index = ntohl (mp->sw_if_index);
+  int rv = 0;
+  int i;
+  u32 per_protocol_mtu[VNET_N_MTU];
+
+  VALIDATE_SW_IF_INDEX (mp);
+
+  for (i = 0; i < VNET_N_MTU; i++)
+    per_protocol_mtu[i] = ntohl (mp->mtu[i]);
+
+  vnet_sw_interface_set_protocol_mtu (vnm, sw_if_index, per_protocol_mtu);
+
   BAD_SW_IF_INDEX_LABEL;
   REPLY_MACRO (VL_API_SW_INTERFACE_SET_MTU_REPLY);
 }
   mp->link_speed = ((hi->flags & VNET_HW_INTERFACE_FLAG_SPEED_MASK) >>
                    VNET_HW_INTERFACE_FLAG_SPEED_SHIFT);
   mp->link_mtu = ntohs (hi->max_packet_bytes);
+  mp->mtu[VNET_MTU_L3] = ntohl (swif->mtu[VNET_MTU_L3]);
+  mp->mtu[VNET_MTU_IP4] = ntohl (swif->mtu[VNET_MTU_IP4]);
+  mp->mtu[VNET_MTU_IP6] = ntohl (swif->mtu[VNET_MTU_IP6]);
+  mp->mtu[VNET_MTU_MPLS] = ntohl (swif->mtu[VNET_MTU_MPLS]);
+
   mp->context = context;
 
   strncpy ((char *) mp->interface_name,
 
         if (visible)
           vec_add1 (sorted_sis, si[0]);}
         ));
-      /* *INDENT-OFF* */
+      /* *INDENT-ON* */
       /* Sort by name. */
       vec_sort_with_function (sorted_sis, sw_interface_name_compare);
     }
                             format_ip6_address, r6, ia->address_length);
         }));
        /* *INDENT-ON* */
+      }
+    }
+  else
+    {
+      vec_foreach (si, sorted_sis)
+      {
+       vlib_cli_output (vm, "%U\n", format_vnet_sw_interface, vnm, si);
+      }
     }
-}
-
-else
-{
-  vec_foreach (si, sorted_sis)
-  {
-    vlib_cli_output (vm, "%U\n", format_vnet_sw_interface, vnm, si);
-  }
-}
 
 done:
-vec_free (sorted_sis);
-return error;
+  vec_free (sorted_sis);
+  return error;
 }
 
 /* *INDENT-OFF* */
 mtu_cmd (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
 {
   vnet_main_t *vnm = vnet_get_main ();
-  u32 hw_if_index, mtu;
+  u32 hw_if_index, sw_if_index, mtu;
   ethernet_main_t *em = &ethernet_main;
+  u32 mtus[VNET_N_MTU] = { 0, 0, 0, 0 };
 
   if (unformat (input, "%d %U", &mtu,
                unformat_vnet_hw_interface, vnm, &hw_if_index))
     {
+      /*
+       * Change physical MTU on interface. Only supported for Ethernet
+       * interfaces
+       */
       vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
       ethernet_interface_t *eif = ethernet_get_interface (em, hw_if_index);
 
                                  hi->max_supported_packet_bytes);
 
       vnet_hw_interface_set_mtu (vnm, hw_if_index, mtu);
+      goto done;
     }
+  else if (unformat (input, "packet %d %U", &mtu,
+                    unformat_vnet_sw_interface, vnm, &sw_if_index))
+    /* Set default packet MTU (including L3 header) */
+    mtus[VNET_MTU_L3] = mtu;
+  else if (unformat (input, "ip4 %d %U", &mtu,
+                    unformat_vnet_sw_interface, vnm, &sw_if_index))
+    mtus[VNET_MTU_IP4] = mtu;
+  else if (unformat (input, "ip6 %d %U", &mtu,
+                    unformat_vnet_sw_interface, vnm, &sw_if_index))
+    mtus[VNET_MTU_IP6] = mtu;
+  else if (unformat (input, "mpls %d %U", &mtu,
+                    unformat_vnet_sw_interface, vnm, &sw_if_index))
+    mtus[VNET_MTU_MPLS] = mtu;
   else
     return clib_error_return (0, "unknown input `%U'",
                              format_unformat_error, input);
+
+  vnet_sw_interface_set_protocol_mtu (vnm, sw_if_index, mtus);
+
+done:
   return 0;
 }
 
 /* *INDENT-OFF* */
 VLIB_CLI_COMMAND (set_interface_mtu_cmd, static) = {
   .path = "set interface mtu",
-  .short_help = "set interface mtu <value> <interface>",
+  .short_help = "set interface mtu [packet|ip4|ip6|mpls] <value> <interface>",
   .function = mtu_cmd,
 };
 /* *INDENT-ON* */
 
   return s;
 }
 
+/* Format the four MTU slots of a software interface as "L3/IP4/IP6/MPLS".
+ * Takes one vnet_sw_interface_t * from args and returns the extended
+ * format vector. */
+static u8 *
+format_vnet_sw_interface_mtu (u8 * s, va_list * args)
+{
+  vnet_sw_interface_t *si = va_arg (*args, vnet_sw_interface_t *);
+
+  /* The MTU slots are u32, so print them unsigned */
+  return format (s, "%u/%u/%u/%u", si->mtu[VNET_MTU_L3],
+                si->mtu[VNET_MTU_IP4],
+                si->mtu[VNET_MTU_IP6], si->mtu[VNET_MTU_MPLS]);
+}
+
 u8 *
 format_vnet_sw_interface (u8 * s, va_list * args)
 {
   vnet_interface_main_t *im = &vnm->interface_main;
 
   if (!si)
-    return format (s, "%=32s%=5s%=16s%=16s%=16s",
-                  "Name", "Idx", "State", "Counter", "Count");
+    return format (s, "%=32s%=5s%=10s%=21s%=16s%=16s",
+                  "Name", "Idx", "State", "MTU (L3/IP4/IP6/MPLS)", "Counter",
+                  "Count");
 
-  s = format (s, "%-32U%=5d%=16U",
+  s = format (s, "%-32U%=5d%=10U%=21U",
              format_vnet_sw_interface_name, vnm, si, si->sw_if_index,
-             format_vnet_sw_interface_flags, si->flags);
+             format_vnet_sw_interface_flags, si->flags,
+             format_vnet_sw_interface_mtu, si);
 
   s = format_vnet_sw_interface_cntrs (s, im, si);
 
 
   return hw->flags;
 }
 
-always_inline uword
-vnet_hw_interface_get_mtu (vnet_main_t * vnm, u32 hw_if_index,
-                          vlib_rx_or_tx_t dir)
+always_inline u32
+vnet_hw_interface_get_mtu (vnet_main_t * vnm, u32 hw_if_index)
 {
   vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
-  return hw->max_l3_packet_bytes[dir];
+  return hw->max_packet_bytes;
 }
 
-always_inline uword
-vnet_sw_interface_get_mtu (vnet_main_t * vnm, u32 sw_if_index,
-                          vlib_rx_or_tx_t dir)
+always_inline u32
+vnet_sw_interface_get_mtu (vnet_main_t * vnm, u32 sw_if_index, vnet_mtu_t af)
 {
-  vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
-  return (hw->max_l3_packet_bytes[dir]);
+  vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, sw_if_index);
+  u32 mtu;
+  mtu = sw->mtu[af] > 0 ? sw->mtu[af] : sw->mtu[VNET_MTU_L3];
+  if (mtu == 0)
+    return 9000;               /* $$$ Deal with interface-types not setting MTU */
+  return mtu;
 }
 
 always_inline uword
 /* Set the MTU on the HW interface */
 void vnet_hw_interface_set_mtu (vnet_main_t * vnm, u32 hw_if_index, u32 mtu);
 
+/* Set the MTU on the SW interface */
+void vnet_sw_interface_set_mtu (vnet_main_t * vnm, u32 sw_if_index, u32 mtu);
+void vnet_sw_interface_set_protocol_mtu (vnet_main_t * vnm, u32 sw_if_index,
+                                        u32 mtu[]);
+
 /* update the unnumbered state of an interface */
 void vnet_sw_interface_update_unnumbered (u32 sw_if_index,
                                          u32 ip_sw_if_index, u8 enable);
 
     {
       if (is_add)
        {
-         vnet_hw_interface_t *hw_if0;
-
-         hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index);
-
          pool_get (nm->if_radv_pool, a);
 
          ri = a - nm->if_radv_pool;
          a->send_radv = 1;
 
          /* fill in radv_info for this interface that will be needed later */
-         a->adv_link_mtu = hw_if0->max_l3_packet_bytes[VLIB_RX];
+         a->adv_link_mtu =
+           vnet_sw_interface_get_mtu (vnm, sw_if_index, VNET_MTU_IP6);
 
          clib_memcpy (a->link_layer_address, eth_if0->address, 6);
 
 
       hi->min_packet_bytes = 64 + sizeof (ip6_header_t);
     }
 
-  hi->per_packet_overhead_bytes = /* preamble */ 8 + /* inter frame gap */ 12;
-
   /* Standard default ipip MTU. */
-  hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 9000;
+  vnet_sw_interface_set_mtu (vnm, sw_if_index, 9000);
 
   t->tunnel_src = *src;
   t->tunnel_dst = *dst;
 
   t->dev_instance = t_idx;
   t->user_instance = t_idx;
 
-  hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 1480;
+  vnet_sw_interface_set_mtu (vnet_get_main (), t->sw_if_index, 1480);
 
   ipip_tunnel_db_add (t, &key);
 
 
 
       hi->min_packet_bytes = 64 + sizeof (gre_header_t) +
        sizeof (ip4_header_t) + sizeof (esp_header_t) + sizeof (esp_footer_t);
-      hi->per_packet_overhead_bytes =
-       /* preamble */ 8 + /* inter frame gap */ 12;
 
       /* Standard default gre MTU. */
-      hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] =
-       9000;
+      /* TODO: Should take tunnel overhead into consideration */
+      vnet_sw_interface_set_mtu (vnm, sw_if_index, 9000);
 
       clib_memcpy (&t->tunnel_src, &a->src, sizeof (t->tunnel_src));
       clib_memcpy (&t->tunnel_dst, &a->dst, sizeof (t->tunnel_dst));
 
         hi = vnet_get_hw_interface (vnm, mt->mt_hw_if_index);
     }
 
+    /* Standard default MPLS tunnel MTU. */
+    vnet_sw_interface_set_mtu (vnm, hi->sw_if_index, 9000);
+
     /*
      * Add the new tunnel to the tunnel DB - key:SW if index
      */
 
   ip_copy (&listener->sub_conn[SCTP_PRIMARY_PATH_IDX].connection.lcl_ip,
           &tep->ip, tep->is_ip4);
 
-  listener->sub_conn[SCTP_PRIMARY_PATH_IDX].PMTU =
-    vnet_sw_interface_get_mtu (vnet_get_main (), tep->sw_if_index, VLIB_TX);
+  u32 mtu = tep->is_ip4 ? vnet_sw_interface_get_mtu (vnet_get_main (),
+                                                    tep->sw_if_index,
+                                                    VNET_MTU_IP4) :
+    vnet_sw_interface_get_mtu (vnet_get_main (), tep->sw_if_index,
+                              VNET_MTU_IP6);
+  listener->sub_conn[SCTP_PRIMARY_PATH_IDX].PMTU = mtu;
   listener->sub_conn[SCTP_PRIMARY_PATH_IDX].connection.is_ip4 = tep->is_ip4;
   listener->sub_conn[SCTP_PRIMARY_PATH_IDX].connection.proto =
     TRANSPORT_PROTO_SCTP;
 
   clib_spinlock_lock_if_init (&tm->half_open_lock);
   sctp_conn = sctp_half_open_connection_new (thread_id);
-  sctp_conn->sub_conn[idx].PMTU =
-    vnet_sw_interface_get_mtu (vnet_get_main (), rmt->sw_if_index, VLIB_TX);
+  u32 mtu = rmt->is_ip4 ? vnet_sw_interface_get_mtu (vnet_get_main (),
+                                                    rmt->sw_if_index,
+                                                    VNET_MTU_IP4) :
+    vnet_sw_interface_get_mtu (vnet_get_main (), rmt->sw_if_index,
+                              VNET_MTU_IP6);
+  sctp_conn->sub_conn[idx].PMTU = mtu;
 
   transport_connection_t *trans_conn = &sctp_conn->sub_conn[idx].connection;
   ip_copy (&trans_conn->rmt_ip, &rmt->ip, rmt->is_ip4);
 
   hi->min_packet_bytes = 40 + 16;
 
   /* Standard default ethernet MTU. */
-  hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 1500;
+  vnet_sw_interface_set_mtu (vnm, sw_if_index, 1500);
 
   vec_free (hi->hw_address);
   vec_add (hi->hw_address, address, sizeof (address));
 
     hw = vnet_get_hw_interface (tm->vnet_main, ti->hw_if_index);
     hw->min_supported_packet_bytes = TAP_MTU_MIN;
     hw->max_supported_packet_bytes = TAP_MTU_MAX;
-    hw->max_l3_packet_bytes[VLIB_RX] = hw->max_l3_packet_bytes[VLIB_TX] =
-      hw->max_supported_packet_bytes - sizeof (ethernet_header_t);
+    vnet_sw_interface_set_mtu (tm->vnet_main, hw->sw_if_index, 9000);
     ti->sw_if_index = hw->sw_if_index;
     if (ap->sw_if_indexp)
       *(ap->sw_if_indexp) = hw->sw_if_index;
 
     * sw_interface_add_del_functions[VNET_ITF_FUNC_N_PRIO];
     _vnet_interface_function_list_elt_t
     * sw_interface_admin_up_down_functions[VNET_ITF_FUNC_N_PRIO];
+    _vnet_interface_function_list_elt_t
+    * sw_interface_mtu_change_functions[VNET_ITF_FUNC_N_PRIO];
 
   uword *interface_tag_by_sw_if_index;
 
 
   FINISH;
 }
 
-static void *vl_api_sw_interface_set_mtu_t_print
-  (vl_api_sw_interface_set_mtu_t * mp, void *handle)
+static void *vl_api_hw_interface_set_mtu_t_print
+  (vl_api_hw_interface_set_mtu_t * mp, void *handle)
 {
   u8 *s;
 
 _(IP6_FIB_DUMP, ip6_fib_dump)                                           \
 _(FEATURE_ENABLE_DISABLE, feature_enable_disable)                      \
 _(SW_INTERFACE_TAG_ADD_DEL, sw_interface_tag_add_del)                  \
-_(SW_INTERFACE_SET_MTU, sw_interface_set_mtu)                           \
+_(HW_INTERFACE_SET_MTU, hw_interface_set_mtu)                           \
 _(P2P_ETHERNET_ADD, p2p_ethernet_add)                                   \
 _(P2P_ETHERNET_DEL, p2p_ethernet_del)                                  \
 _(TCP_CONFIGURE_SRC_ADDRESSES, tcp_configure_src_addresses)            \
 
                  UDP(sport=1234, dport=1234) /
                  Raw('\xa5' * 2000))
 
-        self.vapi.sw_interface_set_mtu(self.pg1.sw_if_index, 1500)
+        self.vapi.sw_interface_set_mtu(self.pg1.sw_if_index, [1500, 0, 0, 0])
 
         rx = self.send_and_expect(self.pg0, p_mtu * 65, self.pg0)
         rx = rx[0]
         self.assertEqual(icmp.src, self.pg0.remote_ip4)
         self.assertEqual(icmp.dst, self.pg1.remote_ip4)
 
-        self.vapi.sw_interface_set_mtu(self.pg1.sw_if_index, 2500)
+        self.vapi.sw_interface_set_mtu(self.pg1.sw_if_index, [2500, 0, 0, 0])
         rx = self.send_and_expect(self.pg0, p_mtu * 65, self.pg1)
 
+        # Reset MTU for subsequent tests
+        self.vapi.sw_interface_set_mtu(self.pg1.sw_if_index, [9000, 0, 0, 0])
 
 if __name__ == '__main__':
     unittest.main(testRunner=VppTestRunner)
 
         rv = self.vapi.sw_interface_dump()
         for i in rv:
             if i.sw_if_index == sw_if_index:
-                return i.link_mtu
+                return i.mtu[0]
         return 0
 
     def test_ip4_mtu(self):
         """ IP4 MTU test """
 
-        #
-        # TODO: Link MTU is 216 bytes 'off'. Fix when L3 MTU patches committed
-        #
-        mtu_offset = 216
         p_ether = Ether(src=self.pg0.remote_mac, dst=self.pg0.local_mac)
         p_ip4 = IP(src=self.pg0.remote_ip4, dst=self.pg1.remote_ip4,
                    flags='DF')
 
-        # TODO: Re-enable when MTU fixes are committed
         current_mtu = self.get_mtu(self.pg1.sw_if_index)
-        current_mtu -= mtu_offset
 
         p_payload = UDP(sport=1234, dport=1234) / self.payload(
             current_mtu - 20 - 8)
             self.validate(p[1], p4_reply)
 
         # MTU
-        self.vapi.sw_interface_set_mtu(self.pg1.sw_if_index, 576 + mtu_offset)
-        self.assertEqual(576, self.get_mtu(self.pg1.sw_if_index) - mtu_offset)
+        self.vapi.sw_interface_set_mtu(self.pg1.sw_if_index, [576, 0, 0, 0])
+        self.assertEqual(576, self.get_mtu(self.pg1.sw_if_index))
 
         # Should fail. Too large MTU
         p_icmp4 = ICMP(type='dest-unreach', code='fragmentation-needed',
         '''
         # Reset MTU
         self.vapi.sw_interface_set_mtu(self.pg1.sw_if_index,
-                                       current_mtu + mtu_offset)
+                                       [current_mtu, 0, 0, 0])
 
     def test_ip6_mtu(self):
         """ IP6 MTU test """
 
-        #
-        # TODO: Link MTU is 216 bytes 'off'. Fix when L3 MTU patches committed
-        #
-        mtu_offset = 216
         current_mtu = self.get_mtu(self.pg1.sw_if_index)
-        current_mtu -= mtu_offset
 
         p_ether = Ether(src=self.pg0.remote_mac, dst=self.pg0.local_mac)
         p_ip6 = IPv6(src=self.pg0.remote_ip6, dst=self.pg1.remote_ip6)
             self.validate(p[1], p6_reply)
 
         # MTU (only checked on encap)
-        self.vapi.sw_interface_set_mtu(self.pg1.sw_if_index, 1280 + mtu_offset)
-        self.assertEqual(1280, self.get_mtu(self.pg1.sw_if_index) - mtu_offset)
+        self.vapi.sw_interface_set_mtu(self.pg1.sw_if_index, [1280, 0, 0, 0])
+        self.assertEqual(1280, self.get_mtu(self.pg1.sw_if_index))
 
         # Should fail. Too large MTU
         p_icmp6 = ICMPv6PacketTooBig(mtu=1280, cksum=0x4c7a)
             self.validate_bytes(str(p[1]), icmp6_reply)
 
         # Reset MTU
-        self.vapi.sw_interface_set_mtu(self.pg1.sw_if_index, current_mtu)
+        self.vapi.sw_interface_set_mtu(self.pg1.sw_if_index,
+                                       [current_mtu, 0, 0, 0])
 
 
 if __name__ == '__main__':
 
                         {'sw_if_index': sw_if_index,
                          'admin_up_down': admin_up_down})
 
-    def sw_interface_set_mtu(self, sw_if_index, mtu):
+    def sw_interface_set_mtu(self, sw_if_index, mtu=[0, 0, 0, 0]):
         """
         :param sw_if_index:
         :param mtu: