binapigen/strings.go

   1 //  Copyright (c) 2020 Cisco and/or its affiliates.
   2 //
   3 //  Licensed under the Apache License, Version 2.0 (the "License");
   4 //  you may not use this file except in compliance with the License.
   5 //  You may obtain a copy of the License at:
   6 //
   7 //      http://www.apache.org/licenses/LICENSE-2.0
   8 //
   9 //  Unless required by applicable law or agreed to in writing, software
  10 //  distributed under the License is distributed on an "AS IS" BASIS,
  11 //  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 //  See the License for the specific language governing permissions and
  13 //  limitations under the License.
  14
  15 package binapigen
  16
  17 import (
  18         "go/token"
  19         "strings"
  20         "unicode"
  21         "unicode/utf8"
  22 )
  23
  24 // commonInitialisms is a set of common initialisms that need to stay in upper case.
  25 var commonInitialisms = map[string]bool{
  26         "ACL": true,
  27         "API": true,
  28         // NOTE: There are only two occurences of the word 'ascii' and
  29         // these already have initialism before and after ASCII part,
  30         // thus disabling initialism for this case.
  31         "ASCII": false,
  32         "CPU":   true,
  33         "CSS":   true,
  34         "DNS":   true,
  35         "DHCP":  true,
  36         "EOF":   true,
  37         "GUID":  true,
  38         "HTML":  true,
  39         "HTTP":  true,
  40         "HTTPS": true,
  41         "ID":    true,
  42         "IP":    true,
  43         "ICMP":  true,
  44         "JSON":  true,
  45         "LHS":   true,
  46         "QPS":   true,
  47         "PID":   true,
  48         "RAM":   true,
  49         "RHS":   true,
  50         "RPC":   true,
  51         "SLA":   true,
  52         "SMTP":  true,
  53         "SQL":   true,
  54         "SSH":   true,
  55         "TCP":   true,
  56         "TLS":   true,
  57         "TTL":   true,
  58         "UDP":   true,
  59         "UI":    true,
  60         "UID":   true,
  61         "UUID":  true,
  62         "URI":   true,
  63         "URL":   true,
  64         "UTF8":  true,
  65         "VM":    true,
  66         "VPN":   true,
  67         "XML":   true,
  68         "XMPP":  true,
  69         "XSRF":  true,
  70         "XSS":   true,
  71 }
  72
  73 // specialInitialisms is a set of special initialisms that need part to stay in upper case.
  74 var specialInitialisms = map[string]string{
  75         "IPV": "IPv",
  76 }
  77
  78 func usesInitialism(s string) string {
  79         u := strings.ToUpper(s)
  80         if commonInitialisms[u] {
  81                 return u
  82         } else if su, ok := specialInitialisms[u]; ok {
  83                 return su
  84         }
  85         return ""
  86 }
  87
  88 // camelCaseName returns correct name identifier (camelCase).
  89 func camelCaseName(name string) (should string) {
  90         name = strings.Title(name)
  91
  92         // Fast path for simple cases: "_" and all lowercase.
  93         if name == "_" {
  94                 return name
  95         }
  96         allLower := true
  97         for _, r := range name {
  98                 if !unicode.IsLower(r) {
  99                         allLower = false
 100                         break
 101                 }
 102         }
 103         if allLower {
 104                 return name
 105         }
 106
 107         // Split camelCase at any lower->upper transition, and split on underscores.
 108         // Check each word for common initialisms.
 109         runes := []rune(name)
 110         w, i := 0, 0 // index of start of word, scan
 111         for i+1 <= len(runes) {
 112                 eow := false // whether we hit the end of a word
 113                 if i+1 == len(runes) {
 114                         eow = true
 115                 } else if runes[i+1] == '_' {
 116                         // underscore; shift the remainder forward over any run of underscores
 117                         eow = true
 118                         n := 1
 119                         for i+n+1 < len(runes) && runes[i+n+1] == '_' {
 120                                 n++
 121                         }
 122
 123                         // Leave at most one underscore if the underscore is between two digits
 124                         if i+n+1 < len(runes) && unicode.IsDigit(runes[i]) && unicode.IsDigit(runes[i+n+1]) {
 125                                 n--
 126                         }
 127
 128                         copy(runes[i+1:], runes[i+n+1:])
 129                         runes = runes[:len(runes)-n]
 130                 } else if unicode.IsLower(runes[i]) && !unicode.IsLower(runes[i+1]) {
 131                         // lower->non-lower
 132                         eow = true
 133                 }
 134                 i++
 135                 if !eow {
 136                         continue
 137                 }
 138
 139                 // [w,i) is a word.
 140                 word := string(runes[w:i])
 141                 if u := usesInitialism(word); u != "" {
 142                         // Keep consistent case, which is lowercase only at the start.
 143                         if w == 0 && unicode.IsLower(runes[w]) {
 144                                 u = strings.ToLower(u)
 145                         }
 146                         // All the common initialisms are ASCII,
 147                         // so we can replace the bytes exactly.
 148                         copy(runes[w:], []rune(u))
 149                 } else if w > 0 && strings.ToLower(word) == word {
 150                         // already all lowercase, and not the first word, so uppercase the first character.
 151                         runes[w] = unicode.ToUpper(runes[w])
 152                 }
 153                 w = i
 154         }
 155         return string(runes)
 156 }
 157
 158 // sanitizedName returns the string converted into a valid Go identifier.
 159 func sanitizedName(s string) string {
 160         r, _ := utf8.DecodeRuneInString(s)
 161         if token.Lookup(s).IsKeyword() || !unicode.IsLetter(r) {
 162                 return s + "s"
 163         }
 164         return s
 165 }