// Copyright (c) 2020 Cisco and/or its affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at:
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// NOTE: the declarations below belong to package binapigen and depend on the
// standard library packages go/token, strings, unicode and unicode/utf8.

// commonInitialisms is a set of common initialisms that need to stay in upper case.
// Keys are upper case. An entry mapped to false is deliberately disabled and is
// treated like any ordinary word.
var commonInitialisms = map[string]bool{
	"ACL": true,
	"API": true,
	// NOTE: There are only two occurrences of the word 'ascii' and
	// these already have initialism before and after ASCII part,
	// thus disabling initialism for this case.
	"ASCII": false,
	"CPU":   true,
	"CSS":   true,
	"DNS":   true,
	"DHCP":  true,
	"EOF":   true,
	"GUID":  true,
	"HTML":  true,
	"HTTP":  true,
	"HTTPS": true,
	"ID":    true,
	"IP":    true,
	"ICMP":  true,
	"JSON":  true,
	"LHS":   true,
	"QPS":   true,
	"PID":   true,
	"RAM":   true,
	"RHS":   true,
	"RPC":   true,
	"SLA":   true,
	"SMTP":  true,
	"SQL":   true,
	"SSH":   true,
	"TCP":   true,
	"TLS":   true,
	"TTL":   true,
	"UDP":   true,
	"UI":    true,
	"UID":   true,
	"UUID":  true,
	"URI":   true,
	"URL":   true,
	"UTF8":  true,
	"VM":    true,
	"VPN":   true,
	"XML":   true,
	"XMPP":  true,
	"XSRF":  true,
	"XSS":   true,
}

// specialInitialisms maps the upper-cased form of a word to the mixed-case
// spelling it must take, for initialisms where only part stays in upper case.
var specialInitialisms = map[string]string{
	"IPV": "IPv",
}

// usesInitialism returns the spelling to use for s when its upper-cased form is
// an enabled entry of commonInitialisms (the upper-cased form itself) or a key
// of specialInitialisms (the mapped value). It returns "" for any other word.
func usesInitialism(s string) string {
	u := strings.ToUpper(s)
	if commonInitialisms[u] {
		return u
	}
	if special, ok := specialInitialisms[u]; ok {
		return special
	}
	return ""
}

// isWordSeparator reports whether r ends a word for the purposes of titleWords.
// ASCII letters, ASCII digits and underscore never separate; every other ASCII
// rune does. Outside ASCII, letters and digits never separate and only white
// space does.
func isWordSeparator(r rune) bool {
	if r <= unicode.MaxASCII {
		switch {
		case '0' <= r && r <= '9', 'a' <= r && r <= 'z', 'A' <= r && r <= 'Z', r == '_':
			return false
		}
		return true
	}
	if unicode.IsLetter(r) || unicode.IsDigit(r) {
		return false
	}
	return unicode.IsSpace(r)
}

// titleWords returns s with the first rune of every word mapped to its Unicode
// title case. It applies the same word-boundary rule as the deprecated
// strings.Title, so it is a drop-in replacement for that call.
func titleWords(s string) string {
	// Behave as if s were preceded by a separator so its first rune is title-cased.
	prev := ' '
	return strings.Map(func(r rune) rune {
		startsWord := isWordSeparator(prev)
		prev = r
		if startsWord {
			return unicode.ToTitle(r)
		}
		return r
	}, s)
}

// camelCaseName returns correct name identifier (camelCase).
//
// The name is first title-cased word by word, then split into words at every
// underscore and at every transition from a lower-case rune to a rune that is
// not lower case. Underscores are removed, except that a single underscore is
// kept when the run of underscores sits between two digits. A word that is a
// known initialism (see usesInitialism) is replaced by its canonical spelling;
// any other all-lowercase word after the first gets its first rune upper-cased.
func camelCaseName(name string) (should string) {
	name = titleWords(name)

	// Fast path for simple cases: "_" and all lowercase.
	if name == "_" {
		return name
	}
	allLower := true
	for _, r := range name {
		if !unicode.IsLower(r) {
			allLower = false
			break
		}
	}
	if allLower {
		return name
	}

	// Split camelCase at any lower->upper transition, and split on underscores.
	// Check each word for common initialisms.
	runes := []rune(name)
	w, i := 0, 0 // index of start of word, scan
	for i+1 <= len(runes) {
		eow := false // whether we hit the end of a word
		switch {
		case i+1 == len(runes):
			eow = true
		case runes[i+1] == '_':
			// underscore; shift the remainder forward over any run of underscores
			eow = true
			n := 1
			for i+n+1 < len(runes) && runes[i+n+1] == '_' {
				n++
			}

			// Leave at most one underscore if the underscore is between two digits
			if i+n+1 < len(runes) && unicode.IsDigit(runes[i]) && unicode.IsDigit(runes[i+n+1]) {
				n--
			}

			copy(runes[i+1:], runes[i+n+1:])
			runes = runes[:len(runes)-n]
		case unicode.IsLower(runes[i]) && !unicode.IsLower(runes[i+1]):
			// lower->non-lower
			eow = true
		}
		i++
		if !eow {
			continue
		}

		// [w,i) is a word.
		word := string(runes[w:i])
		if u := usesInitialism(word); u != "" {
			// Keep consistent case, which is lowercase only at the start.
			if w == 0 && unicode.IsLower(runes[w]) {
				u = strings.ToLower(u)
			}
			// The replacement has exactly as many runes as the word,
			// so it can be copied over the word in place.
			copy(runes[w:], []rune(u))
		} else if w > 0 && strings.ToLower(word) == word {
			// already all lowercase, and not the first word, so uppercase the first character.
			runes[w] = unicode.ToUpper(runes[w])
		}
		w = i
	}
	return string(runes)
}

// sanitizedName returns the string converted into a valid Go identifier.
//
// A name that starts with a digit is prefixed with "_", because appending a
// suffix cannot make it a legal identifier. A Go keyword, the empty string and
// a name starting with any other non-letter get "s" appended. Every other name
// is returned unchanged. Only the first rune is inspected: characters later in
// the name that are illegal in identifiers are not repaired.
func sanitizedName(s string) string {
	r, _ := utf8.DecodeRuneInString(s)
	switch {
	case unicode.IsDigit(r):
		return "_" + s
	case token.Lookup(s).IsKeyword() || !unicode.IsLetter(r):
		return s + "s"
	}
	return s
}