Serge Bazanski | cc25bdf | 2018-10-25 14:02:58 +0200 | [diff] [blame] | 1 | // Copyright 2015 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | // +build ignore |
| 6 | |
| 7 | package main |
| 8 | |
| 9 | // This code is shared between the main code generator and the test code. |
| 10 | |
| 11 | import ( |
| 12 | "flag" |
| 13 | "log" |
| 14 | "strconv" |
| 15 | "strings" |
| 16 | |
| 17 | "golang.org/x/text/internal/gen" |
| 18 | "golang.org/x/text/internal/ucd" |
| 19 | ) |
| 20 | |
// outputFile is the value of the -out flag: the name of the file to write.
// It defaults to "tables.go".
var outputFile = flag.String("out", "tables.go", "output file")
| 24 | |
| 25 | var typeMap = map[string]elem{ |
| 26 | "A": tagAmbiguous, |
| 27 | "N": tagNeutral, |
| 28 | "Na": tagNarrow, |
| 29 | "W": tagWide, |
| 30 | "F": tagFullwidth, |
| 31 | "H": tagHalfwidth, |
| 32 | } |
| 33 | |
| 34 | // getWidthData calls f for every entry for which it is defined. |
| 35 | // |
| 36 | // f may be called multiple times for the same rune. The last call to f is the |
| 37 | // correct value. f is not called for all runes. The default tag type is |
| 38 | // Neutral. |
| 39 | func getWidthData(f func(r rune, tag elem, alt rune)) { |
| 40 | // Set the default values for Unified Ideographs. In line with Annex 11, |
| 41 | // we encode full ranges instead of the defined runes in Unified_Ideograph. |
| 42 | for _, b := range []struct{ lo, hi rune }{ |
| 43 | {0x4E00, 0x9FFF}, // the CJK Unified Ideographs block, |
| 44 | {0x3400, 0x4DBF}, // the CJK Unified Ideographs Externsion A block, |
| 45 | {0xF900, 0xFAFF}, // the CJK Compatibility Ideographs block, |
| 46 | {0x20000, 0x2FFFF}, // the Supplementary Ideographic Plane, |
| 47 | {0x30000, 0x3FFFF}, // the Tertiary Ideographic Plane, |
| 48 | } { |
| 49 | for r := b.lo; r <= b.hi; r++ { |
| 50 | f(r, tagWide, 0) |
| 51 | } |
| 52 | } |
| 53 | |
| 54 | inverse := map[rune]rune{} |
| 55 | maps := map[string]bool{ |
| 56 | "<wide>": true, |
| 57 | "<narrow>": true, |
| 58 | } |
| 59 | |
| 60 | // We cannot reuse package norm's decomposition, as we need an unexpanded |
| 61 | // decomposition. We make use of the opportunity to verify that the |
| 62 | // decomposition type is as expected. |
| 63 | ucd.Parse(gen.OpenUCDFile("UnicodeData.txt"), func(p *ucd.Parser) { |
| 64 | r := p.Rune(0) |
| 65 | s := strings.SplitN(p.String(ucd.DecompMapping), " ", 2) |
| 66 | if !maps[s[0]] { |
| 67 | return |
| 68 | } |
| 69 | x, err := strconv.ParseUint(s[1], 16, 32) |
| 70 | if err != nil { |
| 71 | log.Fatalf("Error parsing rune %q", s[1]) |
| 72 | } |
| 73 | if inverse[r] != 0 || inverse[rune(x)] != 0 { |
| 74 | log.Fatalf("Circular dependency in mapping between %U and %U", r, x) |
| 75 | } |
| 76 | inverse[r] = rune(x) |
| 77 | inverse[rune(x)] = r |
| 78 | }) |
| 79 | |
| 80 | // <rune range>;<type> |
| 81 | ucd.Parse(gen.OpenUCDFile("EastAsianWidth.txt"), func(p *ucd.Parser) { |
| 82 | tag, ok := typeMap[p.String(1)] |
| 83 | if !ok { |
| 84 | log.Fatalf("Unknown width type %q", p.String(1)) |
| 85 | } |
| 86 | r := p.Rune(0) |
| 87 | alt, ok := inverse[r] |
| 88 | if tag == tagFullwidth || tag == tagHalfwidth && r != wonSign { |
| 89 | tag |= tagNeedsFold |
| 90 | if !ok { |
| 91 | log.Fatalf("Narrow or wide rune %U has no decomposition", r) |
| 92 | } |
| 93 | } |
| 94 | f(r, tag, alt) |
| 95 | }) |
| 96 | } |