domain_set.go 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211
  1. package trie
  2. // Package succinct provides several succinct data types.
  3. // Modify from https://github.com/openacid/succinct/blob/d4684c35d123f7528b14e03c24327231723db704/sskv.go
  4. import (
  5. "sort"
  6. "strings"
  7. "github.com/metacubex/mihomo/common/utils"
  8. "github.com/openacid/low/bitmap"
  9. )
  10. const (
  11. complexWildcardByte = byte('+')
  12. wildcardByte = byte('*')
  13. domainStepByte = byte('.')
  14. )
  15. type DomainSet struct {
  16. leaves, labelBitmap []uint64
  17. labels []byte
  18. ranks, selects []int32
  19. }
  20. type qElt struct{ s, e, col int }
  21. // NewDomainSet creates a new *DomainSet struct, from a DomainTrie.
  22. func (t *DomainTrie[T]) NewDomainSet() *DomainSet {
  23. reserveDomains := make([]string, 0)
  24. t.Foreach(func(domain string, data T) bool {
  25. reserveDomains = append(reserveDomains, utils.Reverse(domain))
  26. return true
  27. })
  28. // ensure that the same prefix is continuous
  29. // and according to the ascending sequence of length
  30. sort.Strings(reserveDomains)
  31. keys := reserveDomains
  32. if len(keys) == 0 {
  33. return nil
  34. }
  35. ss := &DomainSet{}
  36. lIdx := 0
  37. queue := []qElt{{0, len(keys), 0}}
  38. for i := 0; i < len(queue); i++ {
  39. elt := queue[i]
  40. if elt.col == len(keys[elt.s]) {
  41. elt.s++
  42. // a leaf node
  43. setBit(&ss.leaves, i, 1)
  44. }
  45. for j := elt.s; j < elt.e; {
  46. frm := j
  47. for ; j < elt.e && keys[j][elt.col] == keys[frm][elt.col]; j++ {
  48. }
  49. queue = append(queue, qElt{frm, j, elt.col + 1})
  50. ss.labels = append(ss.labels, keys[frm][elt.col])
  51. setBit(&ss.labelBitmap, lIdx, 0)
  52. lIdx++
  53. }
  54. setBit(&ss.labelBitmap, lIdx, 1)
  55. lIdx++
  56. }
  57. ss.init()
  58. return ss
  59. }
  60. // Has query for a key and return whether it presents in the DomainSet.
  61. func (ss *DomainSet) Has(key string) bool {
  62. if ss == nil {
  63. return false
  64. }
  65. key = utils.Reverse(key)
  66. key = strings.ToLower(key)
  67. // no more labels in this node
  68. // skip character matching
  69. // go to next level
  70. nodeId, bmIdx := 0, 0
  71. type wildcardCursor struct {
  72. bmIdx, index int
  73. }
  74. stack := make([]wildcardCursor, 0)
  75. for i := 0; i < len(key); i++ {
  76. RESTART:
  77. c := key[i]
  78. for ; ; bmIdx++ {
  79. if getBit(ss.labelBitmap, bmIdx) != 0 {
  80. if len(stack) > 0 {
  81. cursor := stack[len(stack)-1]
  82. stack = stack[0 : len(stack)-1]
  83. // back wildcard and find next node
  84. nextNodeId := countZeros(ss.labelBitmap, ss.ranks, cursor.bmIdx+1)
  85. nextBmIdx := selectIthOne(ss.labelBitmap, ss.ranks, ss.selects, nextNodeId-1) + 1
  86. j := cursor.index
  87. for ; j < len(key) && key[j] != domainStepByte; j++ {
  88. }
  89. if j == len(key) {
  90. if getBit(ss.leaves, nextNodeId) != 0 {
  91. return true
  92. } else {
  93. goto RESTART
  94. }
  95. }
  96. for ; nextBmIdx-nextNodeId < len(ss.labels); nextBmIdx++ {
  97. if ss.labels[nextBmIdx-nextNodeId] == domainStepByte {
  98. bmIdx = nextBmIdx
  99. nodeId = nextNodeId
  100. i = j
  101. goto RESTART
  102. }
  103. }
  104. }
  105. return false
  106. }
  107. // handle wildcard for domain
  108. if ss.labels[bmIdx-nodeId] == complexWildcardByte {
  109. return true
  110. } else if ss.labels[bmIdx-nodeId] == wildcardByte {
  111. cursor := wildcardCursor{}
  112. cursor.bmIdx = bmIdx
  113. cursor.index = i
  114. stack = append(stack, cursor)
  115. } else if ss.labels[bmIdx-nodeId] == c {
  116. break
  117. }
  118. }
  119. nodeId = countZeros(ss.labelBitmap, ss.ranks, bmIdx+1)
  120. bmIdx = selectIthOne(ss.labelBitmap, ss.ranks, ss.selects, nodeId-1) + 1
  121. }
  122. return getBit(ss.leaves, nodeId) != 0
  123. }
  124. func (ss *DomainSet) keys(f func(key string) bool) {
  125. var currentKey []byte
  126. var traverse func(int, int) bool
  127. traverse = func(nodeId, bmIdx int) bool {
  128. if getBit(ss.leaves, nodeId) != 0 {
  129. if !f(string(currentKey)) {
  130. return false
  131. }
  132. }
  133. for ; ; bmIdx++ {
  134. if getBit(ss.labelBitmap, bmIdx) != 0 {
  135. return true
  136. }
  137. nextLabel := ss.labels[bmIdx-nodeId]
  138. currentKey = append(currentKey, nextLabel)
  139. nextNodeId := countZeros(ss.labelBitmap, ss.ranks, bmIdx+1)
  140. nextBmIdx := selectIthOne(ss.labelBitmap, ss.ranks, ss.selects, nextNodeId-1) + 1
  141. if !traverse(nextNodeId, nextBmIdx) {
  142. return false
  143. }
  144. currentKey = currentKey[:len(currentKey)-1]
  145. }
  146. }
  147. traverse(0, 0)
  148. return
  149. }
  150. func (ss *DomainSet) Foreach(f func(key string) bool) {
  151. ss.keys(func(key string) bool {
  152. return f(utils.Reverse(key))
  153. })
  154. }
  155. func setBit(bm *[]uint64, i int, v int) {
  156. for i>>6 >= len(*bm) {
  157. *bm = append(*bm, 0)
  158. }
  159. (*bm)[i>>6] |= uint64(v) << uint(i&63)
  160. }
  161. func getBit(bm []uint64, i int) uint64 {
  162. return bm[i>>6] & (1 << uint(i&63))
  163. }
  164. // init builds pre-calculated cache to speed up rank() and select()
  165. func (ss *DomainSet) init() {
  166. ss.selects, ss.ranks = bitmap.IndexSelect32R64(ss.labelBitmap)
  167. }
  168. // countZeros counts the number of "0" in a bitmap before the i-th bit(excluding
  169. // the i-th bit) on behalf of rank index.
  170. // E.g.:
  171. //
  172. // countZeros("010010", 4) == 3
  173. // // 012345
  174. func countZeros(bm []uint64, ranks []int32, i int) int {
  175. a, _ := bitmap.Rank64(bm, ranks, int32(i))
  176. return i - int(a)
  177. }
  178. // selectIthOne returns the index of the i-th "1" in a bitmap, on behalf of rank
  179. // and select indexes.
  180. // E.g.:
  181. //
  182. // selectIthOne("010010", 1) == 4
  183. // // 012345
  184. func selectIthOne(bm []uint64, ranks, selects []int32, i int) int {
  185. a, _ := bitmap.Select32R64(bm, selects, ranks, int32(i))
  186. return int(a)
  187. }