91 lines
1.5 KiB
Go
91 lines
1.5 KiB
Go
![]() |
package data
|
||
|
|
||
|
import (
|
||
|
"strings"
|
||
|
"unicode"
|
||
|
)
|
||
|
|
||
|
type WordBanTableExtend struct {
|
||
|
*WordBanTable
|
||
|
|
||
|
extList [][]string
|
||
|
extMap map[string]struct{}
|
||
|
}
|
||
|
|
||
|
func (this *WordBanTableExtend) init() error {
|
||
|
if this.WordBanTable == nil {
|
||
|
return nil
|
||
|
}
|
||
|
|
||
|
this.extMap = map[string]struct{}{}
|
||
|
for _, d := range this.WordBanTable.l {
|
||
|
var words = this.split(d.Lang1)
|
||
|
if len(words) > 2 {
|
||
|
this.extList = append(this.extList, words)
|
||
|
}
|
||
|
|
||
|
this.extMap[d.Lang1] = struct{}{}
|
||
|
}
|
||
|
|
||
|
return nil
|
||
|
}
|
||
|
|
||
|
func (this *WordBanTableExtend) Check(sentence string) bool {
|
||
|
if len(this.extMap) == 0 && len(this.extList) == 0 {
|
||
|
return true
|
||
|
}
|
||
|
|
||
|
if _, ok := this.extMap[sentence]; ok {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
for word := range this.extMap {
|
||
|
if strings.Index(sentence, word) >= 0 {
|
||
|
return false
|
||
|
}
|
||
|
}
|
||
|
|
||
|
for _, words := range this.extList {
|
||
|
var includes = 0
|
||
|
for _, word := range words {
|
||
|
if strings.Index(sentence, word) > 0 {
|
||
|
includes += 1
|
||
|
}
|
||
|
}
|
||
|
if includes >= len(words) {
|
||
|
return false
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return true
|
||
|
}
|
||
|
|
||
|
func (this *WordBanTableExtend) split(sentence string) []string {
|
||
|
var runes = []rune(sentence)
|
||
|
|
||
|
var word []rune
|
||
|
var words []string
|
||
|
for _, c := range runes {
|
||
|
if unicode.IsSpace(c) {
|
||
|
words = append(words, string(word))
|
||
|
word = nil
|
||
|
}
|
||
|
|
||
|
if unicode.Is(unicode.Han, c) {
|
||
|
if len(word) > 0 {
|
||
|
words = append(words, string(word))
|
||
|
word = nil
|
||
|
}
|
||
|
words = append(words, string([]rune{c}))
|
||
|
} else {
|
||
|
word = append(word, c)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if len(word) > 0 {
|
||
|
words = append(words, string(word))
|
||
|
}
|
||
|
|
||
|
return words
|
||
|
}
|