Skip to content

Commit

Permalink
switch emoji detection to external lib gomoji
Browse files Browse the repository at this point in the history
  • Loading branch information
umputun committed Jan 8, 2025
1 parent 4298319 commit 4941fff
Show file tree
Hide file tree
Showing 36 changed files with 63,838 additions and 69 deletions.
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ require (
github.com/didip/tollbooth/v7 v7.0.2
github.com/didip/tollbooth_chi v0.0.0-20220719025231-d662a7f6928f
github.com/fatih/color v1.18.0
github.com/forPelevin/gomoji v1.2.0
github.com/go-chi/chi/v5 v5.2.0
github.com/go-pkgz/lgr v0.11.1
github.com/go-pkgz/rest v1.20.2
Expand All @@ -33,6 +34,7 @@ require (
github.com/ncruces/go-strftime v0.1.9 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
github.com/rivo/uniseg v0.4.3 // indirect
github.com/samber/lo v1.47.0 // indirect
golang.org/x/crypto v0.31.0 // indirect
golang.org/x/exp v0.0.0-20241217172543-b2144cdd0a67 // indirect
Expand Down
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkp
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM=
github.com/fatih/color v1.18.0/go.mod h1:4FelSpRwEGDpQ12mAdzqdOukCy4u8WUtOY6lkT/6HfU=
github.com/forPelevin/gomoji v1.2.0 h1:9k4WVSSkE1ARO/BWywxgEUBvR/jMnao6EZzrql5nxJ8=
github.com/forPelevin/gomoji v1.2.0/go.mod h1:8+Z3KNGkdslmeGZBC3tCrwMrcPy5GRzAD+gL9NAwMXg=
github.com/go-chi/chi/v5 v5.2.0 h1:Aj1EtB0qR2Rdo2dG4O94RIU35w2lvQSj6BRA4+qwFL0=
github.com/go-chi/chi/v5 v5.2.0/go.mod h1:DslCQbL2OYiznFReuXYUmQ2hGd1aDpCnlMNITLSKoi8=
github.com/go-pkgz/expirable-cache v0.1.0/go.mod h1:GTrEl0X+q0mPNqN6dtcQXksACnzCBQ5k/k1SwXJsZKs=
Expand Down Expand Up @@ -57,6 +59,8 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
github.com/rivo/uniseg v0.4.3 h1:utMvzDsuh3suAEnhH0RdHmoPbU648o6CvXxTx4SBMOw=
github.com/rivo/uniseg v0.4.3/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/samber/lo v1.47.0 h1:z7RynLwP5nbyRscyvcD043DWYoOcYRv3mV8lBeqOCLc=
github.com/samber/lo v1.47.0/go.mod h1:RmDH9Ct32Qy3gduHQuKJ3gW1fMHAnE/fAzQuf6He5cU=
github.com/sandwich-go/gpt3-encoder v0.0.0-20230203030618-cd99729dd0dd h1:QN5WJmVBDTdmPsqYwLnhC/RocNkVVnNcUFTyC/8+0l0=
Expand Down
10 changes: 10 additions & 0 deletions lib/tgspam/detector.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ import (
"time"
"unicode"

"github.com/forPelevin/gomoji"

"github.com/umputun/tg-spam/lib/approved"
"github.com/umputun/tg-spam/lib/spamcheck"
)
Expand Down Expand Up @@ -765,3 +767,11 @@ func (d *Detector) ctxWithStoreTimeout() (context.Context, context.CancelFunc) {
}
return context.WithTimeout(context.Background(), d.StorageTimeout)
}

// cleanEmoji strips emojis from s by delegating to gomoji.RemoveEmojis
// and returns the resulting string.
func cleanEmoji(s string) string {
	stripped := gomoji.RemoveEmojis(s)
	return stripped
}

// countEmoji reports how many emojis gomoji.CollectAll finds in s.
// The result is the length of the collected slice, so an emoji that
// appears several times contributes once per occurrence.
func countEmoji(s string) int {
	found := gomoji.CollectAll(s)
	return len(found)
}
56 changes: 56 additions & 0 deletions lib/tgspam/detector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1262,3 +1262,59 @@ func TestCleanText(t *testing.T) {
})
}
}

// Test_countEmoji is a table-driven test for countEmoji: every case feeds
// input to countEmoji and compares the returned number with count.
// Several literals contain invisible code points (joiners, variation
// selectors), so they must be kept byte-for-byte when editing this table.
//
//nolint:stylecheck // it has unicode symbols purposely
func Test_countEmoji(t *testing.T) {
tests := []struct {
name string
input string
count int
}{
{"NoEmoji", "Hello, world!", 0},
{"OneEmoji", "Hi there 👋", 1},
// the same emoji appearing twice is counted twice
{"DupEmoji", "️‍🌈Hi 👋there 👋", 3},
{"TwoEmojis", "Good morning 🌞🌻", 2},
{"Mixed", "👨‍👩👦 Family emoji", 3},
{"TextAfterEmoji", "😊 Have a nice day!", 1},
{"OnlyEmojis", "😁🐶🍕", 3},
{"WithCyrillic", "Привет 🌞 🍕 мир! 👋", 3},
// NOTE(review): the "real*" cases look like samples of actual spam messages — confirm origin
{"real1", "❗️НУЖЕН 1 ЧЕЛОВЕК НА ДИСТАНЦИОННУЮ РАБОТУ❗️", 2},
{"real2", "⏰💯⚡️💯🤝🤝🤝🤝🤝🤝🤝🤝 ❗️HУЖHЫ OТВЕТCТВЕHHЫЕ ЛЮДИ❗️ 🔤🔤 ➡️@yyyyy🥢" +
" ⚡️(OТ 2️⃣1️⃣ ВOЗРАCТ)🟢 🔋OHЛАЙH ЗАРАБOТOК 🟢 ✅COПРOВOЖДЕHИЕ🟢 ❗1-2 ЧАCА В ДЕHЬ 🟢 👍1️⃣2️⃣0️⃣0️⃣💸" +
"➕в неделю🟢 ПИCАТЬ ✉️@xxxxxx✉️", 38},
{"real3", "‼️СРОЧНО‼️ ‼️ЭТО КАСАЕТСЯ КАЖДОГО В ЭТОЙ ГРУППЕ‼️ 🔥Строго 20+ В данный момент проходит обучение " +
"для новичков 🔥 Сразу говорю - без наркотиков, инвестиций и прочей ерунды. 🔥 Быстрый старт, прибыль вы получите" +
" уже в первый день работы 🔥 Все легально 🔥 Для работы нужен смартфон и всего 1 час твоего времени" +
" в день 🔥 Доведём вас за ручку до прибыли ‼️", 11},
}

// each case runs as a named subtest
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
assert.Equal(t, tt.count, countEmoji(tt.input))
})
}
}

// Test_cleanEmoji is a table-driven test for cleanEmoji: every case passes
// input to cleanEmoji and compares the returned string with clean.
// Some literals contain zero-width-joiner emoji sequences, so they must be
// kept byte-for-byte when editing this table.
//
//nolint:stylecheck // it has unicode symbols purposely
func Test_cleanEmoji(t *testing.T) {
tests := []struct {
name string
input string
clean string
}{
{"NoEmoji", "Hello, world!", "Hello, world!"},
{"OneEmoji", "Hi there 👋", "Hi there "},
{"TwoEmojis", "Good morning 🌞🌻", "Good morning "},
// multi-code-point sequences are expected to disappear entirely
{"Mixed", "👨‍👩‍👧‍👦 Family emoji", " Family emoji"},
{"EmojiSequences", "🏳️‍🌈 Rainbow flag", " Rainbow flag"},
{"TextAfterEmoji", "😊 Have a nice day!", " Have a nice day!"},
{"OnlyEmojis", "😁🐶🍕", ""},
{"WithCyrillic", "Привет 🌞 🍕 мир! 👋", "Привет мир! "},
}

// each case runs as a named subtest
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
assert.Equal(t, tt.clean, cleanEmoji(tt.input))
})
}
}
14 changes: 0 additions & 14 deletions lib/tgspam/emoji.go

This file was deleted.

55 changes: 0 additions & 55 deletions lib/tgspam/emoji_test.go

This file was deleted.

17 changes: 17 additions & 0 deletions vendor/github.com/forPelevin/gomoji/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

105 changes: 105 additions & 0 deletions vendor/github.com/forPelevin/gomoji/.golangci.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 21 additions & 0 deletions vendor/github.com/forPelevin/gomoji/LICENSE

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 20 additions & 0 deletions vendor/github.com/forPelevin/gomoji/Makefile

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 4941fff

Please sign in to comment.