Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
jdkato committed Dec 29, 2023
1 parent 83d4d9e commit 02504fb
Show file tree
Hide file tree
Showing 5 changed files with 145 additions and 82 deletions.
54 changes: 12 additions & 42 deletions internal/core/config.go
Original file line number Diff line number Diff line change
@@ -1,12 +1,9 @@
package core

import (
"bufio"
"encoding/json"
"io"
"os"
"path/filepath"
"strings"

"github.com/bmatcuk/doublestar/v4"
"github.com/errata-ai/ini"
Expand All @@ -23,13 +20,20 @@ var (
// can be set via the `--config` flag, the `VALE_CONFIG_PATH` environment
// variable, or the default search process.
//
// NOTE: The config pipeline is stored in the top-level `.config`
// NOTE: The config pipeline is stored in the top-level `.vale-config`
// directory. See `cmd/vale/sync.go`.
ConfigDir = "config"

VocabDir = filepath.Join(ConfigDir, "vocabularies")
DictDir = filepath.Join(ConfigDir, "dictionaries")
TmplDir = filepath.Join(ConfigDir, "templates")
// Vocabularies are loaded in `ini.go`.
VocabDir = filepath.Join(ConfigDir, "vocabularies")

// Dictionaries are loaded in `spelling.go#makeSpeller`.
DictDir = filepath.Join(ConfigDir, "dictionaries")

// Templates are loaded in `cmd/vale/custom.go`.
TmplDir = filepath.Join(ConfigDir, "templates")

// Ignore files are loaded in `spelling.go#NewSpelling`.
IgnoreDir = filepath.Join(ConfigDir, "ignore")
)

Expand Down Expand Up @@ -76,7 +80,6 @@ type Config struct {
IgnoredClasses []string // A list of HTML classes to ignore
IgnoredScopes []string // A list of HTML tags to ignore
MinAlertLevel int // Lowest alert level to display
Vocab []string // The active project
RuleToLevel map[string]string // Single-rule level changes
SBaseStyles map[string][]string // Syntax-specific base styles
SChecks map[string]map[string]bool // Syntax-specific checks
Expand All @@ -87,11 +90,9 @@ type Config struct {
WordTemplate string // The template used in YAML -> regexp list conversions
RootINI string // the path to the project's .vale.ini file

AcceptedTokens map[string]struct{} `json:"-"` // Project-specific vocabulary (okay)
RejectedTokens map[string]struct{} `json:"-"` // Project-specific vocabulary (avoid)

DictionaryPath string // Location to search for dictionaries.

Vocabularies []Vocabulary `json:"-"`
FallbackPath string `json:"-"`
SecToPat map[string]glob.Glob `json:"-"`
Styles []string `json:"-"`
Expand All @@ -111,14 +112,12 @@ type Config struct {
func NewConfig(flags *CLIFlags) (*Config, error) {
var cfg Config

cfg.AcceptedTokens = make(map[string]struct{})
cfg.BlockIgnores = make(map[string][]string)
cfg.Flags = flags
cfg.Formats = make(map[string]string)
cfg.Asciidoctor = make(map[string]string)
cfg.GChecks = make(map[string]bool)
cfg.MinAlertLevel = 1
cfg.RejectedTokens = make(map[string]struct{})
cfg.RuleToLevel = make(map[string]string)
cfg.SBaseStyles = make(map[string][]string)
cfg.SChecks = make(map[string]map[string]bool)
Expand All @@ -131,35 +130,6 @@ func NewConfig(flags *CLIFlags) (*Config, error) {
return &cfg, nil
}

// AddWordListFile opens the file at name and adds the vocab terms it contains
// to the config: to the accepted set when accept is true, and to the rejected
// set otherwise.
//
// The error from os.Open is returned unchanged if the file cannot be opened;
// otherwise the result of addWordList is returned.
func (c *Config) AddWordListFile(name string, accept bool) error {
	src, openErr := os.Open(name)
	if openErr != nil {
		return openErr
	}
	defer src.Close()

	return c.addWordList(src, accept)
}

// addWordList reads r line by line and records each term in the config.
//
// Every line is trimmed of surrounding whitespace. Empty lines and lines that
// start with "# " are skipped. The remaining terms go into AcceptedTokens when
// accept is true and into RejectedTokens otherwise.
//
// It returns the scanner's error, if any, once the input is exhausted.
func (c *Config) addWordList(r io.Reader, accept bool) error {
	lines := bufio.NewScanner(r)
	for lines.Scan() {
		term := strings.TrimSpace(lines.Text())

		switch {
		case len(term) == 0, strings.HasPrefix(term, "# "):
			// Blank line or comment: nothing to record.
		case accept:
			// Inserting into a set is idempotent, so no existence check is
			// needed before the write.
			c.AcceptedTokens[term] = struct{}{}
		default:
			c.RejectedTokens[term] = struct{}{}
		}
	}
	return lines.Err()
}

func (c *Config) String() string {
c.StylesPath = filepath.ToSlash(c.StylesPath)
b, _ := json.MarshalIndent(c, "", " ")
Expand Down
1 change: 1 addition & 0 deletions internal/core/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ type File struct {
ChkToCtx map[string]string // maps a temporary context to a particular check
Comments map[string]bool // comment control statements
Metrics map[string]int // count-based metrics
Vocab *Vocabulary // user-defined vocabularies
history map[string]int // -
limits map[string]int // -
simple bool // -
Expand Down
34 changes: 28 additions & 6 deletions internal/core/ini.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,22 @@ var syntaxOpts = map[string]func(string, *ini.Section, *Config) error{
candidate := sec.Key("Transform").String()
cfg.Stylesheets[label] = determinePath(cfg.Flags.Path, candidate)
return nil

},
"Lang": func(label string, sec *ini.Section, cfg *Config) error { //nolint:unparam
cfg.FormatToLang[label] = sec.Key("Lang").String()
return nil
},
"Vocab": func(label string, sec *ini.Section, cfg *Config) error { //nolint:unparam
names := mergeValues(sec.Key("Vocab").StringsWithShadows(","))

vocab, err := loadVocab(label, names, cfg)
if err != nil {
return err
}

cfg.Vocabularies = append(cfg.Vocabularies, *vocab)
return nil
},
}

var globalOpts = map[string]func(*ini.Section, *Config, []string){
Expand All @@ -80,6 +90,16 @@ var globalOpts = map[string]func(*ini.Section, *Config, []string){
"Lang": func(sec *ini.Section, cfg *Config, _ []string) {
cfg.FormatToLang["*"] = sec.Key("Lang").String()
},
"Vocab": func(sec *ini.Section, cfg *Config, _ []string) {
names := mergeValues(sec.Key("Vocab").StringsWithShadows(","))

vocab, err := loadVocab("*", names, cfg)
if err != nil {
panic(err)
}

cfg.Vocabularies = append(cfg.Vocabularies, *vocab)
},
}

var coreOpts = map[string]func(*ini.Section, *Config, []string) error{
Expand Down Expand Up @@ -141,12 +161,14 @@ var coreOpts = map[string]func(*ini.Section, *Config, []string) error{
return nil
},
"Vocab": func(sec *ini.Section, cfg *Config, _ []string) error {
cfg.Vocab = mergeValues(sec.Key("Vocab").StringsWithShadows(","))
for _, v := range cfg.Vocab {
if err := loadVocab(v, cfg); err != nil {
return err
}
names := mergeValues(sec.Key("Vocab").StringsWithShadows(","))

vocab, err := loadVocab("*", names, cfg)
if err != nil {
return err
}

cfg.Vocabularies = append(cfg.Vocabularies, *vocab)
return nil
},
"NLPEndpoint": func(sec *ini.Section, cfg *Config, _ []string) error { //nolint:unparam
Expand Down
34 changes: 0 additions & 34 deletions internal/core/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@ import (
"strings"
"unicode"

"github.com/karrick/godirwalk"

"github.com/errata-ai/vale/v2/internal/nlp"
)

Expand Down Expand Up @@ -285,38 +283,6 @@ func validateLevel(key, val string, cfg *Config) bool {
return true
}

// loadVocab finds the vocabulary directory named root and loads its word
// lists into cfg.
//
// The first entry of cfg.Paths that contains `<VocabDir>/<root>` as a
// directory is used. If none does, an error built by NewE100 is returned.
// Otherwise the directory is walked with godirwalk: entries named
// "accept.txt" are added as accepted terms, entries named "reject.txt" as
// rejected terms, and everything else is ignored. The walk's error, if any,
// is returned.
func loadVocab(root string, cfg *Config) error {
	var vocabDir string
	for _, base := range cfg.Paths {
		candidate := filepath.Join(base, VocabDir, root)
		if IsDir(candidate) {
			vocabDir = candidate
			break
		}
	}

	if vocabDir == "" {
		return NewE100("vocab", fmt.Errorf(
			"'%s/%s' directory does not exist", VocabDir, root))
	}

	// Dispatch on the entry's name; only the two well-known word lists are
	// of interest.
	visit := func(fp string, de *godirwalk.Dirent) error {
		switch de.Name() {
		case "accept.txt":
			return cfg.AddWordListFile(fp, true)
		case "reject.txt":
			return cfg.AddWordListFile(fp, false)
		default:
			return nil
		}
	}

	opts := &godirwalk.Options{
		Callback:            visit,
		Unsorted:            true,
		AllowNonDirectory:   true,
		FollowSymbolicLinks: true,
	}

	err := godirwalk.Walk(vocabDir, opts)
	return err
}

func TextToContext(text string, meta *nlp.Info) []nlp.TaggedWord {
context := []nlp.TaggedWord{}

Expand Down
104 changes: 104 additions & 0 deletions internal/core/vocab.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
package core

import (
"bufio"
"fmt"
"io"
"os"
"path/filepath"
"strings"

"github.com/errata-ai/vale/v2/internal/glob"
)

// A Vocabulary represents a set of accepted and rejected tokens.
//
// Each Vocabulary also carries the glob pattern it was created with, which
// Matches uses to test a path against it.
type Vocabulary struct {
// pattern is the glob compiled from the section string passed to
// NewVocabulary.
pattern glob.Glob
// acceptedTokens is the set of terms read from word lists that were added
// with accept set to true.
acceptedTokens map[string]struct{}
// rejectedTokens is the set of terms read from word lists that were added
// with accept set to false.
rejectedTokens map[string]struct{}
}

// NewVocabulary returns an empty Vocabulary whose pattern is compiled from
// section with glob.Compile.
//
// If section does not compile, the compile error is returned unchanged along
// with a nil Vocabulary. On success both token sets are initialized and empty.
func NewVocabulary(section string) (*Vocabulary, error) {
	pat, compileErr := glob.Compile(section)
	if compileErr != nil {
		return nil, compileErr
	}

	vocab := new(Vocabulary)
	vocab.pattern = pat
	vocab.acceptedTokens = map[string]struct{}{}
	vocab.rejectedTokens = map[string]struct{}{}
	return vocab, nil
}

// Matches reports whether fp (presumably a file path; confirm against
// callers) matches the glob pattern this Vocabulary was created with.
func (c *Vocabulary) Matches(fp string) bool {
	matched := c.pattern.Match(fp)
	return matched
}

// AddWordListFile opens the file at name and adds the vocab terms it contains
// to the vocabulary: to the accepted set when accept is true, and to the
// rejected set otherwise.
//
// The error from os.Open is returned unchanged if the file cannot be opened;
// otherwise the result of addWordList is returned.
func (c *Vocabulary) AddWordListFile(name string, accept bool) error {
	src, openErr := os.Open(name)
	if openErr != nil {
		return openErr
	}
	defer src.Close()

	return c.addWordList(src, accept)
}

// addWordList reads r line by line and records each term in the vocabulary.
//
// Every line is trimmed of surrounding whitespace. Empty lines and lines that
// start with "# " are skipped. The remaining terms go into acceptedTokens when
// accept is true and into rejectedTokens otherwise.
//
// It returns the scanner's error, if any, once the input is exhausted.
func (c *Vocabulary) addWordList(r io.Reader, accept bool) error {
	// Pick the destination set once; it does not change between lines.
	dest := c.rejectedTokens
	if accept {
		dest = c.acceptedTokens
	}

	lines := bufio.NewScanner(r)
	for lines.Scan() {
		term := strings.TrimSpace(lines.Text())
		if len(term) == 0 || strings.HasPrefix(term, "# ") {
			// Blank line or comment: nothing to record.
			continue
		}
		// Inserting into a set is idempotent, so no existence check is
		// needed before the write.
		dest[term] = struct{}{}
	}
	return lines.Err()
}

// loadVocab builds a single Vocabulary for label from the vocabulary
// directories listed in names.
//
// label is handed to NewVocabulary, which compiles it into the Vocabulary's
// glob pattern. For each entry in names, the first path in cfg.Paths that
// contains `<VocabDir>/<name>` as a directory is used; its accept.txt and
// reject.txt files, when they exist, are merged into the result (accept.txt
// first).
//
// An error is returned if NewVocabulary fails, if a named directory is not
// found under any of cfg.Paths, or if an existing word list cannot be loaded.
// In every error case the returned Vocabulary is nil.
func loadVocab(label string, names []string, cfg *Config) (*Vocabulary, error) {
	vocab, err := NewVocabulary(label)
	if err != nil {
		return nil, err
	}

	// The two well-known word lists, in load order, and whether their terms
	// count as accepted.
	wordLists := []struct {
		file   string
		accept bool
	}{
		{"accept.txt", true},
		{"reject.txt", false},
	}

	for _, name := range names {
		var dir string
		for _, base := range cfg.Paths {
			if candidate := filepath.Join(base, VocabDir, name); IsDir(candidate) {
				dir = candidate
				break
			}
		}

		if dir == "" {
			return nil, NewE100("vocab", fmt.Errorf(
				"'%s/%s' directory does not exist", VocabDir, name))
		}

		for _, list := range wordLists {
			fp := filepath.Join(dir, list.file)
			if !FileExists(fp) {
				// Either list is optional; a missing file is not an error.
				continue
			}
			if err := vocab.AddWordListFile(fp, list.accept); err != nil {
				return nil, NewE100("vocab", err)
			}
		}
	}

	return vocab, nil
}

0 comments on commit 02504fb

Please sign in to comment.