Skip to content

Commit

Permalink
Add ctxspec package
Browse files Browse the repository at this point in the history
Enables user to specify which omitted parts of a map to expand via a DSL
  • Loading branch information
everestmz committed Dec 1, 2024
1 parent 8ddccb7 commit 9541a0b
Show file tree
Hide file tree
Showing 3 changed files with 307 additions and 0 deletions.
50 changes: 50 additions & 0 deletions ctxspec/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# ctxspec

A parser for context specifications that define which parts of source files to include in LLM context windows. Supports both full-file and symbol-level granularity.

## Usage

Specify files and symbols to include:

```go
// Include specific functions from main.go and the entire config.go file

specs, err := ctxspec.ParseContextSpec(`
main.go ProcessRequest HandleError
config.go
`)

// Quotes for items containing spaces
specs, err = ctxspec.ParseContextSpec(`
"complex file.go" "Process Request"
utils.go "error handling"
`)
```

## Syntax

Each line follows the format:

```
filename [symbol1 symbol2 ...]
```

- First item: filename
- Subsequent items (optional): symbols to include from that file
- Use quotes for items with spaces: "my file.go" "My Function"
- Escape quotes in strings: file.go "Function \"name\""

The parser automatically merges multiple specifications for the same file:

```go
// These get merged:
main.go Func1
main.go Func2

// Equivalent to:
main.go Func1 Func2

// But specifying the whole file takes precedence:
main.go // Whole file
main.go Func1 // Ignored - whole file already selected
```
134 changes: 134 additions & 0 deletions ctxspec/ctxspec.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
package ctxspec

import (
"bufio"
"fmt"
"strings"
)

// FileContextSpec describes which parts of a single file are selected.
type FileContextSpec struct {
// Filename is the first item of a spec line.
Filename string
// Symbols holds the remaining items of a spec line. An empty list is
// treated by MergeContextSpecs as "the whole file is selected".
Symbols []string
}

// MergeContextSpecs combines specs that refer to the same filename into a
// single spec per file.
//
// Merge rules:
//   - Symbol lists for the same file are concatenated in input order.
//   - A spec with no symbols selects the whole file; once a whole-file spec
//     has been seen for a file, symbol specs for that file are ignored, and
//     any symbols collected earlier are discarded.
//   - nil entries in specs are skipped.
//
// The result lists files in order of first appearance. The returned specs are
// fresh values, so the inputs are never modified. The result is nil when no
// non-nil spec was supplied.
func MergeContextSpecs(specs ...*FileContextSpec) []*FileContextSpec {
	merged := make(map[string]*FileContextSpec, len(specs))

	var out []*FileContextSpec

	for _, spec := range specs {
		if spec == nil {
			continue
		}

		existing, seen := merged[spec.Filename]
		if !seen {
			// Copy so later appends never write into the caller's slice.
			entry := &FileContextSpec{
				Filename: spec.Filename,
				Symbols:  append([]string(nil), spec.Symbols...),
			}
			merged[spec.Filename] = entry
			// Appending on first sight keeps the output order deterministic
			// instead of depending on map iteration order.
			out = append(out, entry)
			continue
		}

		switch {
		case len(existing.Symbols) == 0:
			// The whole file is already selected; nothing can widen that.
		case len(spec.Symbols) == 0:
			// A whole-file request overrides previously collected symbols.
			existing.Symbols = nil
		default:
			existing.Symbols = append(existing.Symbols, spec.Symbols...)
		}
	}

	return out
}

// ParseContextSpec parses a multi-line context definition, one file spec per
// line, and returns the specs merged per filename via MergeContextSpecs.
//
// Lines for which ParseSpecLine returns no spec (such as empty lines) are
// skipped. An error is returned if any line fails to parse or if the
// underlying scanner fails, for example on a line that exceeds the scanner's
// token size limit.
func ParseContextSpec(contextDefinition string) ([]*FileContextSpec, error) {
	// bufio.Scanner splits on lines by default.
	scanner := bufio.NewScanner(strings.NewReader(contextDefinition))

	var items []*FileContextSpec

	for scanner.Scan() {
		line := scanner.Text()

		item, err := ParseSpecLine(line)
		if err != nil {
			return nil, fmt.Errorf("Error for line '%s': %w", line, err)
		}
		if item == nil {
			// Nothing was specified on this line; do not pass a nil spec on.
			continue
		}

		items = append(items, item)
	}

	// Scan returning false can mean a read/tokenize failure, not just EOF.
	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("reading context definition: %w", err)
	}

	return MergeContextSpecs(items...), nil
}

// ParseSpecLine parses one line of a context definition into a spec.
//
// The first item on the line is the filename; any further items are symbols
// to include from that file. A line with only a filename selects the whole
// file (Symbols is left nil).
//
// It returns (nil, nil) when the line is empty or contains no items at all
// (for example, only spaces), and an error when the line cannot be split
// into items.
func ParseSpecLine(line string) (*FileContextSpec, error) {
	if line == "" {
		return nil, nil
	}

	parts, err := getLineParts(line)
	if err != nil {
		return nil, err
	}

	// A line made up solely of separators yields no items; indexing parts[0]
	// would panic, so treat it like an empty line.
	if len(parts) == 0 {
		return nil, nil
	}

	contextItem := &FileContextSpec{
		Filename: parts[0],
	}

	// Our options right now are a whole file, or a set of symbols.
	// Each row can have one filename, but multiple symbols.
	if len(parts) > 1 {
		contextItem.Symbols = parts[1:]
	}

	return contextItem, nil
}

// getLineParts splits a spec line into its items.
//
// Splitting rules:
//   - Unquoted spaces and tabs separate items; consecutive separators and
//     leading/trailing separators produce no empty items.
//   - Double quotes group characters, including whitespace, into one item.
//     A closing quote always ends the current item, so `""` yields an empty
//     item.
//   - A backslash makes the following character literal, inside or outside
//     quotes (e.g. `\"` for a quote character).
//   - A backslash that is the last character of the line has nothing to
//     escape and is kept as a literal backslash.
//
// It returns an error if a quote is still open at the end of the line.
func getLineParts(line string) ([]string, error) {
	var (
		parts    []string
		current  strings.Builder
		inQuotes bool
		escaped  bool
	)

	for _, char := range line {
		switch {
		case escaped:
			current.WriteRune(char)
			escaped = false
		case char == '\\':
			escaped = true
		case char == '"':
			inQuotes = !inQuotes

			if !inQuotes {
				// Closing quote: emit the item even if it is empty.
				parts = append(parts, current.String())
				current.Reset()
			}
		case (char == ' ' || char == '\t') && !inQuotes:
			if current.Len() > 0 {
				parts = append(parts, current.String())
				current.Reset()
			}
		default:
			current.WriteRune(char)
		}
	}

	if inQuotes {
		return nil, fmt.Errorf("Found quote with no matching closing quote")
	}

	// Dangling escape at end of line: keep the backslash instead of
	// silently dropping it.
	if escaped {
		current.WriteRune('\\')
	}

	if current.Len() > 0 {
		parts = append(parts, current.String())
	}

	return parts, nil
}
123 changes: 123 additions & 0 deletions ctxspec/ctxspec_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
package ctxspec

import (
"testing"
)

// TestParseContextSpec runs ParseContextSpec over a table of inputs and checks
// the returned specs. Results are matched to expectations by filename rather
// than by position, so the test does not depend on the order in which merged
// specs are returned.
func TestParseContextSpec(t *testing.T) {
	tests := []struct {
		name    string
		input   string
		want    []*FileContextSpec
		wantErr bool
	}{
		{
			name:  "empty input",
			input: "",
			want:  nil, // no lines are parsed, so the result is a nil slice
		},
		{
			name:  "single file no symbols",
			input: "main.go",
			want: []*FileContextSpec{
				{Filename: "main.go"},
			},
		},
		{
			name:  "single file with symbols",
			input: `main.go MyFunc AnotherFunc`,
			want: []*FileContextSpec{
				{
					Filename: "main.go",
					Symbols:  []string{"MyFunc", "AnotherFunc"},
				},
			},
		},
		{
			name: "multiple files with symbols",
			input: `main.go MyFunc
parser.go Parse ParseLine`,
			want: []*FileContextSpec{
				{
					Filename: "main.go",
					Symbols:  []string{"MyFunc"},
				},
				{
					Filename: "parser.go",
					Symbols:  []string{"Parse", "ParseLine"},
				},
			},
		},
		{
			name: "quoted strings with spaces",
			input: `"main file.go" "My Function"
"complex parser.go" "Parse Items"`,
			want: []*FileContextSpec{
				{
					Filename: "main file.go",
					Symbols:  []string{"My Function"},
				},
				{
					Filename: "complex parser.go",
					Symbols:  []string{"Parse Items"},
				},
			},
		},
		{
			name:    "unclosed quote",
			input:   `main.go "unclosed`,
			wantErr: true,
		},
		{
			name:  "escaped quotes",
			input: `main.go "Method \"quoted\" name"`,
			want: []*FileContextSpec{
				{
					Filename: "main.go",
					Symbols:  []string{`Method "quoted" name`},
				},
			},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got, err := ParseContextSpec(tt.input)
			if (err != nil) != tt.wantErr {
				t.Errorf("ParseContextSpec() error = %v, wantErr %v", err, tt.wantErr)
				return
			}
			if tt.wantErr {
				return
			}

			if (got == nil) != (tt.want == nil) {
				t.Errorf("ParseContextSpec() nil mismatch: got = %v, want = %v", got, tt.want)
				return
			}
			if len(got) != len(tt.want) {
				t.Errorf("ParseContextSpec() length mismatch: got = %v, want = %v", got, tt.want)
				return
			}

			// Index the results by filename so the comparison is independent
			// of the order of the returned slice.
			gotByName := make(map[string]*FileContextSpec, len(got))
			for _, spec := range got {
				gotByName[spec.Filename] = spec
			}

			for _, want := range tt.want {
				spec, ok := gotByName[want.Filename]
				if !ok {
					t.Errorf("ParseContextSpec() missing spec for filename %q: got = %v", want.Filename, got)
					continue
				}
				if !slicesEqual(spec.Symbols, want.Symbols) {
					t.Errorf("ParseContextSpec() symbols mismatch for %q: got = %v, want = %v", want.Filename, spec.Symbols, want.Symbols)
				}
			}
		})
	}
}

// slicesEqual reports whether a and b have the same length and hold the same
// strings at every position. A nil slice and an empty slice compare equal.
func slicesEqual(a, b []string) bool {
	if len(a) != len(b) {
		return false
	}
	for idx, left := range a {
		if right := b[idx]; left != right {
			return false
		}
	}
	return true
}

0 comments on commit 9541a0b

Please sign in to comment.