Skip to content

Commit

Permalink
updated to normal shell style regex
Browse files Browse the repository at this point in the history
  • Loading branch information
keizo committed Dec 29, 2024
1 parent 6c66914 commit a100689
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 10 deletions.
14 changes: 10 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,16 @@ ggrab file1.py func1 func2 file2.js func3
# Now supports full directories (entire files only)
ggrab directory

# Skip files matching a regex pattern
ggrab directory/ --ignore '\.test\.' # Skip test files
ggrab src/ --ignore 'node_modules' # Skip node_modules directory
ggrab . --ignore '(\.test\.|\.git)' # Skip multiple patterns
# Skip files matching a pattern
ggrab directory/ --ignore '*.test.*' # Skip test files
ggrab src/ -i node_modules # Skip node_modules directory
ggrab . -i '*.spec.*' # Skip spec files
ggrab . -i dist # Skip dist directory
ggrab . -i '*.min.*' # Skip minified files
ggrab . -i vendor # Skip vendor directory
ggrab . -i '*.generated.*' # Skip generated files (always quote patterns containing * so the shell does not expand them)
ggrab . -i "**/__pycache__/**" # Skip Python cache dirs (quotes needed for **)
ggrab . -i 'dist/*' # Skip everything under dist/ (patterns are shell globs; regex alternation such as (a|b) is not supported)
```

Press `TAB` at any time for smart autocompletion:
Expand Down
57 changes: 52 additions & 5 deletions ggrab/ggrab.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import argparse
from pathlib import Path
import time
import fnmatch

try:
import argcomplete
Expand Down Expand Up @@ -239,6 +240,42 @@

}

# Exact file names that are collected in addition to files with a supported
# extension. Lookup is a plain, case-sensitive set membership test, which is
# why some entries are listed in two spellings.
SPECIAL_FILENAMES = {
    # Build tooling and project definitions
    "Makefile", "makefile",
    "Dockerfile", "dockerfile",
    "configure",
    "Rakefile", "Gemfile",

    # Dotfile configuration
    ".env",
    ".gitignore", ".dockerignore",
    ".editorconfig",
    ".eslintrc", ".prettierrc", ".babelrc",
    ".npmrc", ".yarnrc",

    # CI/CD and DevOps
    "Jenkinsfile", "Vagrantfile", "Procfile",

    # Project documentation
    "README", "LICENSE", "CHANGELOG", "CONTRIBUTING",
    "AUTHORS", "PATENTS", "NOTICE",
}


def lazy_import_ast():
global _ast
Expand Down Expand Up @@ -573,22 +610,32 @@ def is_dir_candidate(token):
def collect_code_files_in_dir(directory, ignore_pattern=None):
    """
    Recursively gather all files in 'directory' whose extension
    is in SUPPORTED_EXTENSIONS or whose filename is in SPECIAL_FILENAMES.

    If ignore_pattern is provided, it is interpreted as a shell glob
    (fnmatch syntax: *, ?, [seq], [!seq]; no regex alternation).
    Matching is case-sensitive and a trailing '/' on the pattern is ignored.

    - A file is skipped when the glob matches its path or any trailing
      portion of it, so '*.min.*' and 'dist/*' both work.
    - A directory is skipped, together with everything below it, when the
      glob matches its path the same way, so 'node_modules' or 'dist'
      excludes the whole tree.

    The starting 'directory' itself is never tested against the pattern.

    Returns a list of absolute paths.
    """
    ignore_re = None
    if ignore_pattern:
        # Accept 'node_modules/' (e.g. from tab completion) as 'node_modules'.
        # Fall back to the raw pattern if stripping would leave it empty,
        # because an empty glob would match every path.
        glob = ignore_pattern.rstrip("/") or ignore_pattern
        try:
            # fnmatch.translate() returns a regex anchored at the END of the
            # string; combined with .search() below this means "the glob
            # matches some trailing part of the path".
            ignore_re = re.compile(fnmatch.translate(glob))
        except re.error as e:
            print(f"[WARN] Invalid shell glob for --ignore: '{ignore_pattern}' => {e}")
            ignore_re = None

    code_files = []
    for root, dirs, files in os.walk(directory):
        if ignore_re:
            # Prune ignored directories in place so os.walk never descends
            # into them. Without this, a bare directory name such as
            # 'node_modules' could never match, because file paths inside
            # the directory do not END with that name.
            dirs[:] = [
                d for d in dirs
                if not ignore_re.search(os.path.join(root, d))
            ]

        for filename in files:
            filepath = os.path.join(root, filename)

            # Skip files whose path matches the ignore glob
            if ignore_re and ignore_re.search(filepath):
                continue

            # Check both extension and special filenames
            ext = os.path.splitext(filename)[1].lower()
            if ext in SUPPORTED_EXTENSIONS or filename in SPECIAL_FILENAMES:
                code_files.append(os.path.abspath(filepath))

    return code_files
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "ggrab"
version = "0.1.0a11"
version = "0.1.0a12"
description = "A CLI tool for extracting and sharing code files and functions. A manual context builder for pasting into llms."
readme = "README.md"
license = { text = "MIT" }
Expand Down

0 comments on commit a100689

Please sign in to comment.