diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 536adfa..4c6e38e 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -14,7 +14,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] + python-version: ["3.8", "3.11", "3.13"] steps: - uses: actions/checkout@v4 @@ -28,6 +28,9 @@ jobs: run: | python -m pip install --upgrade pip pip install .[dev] + - name: Run pylint + run: | + pylint repo_to_text - name: Run tests run: | pytest tests/ \ No newline at end of file diff --git a/README.md b/README.md index 0a7f9df..09fcf8f 100644 --- a/README.md +++ b/README.md @@ -180,7 +180,7 @@ To install `repo-to-text` locally for development, follow these steps: 2. Install the package with development dependencies: ```bash - pip install -e .[dev] + pip install -e ".[dev]" ``` ### Requirements diff --git a/pyproject.toml b/pyproject.toml index 12b6a3b..1a0e941 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "repo-to-text" -version = "0.5.3" +version = "0.5.4" authors = [ { name = "Kirill Markin", email = "markinkirill@gmail.com" }, ] @@ -41,4 +41,10 @@ dev = [ "isort", "build", "twine", + "pylint", +] + +[tool.pylint] +disable = [ + "C0303", ] diff --git a/repo_to_text/__init__.py b/repo_to_text/__init__.py index 4568cbb..c86b6ca 100644 --- a/repo_to_text/__init__.py +++ b/repo_to_text/__init__.py @@ -1,2 +1,4 @@ +"""This is the main package for the repo_to_text package.""" + __author__ = 'Kirill Markin' __email__ = 'markinkirill@gmail.com' diff --git a/repo_to_text/cli/__init__.py b/repo_to_text/cli/__init__.py index da7c121..8cad49f 100644 --- a/repo_to_text/cli/__init__.py +++ b/repo_to_text/cli/__init__.py @@ -1,3 +1,5 @@ +"""This module contains the CLI interface for the repo_to_text package.""" + from .cli import create_default_settings_file, parse_args, main -__all__ = ['create_default_settings_file', 'parse_args', 'main'] \ No newline at end of file +__all__ = ['create_default_settings_file', 'parse_args', 'main'] diff --git a/repo_to_text/cli/cli.py b/repo_to_text/cli/cli.py index 286ed8c..c3b3166 100644 --- a/repo_to_text/cli/cli.py +++ b/repo_to_text/cli/cli.py @@ -1,3 +1,7 @@ +""" +CLI for repo-to-text +""" + import argparse import textwrap import os @@ -12,8 +16,11 @@ def create_default_settings_file() -> None: """Create a default .repo-to-text-settings.yaml file.""" settings_file = '.repo-to-text-settings.yaml' if os.path.exists(settings_file): - raise FileExistsError(f"The settings file '{settings_file}' already exists. Please remove it or rename it if you want to create a new default settings file.") - + raise FileExistsError( + f"The settings file '{settings_file}' already exists. " + "Please remove it or rename it if you want to create a new default settings file." + ) + default_settings = textwrap.dedent("""\ # Details: https://github.com/kirill-markin/repo-to-text # Syntax: gitignore rules @@ -32,7 +39,7 @@ def create_default_settings_file() -> None: - "README.md" - "LICENSE" """) - with open('.repo-to-text-settings.yaml', 'w') as f: + with open('.repo-to-text-settings.yaml', 'w', encoding='utf-8') as f: f.write(default_settings) print("Default .repo-to-text-settings.yaml created.") @@ -42,13 +49,25 @@ def parse_args() -> argparse.Namespace: Returns: argparse.Namespace: Parsed command line arguments """ - parser = argparse.ArgumentParser(description='Convert repository structure and contents to text') + parser = argparse.ArgumentParser( + description='Convert repository structure and contents to text' + ) parser.add_argument('input_dir', nargs='?', default='.', help='Directory to process') parser.add_argument('--debug', action='store_true', help='Enable debug logging') parser.add_argument('--output-dir', type=str, help='Directory to save the output file') - parser.add_argument('--create-settings', '--init', action='store_true', help='Create default .repo-to-text-settings.yaml file') + parser.add_argument( + '--create-settings', + '--init', + action='store_true', + help='Create default .repo-to-text-settings.yaml file' + ) parser.add_argument('--stdout', action='store_true', help='Output to stdout instead of a file') - parser.add_argument('--ignore-patterns', nargs='*', help="List of files or directories to ignore in both tree and content sections. Supports wildcards (e.g., '*').") + parser.add_argument( + '--ignore-patterns', + nargs='*', + help="List of files or directories to ignore in both tree and content sections. " + "Supports wildcards (e.g., '*')." + ) return parser.parse_args() def main() -> NoReturn: @@ -60,7 +79,7 @@ def main() -> NoReturn: args = parse_args() setup_logging(debug=args.debug) logging.debug('repo-to-text script started') - + try: if args.create_settings: create_default_settings_file() @@ -72,9 +91,9 @@ def main() -> NoReturn: to_stdout=args.stdout, cli_ignore_patterns=args.ignore_patterns ) - + logging.debug('repo-to-text script finished') sys.exit(0) - except Exception as e: - logging.error(f'Error occurred: {str(e)}') - sys.exit(1) \ No newline at end of file + except (FileNotFoundError, FileExistsError, PermissionError, OSError) as e: + logging.error('Error occurred: %s', str(e)) + sys.exit(1) diff --git a/repo_to_text/core/__init__.py b/repo_to_text/core/__init__.py index 2c937c6..bee4437 100644 --- a/repo_to_text/core/__init__.py +++ b/repo_to_text/core/__init__.py @@ -1,3 +1,5 @@ +"""This module contains the core functionality of the repo_to_text package.""" + from .core import get_tree_structure, load_ignore_specs, should_ignore_file, save_repo_to_text -__all__ = ['get_tree_structure', 'load_ignore_specs', 'should_ignore_file', 'save_repo_to_text'] \ No newline at end of file +__all__ = ['get_tree_structure', 'load_ignore_specs', 'should_ignore_file', 'save_repo_to_text'] diff --git a/repo_to_text/core/core.py b/repo_to_text/core/core.py index 003d262..70bee20 100644 --- a/repo_to_text/core/core.py +++ b/repo_to_text/core/core.py @@ -1,69 +1,116 @@ +""" +Core functionality for repo-to-text +""" + import os import subprocess +from typing import Tuple, Optional, List, Dict, Any, Set +from datetime import datetime, timezone +from importlib.machinery import ModuleSpec import logging import yaml -from datetime import datetime, timezone -from typing import Tuple, Optional, List import pathspec from pathspec import PathSpec -from ..utils.utils import check_tree_command, is_ignored_path, remove_empty_dirs +from ..utils.utils import check_tree_command, is_ignored_path -def get_tree_structure(path: str = '.', gitignore_spec: Optional[PathSpec] = None, tree_and_content_ignore_spec: Optional[PathSpec] = None) -> str: - """Generate tree structure of the directory. - - Args: - path: Directory path to generate tree for - gitignore_spec: PathSpec object for gitignore patterns - tree_and_content_ignore_spec: PathSpec object for tree and content ignore patterns - - Returns: - str: Generated tree structure - """ +def get_tree_structure( + path: str = '.', + gitignore_spec: Optional[PathSpec] = None, + tree_and_content_ignore_spec: Optional[PathSpec] = None + ) -> str: + """Generate tree structure of the directory.""" if not check_tree_command(): return "" - - logging.debug(f'Generating tree structure for path: {path}') - result = subprocess.run(['tree', '-a', '-f', '--noreport', path], stdout=subprocess.PIPE) - tree_output = result.stdout.decode('utf-8') - logging.debug(f'Tree output generated:\n{tree_output}') + + logging.debug('Generating tree structure for path: %s', path) + tree_output = run_tree_command(path) + logging.debug('Tree output generated:\n%s', tree_output) if not gitignore_spec and not tree_and_content_ignore_spec: logging.debug('No .gitignore or ignore-tree-and-content specification found') return tree_output - logging.debug('Filtering tree output based on .gitignore and ignore-tree-and-content specification') - filtered_lines: List[str] = [] + logging.debug('Filtering tree output based on ignore specifications') + return filter_tree_output(tree_output, path, gitignore_spec, tree_and_content_ignore_spec) - for line in tree_output.splitlines(): - idx = line.find('./') - if idx == -1: - idx = line.find(path) - if idx != -1: - full_path = line[idx:].strip() - else: - continue - - if full_path == '.': - continue +def run_tree_command(path: str) -> str: + """Run the tree command and return its output.""" + result = subprocess.run( + ['tree', '-a', '-f', '--noreport', path], + stdout=subprocess.PIPE, + check=True + ) + return result.stdout.decode('utf-8') - relative_path = os.path.relpath(full_path, path) - relative_path = relative_path.replace(os.sep, '/') - if os.path.isdir(full_path): - relative_path += '/' +def filter_tree_output( + tree_output: str, + path: str, + gitignore_spec: Optional[PathSpec], + tree_and_content_ignore_spec: Optional[PathSpec] + ) -> str: + """Filter the tree output based on ignore specifications.""" + lines: List[str] = tree_output.splitlines() + non_empty_dirs: Set[str] = set() - if not should_ignore_file(full_path, relative_path, gitignore_spec, None, tree_and_content_ignore_spec): - display_line = line.replace('./', '', 1) - filtered_lines.append(display_line) - else: - logging.debug(f'Ignored: {relative_path}') + filtered_lines = [ + process_line(line, path, gitignore_spec, tree_and_content_ignore_spec, non_empty_dirs) + for line in lines + ] - filtered_tree_output = '\n'.join(filtered_lines) - logging.debug(f'Filtered tree structure:\n{filtered_tree_output}') - logging.debug('Tree structure filtering complete') + filtered_tree_output = '\n'.join(filter(None, filtered_lines)) + logging.debug('Filtered tree structure:\n%s', filtered_tree_output) return filtered_tree_output -def load_ignore_specs(path: str = '.', cli_ignore_patterns: Optional[List[str]] = None) -> Tuple[Optional[PathSpec], Optional[PathSpec], PathSpec]: +def process_line( + line: str, + path: str, + gitignore_spec: Optional[PathSpec], + tree_and_content_ignore_spec: Optional[PathSpec], + non_empty_dirs: Set[str] + ) -> Optional[str]: + """Process a single line of the tree output.""" + full_path = extract_full_path(line, path) + if not full_path or full_path == '.': + return None + + relative_path = os.path.relpath(full_path, path).replace(os.sep, '/') + + if should_ignore_file( + full_path, + relative_path, + gitignore_spec, + None, + tree_and_content_ignore_spec + ): + logging.debug('Ignored: %s', relative_path) + return None + + if not os.path.isdir(full_path): + mark_non_empty_dirs(relative_path, non_empty_dirs) + + if not os.path.isdir(full_path) or os.path.dirname(relative_path) in non_empty_dirs: + return line.replace('./', '', 1) + return None + +def extract_full_path(line: str, path: str) -> Optional[str]: + """Extract the full path from a line of tree output.""" + idx = line.find('./') + if idx == -1: + idx = line.find(path) + return line[idx:].strip() if idx != -1 else None + +def mark_non_empty_dirs(relative_path: str, non_empty_dirs: Set[str]) -> None: + """Mark all parent directories of a file as non-empty.""" + dir_path = os.path.dirname(relative_path) + while dir_path: + non_empty_dirs.add(dir_path) + dir_path = os.path.dirname(dir_path) + +def load_ignore_specs( + path: str = '.', + cli_ignore_patterns: Optional[List[str]] = None + ) -> Tuple[Optional[PathSpec], Optional[PathSpec], PathSpec]: """Load ignore specifications from various sources. Args: @@ -71,7 +118,8 @@ def load_ignore_specs(path: str = '.', cli_ignore_patterns: Optional[List[str]] cli_ignore_patterns: List of patterns from command line Returns: - Tuple[Optional[PathSpec], Optional[PathSpec], PathSpec]: Tuple of gitignore_spec, content_ignore_spec, and tree_and_content_ignore_spec + Tuple[Optional[PathSpec], Optional[PathSpec], PathSpec]: Tuple of gitignore_spec, + content_ignore_spec, and tree_and_content_ignore_spec """ gitignore_spec = None content_ignore_spec = None @@ -80,14 +128,16 @@ def load_ignore_specs(path: str = '.', cli_ignore_patterns: Optional[List[str]] repo_settings_path = os.path.join(path, '.repo-to-text-settings.yaml') if os.path.exists(repo_settings_path): - logging.debug(f'Loading .repo-to-text-settings.yaml from path: {repo_settings_path}') - with open(repo_settings_path, 'r') as f: - settings = yaml.safe_load(f) + logging.debug('Loading .repo-to-text-settings.yaml from path: %s', repo_settings_path) + with open(repo_settings_path, 'r', encoding='utf-8') as f: + settings: Dict[str, Any] = yaml.safe_load(f) use_gitignore = settings.get('gitignore-import-and-ignore', True) if 'ignore-content' in settings: - content_ignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', settings['ignore-content']) + content_ignore_spec: Optional[PathSpec] = pathspec.PathSpec.from_lines( + 'gitwildmatch', settings['ignore-content'] + ) if 'ignore-tree-and-content' in settings: - tree_and_content_ignore_list.extend(settings['ignore-tree-and-content']) + tree_and_content_ignore_list.extend(settings.get('ignore-tree-and-content', [])) if cli_ignore_patterns: tree_and_content_ignore_list.extend(cli_ignore_patterns) @@ -95,15 +145,22 @@ def load_ignore_specs(path: str = '.', cli_ignore_patterns: Optional[List[str]] if use_gitignore: gitignore_path = os.path.join(path, '.gitignore') if os.path.exists(gitignore_path): - logging.debug(f'Loading .gitignore from path: {gitignore_path}') - with open(gitignore_path, 'r') as f: + logging.debug('Loading .gitignore from path: %s', gitignore_path) + with open(gitignore_path, 'r', encoding='utf-8') as f: gitignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', f) - tree_and_content_ignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', tree_and_content_ignore_list) + tree_and_content_ignore_spec = pathspec.PathSpec.from_lines( + 'gitwildmatch', tree_and_content_ignore_list + ) return gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec -def should_ignore_file(file_path: str, relative_path: str, gitignore_spec: Optional[PathSpec], - content_ignore_spec: Optional[PathSpec], tree_and_content_ignore_spec: Optional[PathSpec]) -> bool: +def should_ignore_file( + file_path: str, + relative_path: str, + gitignore_spec: Optional[PathSpec], + content_ignore_spec: Optional[PathSpec], + tree_and_content_ignore_spec: Optional[PathSpec] +) -> bool: """Check if a file should be ignored based on various ignore specifications. Args: @@ -126,44 +183,73 @@ def should_ignore_file(file_path: str, relative_path: str, gitignore_spec: Optio result = ( is_ignored_path(file_path) or - bool(gitignore_spec and gitignore_spec.match_file(relative_path)) or - bool(content_ignore_spec and content_ignore_spec.match_file(relative_path)) or - bool(tree_and_content_ignore_spec and tree_and_content_ignore_spec.match_file(relative_path)) or + bool( + gitignore_spec and + gitignore_spec.match_file(relative_path) + ) or + bool( + content_ignore_spec and + content_ignore_spec.match_file(relative_path) + ) or + bool( + tree_and_content_ignore_spec and + tree_and_content_ignore_spec.match_file(relative_path) + ) or os.path.basename(file_path).startswith('repo-to-text_') ) - logging.debug(f'Checking if file should be ignored:') - logging.debug(f' file_path: {file_path}') - logging.debug(f' relative_path: {relative_path}') - logging.debug(f' Result: {result}') + logging.debug('Checking if file should be ignored:') + logging.debug(' file_path: %s', file_path) + logging.debug(' relative_path: %s', relative_path) + logging.debug(' Result: %s', result) return result -def save_repo_to_text(path: str = '.', output_dir: Optional[str] = None, to_stdout: bool = False, cli_ignore_patterns: Optional[List[str]] = None) -> str: - """Save repository structure and contents to a text file. - - Args: - path: Repository path - output_dir: Directory to save output file - to_stdout: Whether to output to stdout instead of file - cli_ignore_patterns: List of patterns from command line - - Returns: - str: Path to the output file or the output text if to_stdout is True - """ - logging.debug(f'Starting to save repo structure to text for path: {path}') - gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(path, cli_ignore_patterns) - tree_structure = get_tree_structure(path, gitignore_spec, tree_and_content_ignore_spec) - tree_structure = remove_empty_dirs(tree_structure, path) - logging.debug(f'Final tree structure to be written: {tree_structure}') - - timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%d-%H-%M-%S-UTC') - output_file = f'repo-to-text_{timestamp}.txt' - - if output_dir: - if not os.path.exists(output_dir): - os.makedirs(output_dir) - output_file = os.path.join(output_dir, output_file) - +def save_repo_to_text( + path: str = '.', + output_dir: Optional[str] = None, + to_stdout: bool = False, + cli_ignore_patterns: Optional[List[str]] = None + ) -> str: + """Save repository structure and contents to a text file.""" + logging.debug('Starting to save repo structure to text for path: %s', path) + gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs( + path, cli_ignore_patterns + ) + tree_structure: str = get_tree_structure( + path, gitignore_spec, tree_and_content_ignore_spec + ) + logging.debug('Final tree structure to be written: %s', tree_structure) + + output_content = generate_output_content( + path, + tree_structure, + gitignore_spec, + content_ignore_spec, + tree_and_content_ignore_spec + ) + + if to_stdout: + print(output_content) + return output_content + + output_file = write_output_to_file(output_content, output_dir) + copy_to_clipboard(output_content) + + print( + "[SUCCESS] Repository structure and contents successfully saved to " + f"file: \"./{output_file}\"" + ) + + return output_file + +def generate_output_content( + path: str, + tree_structure: str, + gitignore_spec: Optional[PathSpec], + content_ignore_spec: Optional[PathSpec], + tree_and_content_ignore_spec: Optional[PathSpec] + ) -> str: + """Generate the output content for the repository.""" output_content: List[str] = [] project_name = os.path.basename(os.path.abspath(path)) output_content.append(f'Directory: {project_name}\n\n') @@ -172,7 +258,7 @@ def save_repo_to_text(path: str = '.', output_dir: Optional[str] = None, to_stdo if os.path.exists(os.path.join(path, '.gitignore')): output_content.append('├── .gitignore\n') - + output_content.append(tree_structure + '\n' + '```\n') logging.debug('Tree structure written to output content') @@ -180,47 +266,63 @@ def save_repo_to_text(path: str = '.', output_dir: Optional[str] = None, to_stdo for filename in files: file_path = os.path.join(root, filename) relative_path = os.path.relpath(file_path, path) - - if should_ignore_file(file_path, relative_path, gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec): + + if should_ignore_file( + file_path, + relative_path, + gitignore_spec, + content_ignore_spec, + tree_and_content_ignore_spec + ): continue relative_path = relative_path.replace('./', '', 1) - + output_content.append(f'\nContents of {relative_path}:\n') output_content.append('```\n') try: with open(file_path, 'r', encoding='utf-8') as f: output_content.append(f.read()) except UnicodeDecodeError: - logging.debug(f'Could not decode file contents: {file_path}') + logging.debug('Could not decode file contents: %s', file_path) output_content.append('[Could not decode file contents]\n') output_content.append('\n```\n') output_content.append('\n') logging.debug('Repository contents written to output content') - - output_text = ''.join(output_content) - - if to_stdout: - print(output_text) - return output_text - with open(output_file, 'w') as file: - file.write(output_text) - + return ''.join(output_content) + +def write_output_to_file(output_content: str, output_dir: Optional[str]) -> str: + """Write the output content to a file.""" + timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%d-%H-%M-%S-UTC') + output_file = f'repo-to-text_{timestamp}.txt' + + if output_dir: + if not os.path.exists(output_dir): + os.makedirs(output_dir) + output_file = os.path.join(output_dir, output_file) + + with open(output_file, 'w', encoding='utf-8') as file: + file.write(output_content) + + return output_file + +def copy_to_clipboard(output_content: str) -> None: + """Copy the output content to the clipboard if possible.""" try: - import importlib.util - if importlib.util.find_spec("pyperclip"): - import pyperclip # type: ignore - pyperclip.copy(output_text) # type: ignore + import importlib.util # pylint: disable=import-outside-toplevel + spec: Optional[ModuleSpec] = importlib.util.find_spec("pyperclip") + if spec: + import pyperclip # pylint: disable=import-outside-toplevel # type: ignore + pyperclip.copy(output_content) # type: ignore logging.debug('Repository structure and contents copied to clipboard') else: print("Tip: Install 'pyperclip' package to enable automatic clipboard copying:") print(" pip install pyperclip") - except Exception as e: - logging.warning('Could not copy to clipboard. You might be running this script over SSH or without clipboard support.') - logging.debug(f'Clipboard copy error: {e}') - - print(f"[SUCCESS] Repository structure and contents successfully saved to file: \"./{output_file}\"") - - return output_file \ No newline at end of file + except ImportError as e: + logging.warning( + 'Could not copy to clipboard. You might be running this ' + 'script over SSH or without clipboard support.' + ) + logging.debug('Clipboard copy error: %s', e) diff --git a/repo_to_text/main.py b/repo_to_text/main.py index f911293..b25086e 100644 --- a/repo_to_text/main.py +++ b/repo_to_text/main.py @@ -1,3 +1,5 @@ +"""This is the main entry point for the repo_to_text package.""" + from repo_to_text.cli.cli import main if __name__ == '__main__': diff --git a/repo_to_text/utils/__init__.py b/repo_to_text/utils/__init__.py index 51c6c6e..3fd2aed 100644 --- a/repo_to_text/utils/__init__.py +++ b/repo_to_text/utils/__init__.py @@ -1,3 +1,5 @@ -from .utils import setup_logging, check_tree_command, is_ignored_path, remove_empty_dirs +"""This module contains utility functions for the repo_to_text package.""" -__all__ = ['setup_logging', 'check_tree_command', 'is_ignored_path', 'remove_empty_dirs'] \ No newline at end of file +from .utils import setup_logging, check_tree_command, is_ignored_path + +__all__ = ['setup_logging', 'check_tree_command', 'is_ignored_path'] diff --git a/repo_to_text/utils/utils.py b/repo_to_text/utils/utils.py index ea374a4..e18ceb6 100644 --- a/repo_to_text/utils/utils.py +++ b/repo_to_text/utils/utils.py @@ -1,7 +1,8 @@ -import os +"""This module contains utility functions for the repo_to_text package.""" + import shutil import logging -from typing import List, Set +from typing import List def setup_logging(debug: bool = False) -> None: """Set up logging configuration. @@ -19,7 +20,10 @@ def check_tree_command() -> bool: bool: True if tree command is available, False otherwise """ if shutil.which('tree') is None: - print("The 'tree' command is not found. Please install it using one of the following commands:") + print( + "The 'tree' command is not found. " + + "Please install it using one of the following commands:" + ) print("For Debian-based systems (e.g., Ubuntu): sudo apt-get install tree") print("For Red Hat-based systems (e.g., Fedora, CentOS): sudo yum install tree") return False @@ -40,43 +44,5 @@ def is_ignored_path(file_path: str) -> bool: is_ignored_file = any(file_path.startswith(prefix) for prefix in ignored_files_prefix) result = is_ignored_dir or is_ignored_file if result: - logging.debug(f'Path ignored: {file_path}') + logging.debug('Path ignored: %s', file_path) return result - -def remove_empty_dirs(tree_output: str, path: str = '.') -> str: - """Remove empty directories from tree output. - - Args: - tree_output: Output from tree command - path: Base path for the tree - - Returns: - str: Tree output with empty directories removed - """ - logging.debug('Removing empty directories from tree output') - lines = tree_output.splitlines() - non_empty_dirs: Set[str] = set() - filtered_lines: List[str] = [] - - for line in lines: - parts = line.strip().split() - if parts: - full_path = parts[-1] - if os.path.isdir(full_path) and not any(os.path.isfile(os.path.join(full_path, f)) for f in os.listdir(full_path)): - logging.debug(f'Directory is empty and will be removed: {full_path}') - continue - non_empty_dirs.add(os.path.dirname(full_path)) - filtered_lines.append(line) - - final_lines: List[str] = [] - for line in filtered_lines: - parts = line.strip().split() - if parts: - full_path = parts[-1] - if os.path.isdir(full_path) and full_path not in non_empty_dirs: - logging.debug(f'Directory is empty and will be removed: {full_path}') - continue - final_lines.append(line) - - logging.debug('Empty directory removal complete') - return '\n'.join(filtered_lines) \ No newline at end of file diff --git a/tests/test_cli.py b/tests/test_cli.py index 23747e5..7382ce7 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,15 +1,19 @@ +"""Test the CLI module.""" + import os -import pytest import tempfile import shutil from typing import Generator from unittest.mock import patch, MagicMock +import pytest from repo_to_text.cli.cli import ( create_default_settings_file, parse_args, main ) +# pylint: disable=redefined-outer-name + @pytest.fixture def temp_dir() -> Generator[str, None, None]: """Create a temporary directory for testing.""" @@ -48,11 +52,11 @@ def test_create_default_settings_file(temp_dir: str) -> None: """Test creation of default settings file.""" os.chdir(temp_dir) create_default_settings_file() - + settings_file = '.repo-to-text-settings.yaml' assert os.path.exists(settings_file) - - with open(settings_file, 'r') as f: + + with open(settings_file, 'r', encoding='utf-8') as f: content = f.read() assert 'gitignore-import-and-ignore: True' in content assert 'ignore-tree-and-content:' in content @@ -63,7 +67,7 @@ def test_create_default_settings_file_already_exists(temp_dir: str) -> None: os.chdir(temp_dir) # Create the file first create_default_settings_file() - + # Try to create it again with pytest.raises(FileExistsError) as exc_info: create_default_settings_file() @@ -94,7 +98,10 @@ def test_main_create_settings(mock_create_settings: MagicMock) -> None: @patch('repo_to_text.cli.cli.setup_logging') @patch('repo_to_text.cli.cli.create_default_settings_file') -def test_main_with_debug_logging(mock_create_settings: MagicMock, mock_setup_logging: MagicMock) -> None: +def test_main_with_debug_logging( + mock_create_settings: MagicMock, + mock_setup_logging: MagicMock +) -> None: """Test main function with debug logging enabled.""" with patch('sys.argv', ['repo-to-text', '--debug', '--create-settings']): with pytest.raises(SystemExit) as exc_info: @@ -104,4 +111,4 @@ def test_main_with_debug_logging(mock_create_settings: MagicMock, mock_setup_log mock_create_settings.assert_called_once() if __name__ == "__main__": - pytest.main([__file__]) \ No newline at end of file + pytest.main([__file__]) diff --git a/tests/test_core.py b/tests/test_core.py index aa05e3d..1882388 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1,17 +1,21 @@ +"""Test the core module.""" + import os import tempfile import shutil -import pytest from typing import Generator +import pytest + from repo_to_text.core.core import ( get_tree_structure, load_ignore_specs, should_ignore_file, is_ignored_path, - remove_empty_dirs, save_repo_to_text ) +# pylint: disable=redefined-outer-name + @pytest.fixture def temp_dir() -> Generator[str, None, None]: """Create a temporary directory for testing.""" @@ -20,12 +24,13 @@ def temp_dir() -> Generator[str, None, None]: shutil.rmtree(temp_path) @pytest.fixture -def sample_repo(temp_dir: str) -> str: +def sample_repo(tmp_path: str) -> str: """Create a sample repository structure for testing.""" + tmp_path_str = str(tmp_path) # Create directories - os.makedirs(os.path.join(temp_dir, "src")) - os.makedirs(os.path.join(temp_dir, "tests")) - + os.makedirs(os.path.join(tmp_path_str, "src")) + os.makedirs(os.path.join(tmp_path_str, "tests")) + # Create sample files files = { "README.md": "# Test Project", @@ -45,14 +50,14 @@ def sample_repo(temp_dir: str) -> str: - "README.md" """ } - + for file_path, content in files.items(): - full_path = os.path.join(temp_dir, file_path) + full_path = os.path.join(tmp_path_str, file_path) os.makedirs(os.path.dirname(full_path), exist_ok=True) - with open(full_path, "w") as f: + with open(full_path, "w", encoding='utf-8') as f: f.write(content) - - return temp_dir + + return tmp_path_str def test_is_ignored_path() -> None: """Test the is_ignored_path function.""" @@ -63,27 +68,31 @@ def test_is_ignored_path() -> None: def test_load_ignore_specs(sample_repo: str) -> None: """Test loading ignore specifications from files.""" - gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(sample_repo) - + gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs( + sample_repo + ) + assert gitignore_spec is not None assert content_ignore_spec is not None assert tree_and_content_ignore_spec is not None - + # Test gitignore patterns assert gitignore_spec.match_file("test.pyc") is True assert gitignore_spec.match_file("__pycache__/cache.py") is True assert gitignore_spec.match_file(".git/config") is True - + # Test content ignore patterns assert content_ignore_spec.match_file("README.md") is True - + # Test tree and content ignore patterns assert tree_and_content_ignore_spec.match_file(".git/config") is True def test_should_ignore_file(sample_repo: str) -> None: """Test file ignoring logic.""" - gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(sample_repo) - + gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs( + sample_repo + ) + # Test various file paths assert should_ignore_file( ".git/config", @@ -92,7 +101,7 @@ def test_should_ignore_file(sample_repo: str) -> None: content_ignore_spec, tree_and_content_ignore_spec ) is True - + assert should_ignore_file( "src/main.py", "src/main.py", @@ -105,7 +114,7 @@ def test_get_tree_structure(sample_repo: str) -> None: """Test tree structure generation.""" gitignore_spec, _, tree_and_content_ignore_spec = load_ignore_specs(sample_repo) tree_output = get_tree_structure(sample_repo, gitignore_spec, tree_and_content_ignore_spec) - + # Basic structure checks assert "src" in tree_output assert "tests" in tree_output @@ -113,74 +122,42 @@ def test_get_tree_structure(sample_repo: str) -> None: assert "test_main.py" in tree_output assert ".git" not in tree_output -def test_remove_empty_dirs(temp_dir: str) -> None: - """Test removal of empty directories from tree output.""" - # Create test directory structure - os.makedirs(os.path.join(temp_dir, "src")) - os.makedirs(os.path.join(temp_dir, "empty_dir")) - os.makedirs(os.path.join(temp_dir, "tests")) - - # Create some files - with open(os.path.join(temp_dir, "src/main.py"), "w") as f: - f.write("print('test')") - with open(os.path.join(temp_dir, "tests/test_main.py"), "w") as f: - f.write("def test(): pass") - - # Create a mock tree output that matches the actual tree command format - tree_output = ( - f"{temp_dir}\n" - f"├── {os.path.join(temp_dir, 'src')}\n" - f"│ └── {os.path.join(temp_dir, 'src/main.py')}\n" - f"├── {os.path.join(temp_dir, 'empty_dir')}\n" - f"└── {os.path.join(temp_dir, 'tests')}\n" - f" └── {os.path.join(temp_dir, 'tests/test_main.py')}\n" - ) - - filtered_output = remove_empty_dirs(tree_output, temp_dir) - - # Check that empty_dir is removed but other directories remain - assert "empty_dir" not in filtered_output - assert os.path.join(temp_dir, "src") in filtered_output - assert os.path.join(temp_dir, "tests") in filtered_output - assert os.path.join(temp_dir, "src/main.py") in filtered_output - assert os.path.join(temp_dir, "tests/test_main.py") in filtered_output - def test_save_repo_to_text(sample_repo: str) -> None: """Test the main save_repo_to_text function.""" # Create output directory output_dir = os.path.join(sample_repo, "output") os.makedirs(output_dir, exist_ok=True) - + # Create .git directory to ensure it's properly ignored os.makedirs(os.path.join(sample_repo, ".git")) - with open(os.path.join(sample_repo, ".git/config"), "w") as f: + with open(os.path.join(sample_repo, ".git/config"), "w", encoding='utf-8') as f: f.write("[core]\n\trepositoryformatversion = 0\n") - + # Test file output output_file = save_repo_to_text(sample_repo, output_dir=output_dir) assert os.path.exists(output_file) assert os.path.dirname(output_file) == output_dir - + # Check file contents - with open(output_file, 'r') as f: + with open(output_file, 'r', encoding='utf-8') as f: content = f.read() - + # Basic content checks assert "Directory Structure:" in content - + # Check for expected files assert "src/main.py" in content assert "tests/test_main.py" in content - + # Check for file contents assert "print('Hello World')" in content assert "def test_sample(): pass" in content - + # Ensure ignored patterns are not in output assert ".git/config" not in content # Check specific file assert "repo-to-text_" not in content assert ".repo-to-text-settings.yaml" not in content - + # Check that .gitignore content is not included assert "*.pyc" not in content assert "__pycache__" not in content @@ -197,14 +174,16 @@ def test_load_ignore_specs_with_cli_patterns(sample_repo: str) -> None: """Test loading ignore specs with CLI patterns.""" cli_patterns = ["*.log", "temp/"] _, _, tree_and_content_ignore_spec = load_ignore_specs(sample_repo, cli_patterns) - + assert tree_and_content_ignore_spec.match_file("test.log") is True assert tree_and_content_ignore_spec.match_file("temp/file.txt") is True assert tree_and_content_ignore_spec.match_file("normal.txt") is False def test_load_ignore_specs_without_gitignore(temp_dir: str) -> None: """Test loading ignore specs when .gitignore is missing.""" - gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(temp_dir) + gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs( + temp_dir + ) assert gitignore_spec is None assert content_ignore_spec is None assert tree_and_content_ignore_spec is not None @@ -214,17 +193,19 @@ def test_get_tree_structure_with_special_chars(temp_dir: str) -> None: # Create files with special characters special_dir = os.path.join(temp_dir, "special chars") os.makedirs(special_dir) - with open(os.path.join(special_dir, "file with spaces.txt"), "w") as f: + with open(os.path.join(special_dir, "file with spaces.txt"), "w", encoding='utf-8') as f: f.write("test") - + tree_output = get_tree_structure(temp_dir) assert "special chars" in tree_output assert "file with spaces.txt" in tree_output def test_should_ignore_file_edge_cases(sample_repo: str) -> None: """Test edge cases for should_ignore_file function.""" - gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(sample_repo) - + gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs( + sample_repo + ) + # Test with dot-prefixed paths assert should_ignore_file( "./src/main.py", @@ -233,7 +214,7 @@ def test_should_ignore_file_edge_cases(sample_repo: str) -> None: content_ignore_spec, tree_and_content_ignore_spec ) is False - + # Test with absolute paths abs_path = os.path.join(sample_repo, "src/main.py") rel_path = "src/main.py" @@ -252,9 +233,9 @@ def test_save_repo_to_text_with_binary_files(temp_dir: str) -> None: binary_content = b'\x00\x01\x02\x03' with open(binary_path, "wb") as f: f.write(binary_content) - + output = save_repo_to_text(temp_dir, to_stdout=True) - + # Check that the binary file is listed in the structure assert "binary.bin" in output # Check that the file content section exists with raw binary content @@ -264,13 +245,13 @@ def test_save_repo_to_text_with_binary_files(temp_dir: str) -> None: def test_save_repo_to_text_custom_output_dir(temp_dir: str) -> None: """Test save_repo_to_text with custom output directory.""" # Create a simple file structure - with open(os.path.join(temp_dir, "test.txt"), "w") as f: + with open(os.path.join(temp_dir, "test.txt"), "w", encoding='utf-8') as f: f.write("test content") - + # Create custom output directory output_dir = os.path.join(temp_dir, "custom_output") output_file = save_repo_to_text(temp_dir, output_dir=output_dir) - + assert os.path.exists(output_file) assert os.path.dirname(output_file) == output_dir assert output_file.startswith(output_dir) @@ -281,5 +262,44 @@ def test_get_tree_structure_empty_directory(temp_dir: str) -> None: # Should only contain the directory itself assert tree_output.strip() == "" or tree_output.strip() == temp_dir +def test_empty_dirs_filtering(tmp_path: str) -> None: + """Test filtering of empty directories in tree structure generation.""" + # Create test directory structure with normalized paths + base_path = os.path.normpath(tmp_path) + src_path = os.path.join(base_path, "src") + empty_dir_path = os.path.join(base_path, "empty_dir") + tests_path = os.path.join(base_path, "tests") + + os.makedirs(src_path) + os.makedirs(empty_dir_path) + os.makedirs(tests_path) + + # Create some files + with open(os.path.join(src_path, "main.py"), "w", encoding='utf-8') as f: + f.write("print('test')") + with open(os.path.join(tests_path, "test_main.py"), "w", encoding='utf-8') as f: + f.write("def test(): pass") + + # Get tree structure directly using the function + tree_output = get_tree_structure(base_path) + + # Print debug information + print("\nTree output:") + print(tree_output) + + # Basic structure checks for directories with files + assert "src" in tree_output + assert "tests" in tree_output + assert "main.py" in tree_output + assert "test_main.py" in tree_output + + # Check that empty directory is not included by checking each line + for line in tree_output.splitlines(): + # Skip the root directory line + if base_path in line: + continue + # Check that no line contains 'empty_dir' + assert "empty_dir" not in line, f"Found empty_dir in line: {line}" + if __name__ == "__main__": pytest.main([__file__]) diff --git a/tests/test_utils.py b/tests/test_utils.py index c6a5ff8..43a772d 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,6 +1,10 @@ +"""Test the utils module.""" + import logging -import pytest from typing import Generator +import io +import pytest + from repo_to_text.utils.utils import setup_logging @pytest.fixture(autouse=True) @@ -20,7 +24,7 @@ def test_setup_logging_debug() -> None: root_logger = logging.getLogger() root_logger.handlers.clear() # Clear existing handlers root_logger.setLevel(logging.WARNING) # Reset to default - + setup_logging(debug=True) assert len(root_logger.handlers) > 0 assert root_logger.level == logging.DEBUG @@ -30,7 +34,7 @@ def test_setup_logging_info() -> None: root_logger = logging.getLogger() root_logger.handlers.clear() # Clear existing handlers root_logger.setLevel(logging.WARNING) # Reset to default - + setup_logging(debug=False) assert len(root_logger.handlers) > 0 assert root_logger.level == logging.INFO @@ -40,14 +44,14 @@ def test_setup_logging_formatter() -> None: setup_logging(debug=True) logger = logging.getLogger() handlers = logger.handlers - + # Check if there's at least one handler assert len(handlers) > 0 - + # Check formatter formatter = handlers[0].formatter assert formatter is not None - + # Test format string test_record = logging.LogRecord( name='test', @@ -66,26 +70,27 @@ def test_setup_logging_multiple_calls() -> None: """Test that multiple calls to setup_logging don't create duplicate handlers.""" root_logger = logging.getLogger() root_logger.handlers.clear() - + setup_logging(debug=True) initial_handler_count = len(root_logger.handlers) - + # Call setup_logging again setup_logging(debug=True) - assert len(root_logger.handlers) == initial_handler_count, "Should not create duplicate handlers" + assert len(root_logger.handlers) == \ + initial_handler_count, "Should not create duplicate handlers" def test_setup_logging_level_change() -> None: """Test changing log levels between setup_logging calls.""" root_logger = logging.getLogger() root_logger.handlers.clear() - + # Start with debug setup_logging(debug=True) assert root_logger.level == logging.DEBUG - + # Clear handlers before next setup root_logger.handlers.clear() - + # Switch to info setup_logging(debug=False) assert root_logger.level == logging.INFO @@ -94,24 +99,25 @@ def test_setup_logging_message_format() -> None: """Test the actual format of logged messages.""" setup_logging(debug=True) logger = logging.getLogger() - + # Create a temporary handler to capture output - import io log_capture = io.StringIO() handler = logging.StreamHandler(log_capture) # Use formatter that includes pathname - handler.setFormatter(logging.Formatter('%(levelname)s %(name)s:%(pathname)s:%(lineno)d %(message)s')) + handler.setFormatter( + logging.Formatter('%(levelname)s %(name)s:%(pathname)s:%(lineno)d %(message)s') + ) logger.addHandler(handler) - + # Ensure debug level is set logger.setLevel(logging.DEBUG) handler.setLevel(logging.DEBUG) - + # Log a test message test_message = "Test log message" logger.debug(test_message) log_output = log_capture.getvalue() - + # Verify format components assert test_message in log_output assert "DEBUG" in log_output @@ -121,22 +127,21 @@ def test_setup_logging_error_messages() -> None: """Test logging of error messages.""" setup_logging(debug=False) logger = logging.getLogger() - + # Create a temporary handler to capture output - import io log_capture = io.StringIO() handler = logging.StreamHandler(log_capture) handler.setFormatter(logger.handlers[0].formatter) logger.addHandler(handler) - + # Log an error message error_message = "Test error message" logger.error(error_message) log_output = log_capture.getvalue() - + # Error messages should always be logged regardless of debug setting assert error_message in log_output assert "ERROR" in log_output if __name__ == "__main__": - pytest.main([__file__]) \ No newline at end of file + pytest.main([__file__])