Skip to content

Commit

Permalink
Fix issue #1802: Handle UnicodeDecodeError in repo file paths
Browse files Browse the repository at this point in the history
Added error handling for UnicodeDecodeError when decoding paths in the Git repository.
This error occurs when a path in the repository is stored in an encoding other than the expected filesystem encoding (the value of `sys.getfilesystemencoding()`).

The error handler now provides a detailed error message, helping users identify potential encoding mismatches.

Closes #1802
  • Loading branch information
mdklab committed Dec 23, 2024
1 parent 083605e commit 5c92491
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 0 deletions.
6 changes: 6 additions & 0 deletions aider/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,12 @@ def sanity_check_repo(repo, io):
if not repo.git_repo_error:
return True
error_msg = str(repo.git_repo_error)
except UnicodeDecodeError as exc:
error_msg = (
f"Failed to read the Git repository. This issue is likely caused by a path encoded "
f"in a format different from the expected encoding \"{sys.getfilesystemencoding()}\".\n"
f"Internal error: {str(exc)}"
)
except ANY_GIT_ERROR as exc:
error_msg = str(exc)
bad_ver = "version in (1, 2)" in error_msg
Expand Down
40 changes: 40 additions & 0 deletions tests/basic/test_sanity_check_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

from aider import urls
from aider.main import sanity_check_repo
from aider.repo import GitRepo
from aider.io import InputOutput


@pytest.fixture
Expand Down Expand Up @@ -182,3 +184,41 @@ def test_sanity_check_repo_with_no_repo(mock_io):
# Assert that no errors or outputs were logged
mock_io.tool_error.assert_not_called()
mock_io.tool_output.assert_not_called()


def corrupt_git_index(repo_path, offset=77, payload=b"\xF5" * 5):
    """Overwrite bytes inside a repository's ``.git/index`` file in place.

    Test helper used to simulate an index whose stored path cannot be decoded
    as text.

    Args:
        repo_path: Directory that contains the ``.git`` folder.
        offset: Absolute byte position in the index file where ``payload`` is
            written. NOTE(review): the default of 77 presumably lands inside
            the first entry's path name (a 12-byte header plus 62 bytes of
            fixed entry fields would put the path at byte 74, which agrees
            with "position 3" in the expected decode error) -- confirm
            against the index version the fixture produces.
        payload: Bytes written at ``offset``. The default is five ``0xF5``
            bytes; ``0xF5`` is never a valid byte in UTF-8.

    Raises:
        ValueError: If the file does not start with the ``DIRC`` signature.
            The file is left untouched in that case.
    """
    index_path = os.path.join(repo_path, ".git", "index")
    with open(index_path, "r+b") as f:
        # Refuse to modify anything that does not look like a git index.
        signature = f.read(4)
        if signature != b"DIRC":
            raise ValueError("Invalid git index file signature.")

        # Overwrite (not insert) so the file length and the bytes around the
        # payload stay as they were.
        f.seek(offset)
        f.write(payload)


def test_sanity_check_repo_with_corrupt_index(create_repo, mock_io):
    """sanity_check_repo reports an undecodable index and returns False.

    The repository's index is damaged with ``corrupt_git_index`` before a
    ``GitRepo`` is built on top of it; the check is then expected to emit the
    generic corruption error followed by the detailed encoding explanation.
    """
    repo_path, _ = create_repo

    # Damage the index first, then open the repository through GitRepo.
    corrupt_git_index(repo_path)
    damaged_repo = GitRepo(InputOutput(), None, repo_path)

    # The sanity check must fail outright rather than raise.
    assert sanity_check_repo(damaged_repo, mock_io) is False

    # Detailed message expected on tool_output after the generic error.
    expected_detail = (
        "Failed to read the Git repository. This issue is likely caused by a path encoded "
        "in a format different from the expected encoding \"utf-8\".\n"
        "Internal error: 'utf-8' codec can't decode byte 0xf5 in position 3: invalid start byte"
    )
    mock_io.tool_error.assert_called_with("Unable to read git repository, it may be corrupt?")
    mock_io.tool_output.assert_called_with(expected_detail)

0 comments on commit 5c92491

Please sign in to comment.