feat: Support custom Whisper API endpoints for voice transcription #2634

Status: Open. Wants to merge 4 commits into base: main.
15 changes: 12 additions & 3 deletions aider/commands.py
@@ -1112,12 +1112,21 @@ def cmd_voice(self, args):
         "Record and transcribe voice input"

         if not self.voice:
-            if "OPENAI_API_KEY" not in os.environ:
-                self.io.tool_error("To use /voice you must provide an OpenAI API key.")
+            if "WHISPER_API_BASE" in os.environ and "WHISPER_API_KEY" not in os.environ:
+                self.io.tool_error(
+                    "To use /voice with a custom Whisper API you must provide a custom Whisper API key"
+                )
+                return
+            elif "OPENAI_API_KEY" not in os.environ and "WHISPER_API_KEY" not in os.environ:
+                self.io.tool_error(
+                    "To use /voice you must provide an OpenAI API key (or custom Whisper API and key)."
+                )
                 return

             try:
                 self.voice = voice.Voice(
-                    audio_format=self.args.voice_format, device_name=self.args.voice_input_device
+                    audio_format=self.args.voice_format,
+                    device_name=self.args.voice_input_device,
                 )
             except voice.SoundDeviceError:
                 self.io.tool_error(
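The new branch in `cmd_voice` decides, before any recording starts, which error message the user sees. A rough standalone distillation of that decision as a pure function (hypothetical helper, not part of the PR; note that it accepts `WHISPER_API_KEY` against the default endpoint, matching the fallback documented in `optional.md`):

```python
import os


def voice_precheck_error(environ=None):
    """Return the /voice error message implied by the environment, or None if keys are OK.

    Illustrative sketch of the cmd_voice checks; messages abbreviated.
    """
    env = os.environ if environ is None else environ
    if "WHISPER_API_BASE" in env and "WHISPER_API_KEY" not in env:
        return "custom Whisper API key required"
    if "OPENAI_API_KEY" not in env and "WHISPER_API_KEY" not in env:
        return "OpenAI API key (or custom Whisper API and key) required"
    return None
```

For example, `voice_precheck_error({"WHISPER_API_BASE": "http://x"})` reports the missing custom key, while a plain `OPENAI_API_KEY` passes.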
31 changes: 27 additions & 4 deletions aider/voice.py
@@ -150,8 +150,34 @@ def raw_record_and_transcribe(self, history, language):

         with open(filename, "rb") as fh:
             try:
+                # Get API configuration from environment
+                api_base = os.getenv("WHISPER_API_BASE")  # None means the default OpenAI endpoint
+                api_key = os.getenv("WHISPER_API_KEY")  # None causes OPENAI_API_KEY to be used
+
+                # If a custom base is specified, require a specific whisper key
+                if api_base and not api_key:
+                    raise Exception(
+                        "When using a custom WHISPER_API_BASE, you must provide a WHISPER_API_KEY"
+                        " via --api-key whisper=<key>"
+                    )
+
+                # Only use the OpenAI key as a fallback on the default OpenAI endpoint
+                if not api_key:
+                    if not api_base or api_base == "https://api.openai.com/v1":
+                        api_key = os.getenv("OPENAI_API_KEY")
+                    if not api_key:
+                        raise Exception(
+                            "No API key found. Please set either WHISPER_API_KEY or OPENAI_API_KEY"
+                            " environment variables, or use --api-key whisper=<key>"
+                        )
+
                 transcript = litellm.transcription(
-                    model="whisper-1", file=fh, prompt=history, language=language
+                    model="whisper-1",
+                    file=fh,
+                    prompt=history,
+                    language=language,
+                    api_base=api_base,
+                    api_key=api_key,
                 )
             except Exception as err:
                 print(f"Unable to transcribe (unknown): {err}")

@@ -165,7 +191,4 @@


 if __name__ == "__main__":
-    api_key = os.getenv("OPENAI_API_KEY")
-    if not api_key:
-        raise ValueError("Please set the OPENAI_API_KEY environment variable.")
     print(Voice().record_and_transcribe())
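The additions to `raw_record_and_transcribe` boil down to resolving an `(api_base, api_key)` pair before the transcription call. A sketch of that resolution as a pure, testable function (hypothetical name; error strings abbreviated):

```python
import os

DEFAULT_OPENAI_BASE = "https://api.openai.com/v1"


def resolve_whisper_credentials(environ=None):
    """Resolve (api_base, api_key) following the precedence in the diff above."""
    env = os.environ if environ is None else environ
    api_base = env.get("WHISPER_API_BASE")  # None -> default OpenAI endpoint
    api_key = env.get("WHISPER_API_KEY")

    # A custom endpoint requires its own key; the OpenAI key is never sent there
    if api_base and not api_key:
        raise RuntimeError("WHISPER_API_BASE set without WHISPER_API_KEY")

    if not api_key:
        # Only fall back to the OpenAI key on the default OpenAI endpoint
        if not api_base or api_base == DEFAULT_OPENAI_BASE:
            api_key = env.get("OPENAI_API_KEY")
        if not api_key:
            raise RuntimeError("set WHISPER_API_KEY or OPENAI_API_KEY")

    return api_base, api_key
```

Keeping this logic separate from the I/O would also make it easier to unit-test than patching `raw_record_and_transcribe` wholesale.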
6 changes: 6 additions & 0 deletions aider/website/assets/sample.env
@@ -69,6 +69,12 @@
 ## Specify the api base url
 #AIDER_OPENAI_API_BASE=

+## Specify an alternate api base url for Whisper transcriptions (optional)
+#AIDER_WHISPER_API_BASE=
+
+## Specify an alternate api key for Whisper transcriptions (optional)
+#AIDER_WHISPER_API_KEY=
+
 ## (deprecated, use --set-env OPENAI_API_TYPE=<value>)
 #AIDER_OPENAI_API_TYPE=
39 changes: 39 additions & 0 deletions aider/website/docs/install/optional.md
@@ -65,6 +65,9 @@ for additional information.
 Aider supports
 [coding with your voice](https://aider.chat/docs/usage/voice.html)
 using the in-chat `/voice` command.

+### Audio capture setup
+
 Aider uses the [PortAudio](http://www.portaudio.com) library to
 capture audio.
 Installing PortAudio is completely optional, but can usually be accomplished like this:

@@ -74,6 +77,42 @@
 - For Linux, do `sudo apt-get install libportaudio2`
 - Some linux environments may also need `sudo apt install libasound2-plugins`

+### Whisper API configuration
+
+By default, aider uses OpenAI's Whisper API for voice transcription. You can configure an alternate Whisper API provider:
+
+```bash
+# Via command line arguments
+aider --set-env WHISPER_API_BASE=https://api.example.com --api-key whisper=your-api-key
+
+# Via environment variables or .env file
+WHISPER_API_BASE=https://api.example.com
+WHISPER_API_KEY=your-api-key
+
+# Via config file (.aider.conf.yml)
+api-base:
+  - whisper=https://api.example.com
+api-key:
+  - whisper=your-api-key
+```
+
+When using an alternate Whisper API endpoint:
+
+- You must provide both the API base URL and API key
+- Your OpenAI API key will not be sent to the alternate endpoint
+- This is useful for:
+  - Using a different Whisper API provider
+  - Running Whisper locally or on your own infrastructure
+  - Controlling costs or data privacy
+
+API key behavior:
+
+- If using a custom WHISPER_API_BASE, you must provide a WHISPER_API_KEY
+- If using the default OpenAI endpoint (or no endpoint specified):
+  - WHISPER_API_KEY is used if provided
+  - OPENAI_API_KEY is the fallback if no WHISPER_API_KEY is set
+- If neither key is available, an error is raised
+
 ## Add aider to your IDE/editor

 You can use
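Since the PortAudio dependency described above is optional, a quick stdlib-only way to check whether the shared library is discoverable on the current machine (illustrative, not part of aider):

```python
import ctypes.util

# find_library returns the library name/path if the loader can locate it, else None
lib = ctypes.util.find_library("portaudio")
if lib:
    print(f"PortAudio found: {lib}")
else:
    print("PortAudio not found; install it before using /voice")
```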
4 changes: 2 additions & 2 deletions aider/website/docs/usage/voice.md
@@ -17,11 +17,11 @@ when you ask aider to edit your code.
 Use the in-chat `/voice` command to start recording,
 and press `ENTER` when you're done speaking.
 Your voice coding instructions will be transcribed,
-as if you had typed them into 
+as if you had typed them into
 the aider chat session.

 See the [installation instructions](https://aider.chat/docs/install/optional.html#enable-voice-coding) for
-information on how to enable the `/voice` command.
+information on how to enable the `/voice` command and configure alternate Whisper API endpoints.

 <br/>
 <div class="chat-transcript" markdown="1">
20 changes: 20 additions & 0 deletions tests/basic/test_voice.py
@@ -101,3 +101,23 @@ def test_record_and_transcribe_device_error():
     ):
         result = voice.record_and_transcribe()
         assert result is None
+
+
+def test_record_and_transcribe_no_api_key():
+    with patch("aider.voice.sf", MagicMock()):
+        voice = Voice()
+        with patch.dict(os.environ, {}, clear=True):  # clear all environment variables
+            result = voice.record_and_transcribe()
+            assert result is None
+
+
+def test_record_and_transcribe_custom_base_no_key():
+    with patch("aider.voice.sf", MagicMock()):
+        with patch(
+            "aider.voice.Voice.raw_record_and_transcribe",
+            side_effect=Exception(
+                "When using a custom WHISPER_API_BASE, you must provide a WHISPER_API_KEY"
+                " via --api-key whisper=<key>"
+            ),
+        ):
+            voice = Voice()
+            with patch.dict(os.environ, {"WHISPER_API_BASE": "http://custom.api"}, clear=True):
+                with pytest.raises(Exception) as exc:
+                    voice.record_and_transcribe()
+                assert "When using a custom WHISPER_API_BASE" in str(exc.value)
+                assert "via --api-key whisper=<key>" in str(exc.value)