Speech To Text

The DevExtreme SpeechToText component allows you to integrate voice input into your DevExtreme-powered app. The component implements the Web Speech API SpeechRecognition interface and supports custom speech recognizers.

Backend API

@* SpeechToText demo view: the recognizer button with its output area, plus an options panel that reconfigures the button at runtime. *@
<div class="dx-viewport">
    <div class="speech-to-text-demo">
        @* Main area: recognizer button, recognized-text output, and a Clear button. *@
        <div class="speech-to-text-container">
            <span>Use voice recognition (speech to text)</span>
            @(Html.DevExtreme().SpeechToText()
                .ID("speech-to-text")
                .Type(ButtonType.Default)
                .Hint("Start voice recognition")
                .StartIcon("micoutline")
                .SpeechRecognitionConfig(new JS("{ interimResults: true, continuous: false }"))
                .OnStartClick("onStartClick")
                .OnResult("onResult")
                .OnEnd("onEnd")
            )
            @(Html.DevExtreme().TextArea()
                .ID("text-area")
                .Width(360)
                .Height(120)
                .Placeholder("Recognized text will appear here...")
                .InputAttr("aria-label", "Recognized Text")
                .OnValueChanged("onTextAreaValueChanged")
            )
            @(Html.DevExtreme().Button()
                .ID("clear-button")
                .Text("Clear")
                .Disabled(true)
                .OnClick("onClearButtonClick")
            )
        </div>

        @* Options panel: appearance settings first, recognition settings second, animation last. *@
        <div class="options">
            <div class="caption">Options</div>
            <div class="option">
                <div>Display Mode</div>
                @(Html.DevExtreme().SelectBox()
                    .ID("display-mode")
                    .Items(new[] { "Icon Only", "Text and Icon", "Custom" })
                    .Value("Icon Only")
                    .InputAttr("aria-label", "Display Mode")
                    .OnValueChanged("onDisplayModeValueChanged")
                )
            </div>
            <div class="option">
                <div>Styling Mode</div>
                @(Html.DevExtreme().SelectBox()
                    .ID("styling-mode")
                    .Items(new[] { "Contained", "Text", "Outlined" })
                    .Value("Contained")
                    .InputAttr("aria-label", "Styling Mode")
                    .OnValueChanged("onStylingModeChanged")
                )
            </div>
            <div class="option">
                <div>Type</div>
                @(Html.DevExtreme().SelectBox()
                    .ID("type")
                    .Items(new[] { "Normal", "Success", "Default", "Danger" })
                    .Value("Default")
                    .InputAttr("aria-label", "Type")
                    .OnValueChanged("onTypeChanged")
                )
            </div>
            <div class="switch">
                @(Html.DevExtreme().Switch()
                    .ID("disabled")
                    .OnValueChanged("onDisabledChanged")
                )
                <span>Disabled</span>
            </div>
            <div class="option-separator"></div>
            <div class="option">
                <div>Language</div>
                @(Html.DevExtreme().SelectBox()
                    .ID("language")
                    .Items(new[] { "Auto-detect", "English", "Spanish", "French", "German" })
                    .Value("Auto-detect")
                    .InputAttr("aria-label", "Language")
                    .OnValueChanged("onLanguageChanged")
                )
            </div>
            <div class="switch">
                @(Html.DevExtreme().Switch()
                    .ID("interim-results")
                    .Value(true)
                    .OnValueChanged("onInterimResultsChanged")
                )
                <span>Interim Results</span>
            </div>
            <div class="switch">
                @(Html.DevExtreme().Switch()
                    .ID("continuous-recognition")
                    .OnValueChanged("onContinuousChanged")
                )
                <span>Continuous Recognition</span>
            </div>
            <div class="option-separator"></div>
            <div class="switch">
                @(Html.DevExtreme().Switch()
                    .ID("animation")
                    .Value(true)
                    .OnValueChanged("onAnimationChanged")
                )
                <span>Animation</span>
            </div>
        </div>
    </div>
</div>

<script>
    // Recognition state tracked by the demo: "initial" while idle, "listening" after a start click.
    let state = "initial";

    // True when the Display Mode selector is set to "Custom"; only then do the
    // start/end handlers drive the Type selector.
    const shouldUpdateType = () => {
        const displayModeSelect = $("#display-mode").dxSelectBox("instance");
        return displayModeSelect.option("value") === "Custom";
    };

    // Language selector captions mapped to the value written to speechRecognitionConfig.lang.
    const langMap = {
        "Auto-detect": "",
        "English": "en-US",
        "Spanish": "es-ES",
        "French": "fr-FR",
        "German": "de-DE",
    };

    // Start click: report a missing Web Speech API, otherwise enter the "listening" state.
    function onStartClick({ component }) {
        if (!(window.SpeechRecognition || window.webkitSpeechRecognition)) {
            DevExpress.ui.notify({
                message: 'The browser does not support Web Speech API (SpeechRecognition).',
                type: 'error',
                displayTime: 7000,
                position: 'bottom center',
                width: 'auto',
            });
            return;
        }

        state = "listening";
        component.option("hint", "Stop voice recognition");

        if (shouldUpdateType()) {
            $("#type").dxSelectBox("instance").option("value", "Danger");
        }
    }

    // Recognition ended: go back to the initial state and restore the start hint.
    function onEnd({ component }) {
        state = "initial";
        component.option("hint", "Start voice recognition");

        if (shouldUpdateType()) {
            $("#type").dxSelectBox("instance").option("value", "Default");
        }
    }

    // Recognition result: join the first alternative of every result into one
    // space-separated string and show it in the text area.
    function onResult({ component, event }) {
        const transcripts = [];
        for (const resultItem of Object.values(event.results)) {
            transcripts.push(resultItem[0].transcript.trim());
        }

        const textArea = $("#text-area").dxTextArea("instance");
        textArea.option("value", transcripts.join(" "));
    }

    // The Clear button is enabled only while the text area holds a non-empty value.
    function onTextAreaValueChanged({ value }) {
        const clearButton = $('#clear-button').dxButton('instance');
        clearButton.option('disabled', !value);
    }

    // Clear button: empty the text area.
    function onClearButtonClick() {
        const textArea = $('#text-area').dxTextArea('instance');
        textArea.option('value', '');
    }

    // Display Mode changed: switch the button between icon-only, text-and-icon and custom looks.
    function onDisplayModeValueChanged({ value }) {
        const $button = $("#speech-to-text");
        const recognizer = $button.dxSpeechToText("instance");
        const stylingModeSelect = $("#styling-mode").dxSelectBox("instance");
        const typeSelect = $("#type").dxSelectBox("instance");
        const isCustomMode = value === "Custom";

        // In custom mode the demo owns styling mode and type, so both selectors are locked.
        stylingModeSelect.option("disabled", isCustomMode);
        typeSelect.option("disabled", isCustomMode);
        $button.removeClass("custom-button");

        if (value === "Text and Icon") {
            recognizer.option({
                startText: "Dictate",
                stopText: "Stop",
            });
        } else {
            recognizer.option({ startText: "", stopText: "" });

            if (isCustomMode) {
                // Align the selectors with the current recognition state before applying the custom class.
                const typeForState = state === "initial" ? "Default" : "Danger";
                stylingModeSelect.option("value", "Contained");
                typeSelect.option("value", typeForState);
                $button.addClass("custom-button");
            }
        }
    }

    // Styling Mode changed: pass the lowercased caption to the button's stylingMode option.
    function onStylingModeChanged({ value }) {
        const recognizer = $('#speech-to-text').dxSpeechToText('instance');
        recognizer.option('stylingMode', value.toLowerCase());
    }

    // Type changed: pass the lowercased caption to the button's type option.
    function onTypeChanged({ value }) {
        const recognizer = $('#speech-to-text').dxSpeechToText('instance');
        recognizer.option('type', value.toLowerCase());
    }

    // Disabled switch toggled: enable or disable the button.
    function onDisabledChanged({ value }) {
        const recognizer = $('#speech-to-text').dxSpeechToText('instance');
        recognizer.option('disabled', value);
    }

    // Language changed: look up the selected caption in langMap and assign the result.
    function onLanguageChanged({ value }) {
        const recognizer = $('#speech-to-text').dxSpeechToText('instance');
        recognizer.option('speechRecognitionConfig.lang', langMap[value]);
    }

    // Interim Results switch toggled.
    function onInterimResultsChanged({ value }) {
        const recognizer = $('#speech-to-text').dxSpeechToText('instance');
        recognizer.option('speechRecognitionConfig.interimResults', value);
    }

    // Continuous Recognition switch toggled.
    function onContinuousChanged({ value }) {
        const recognizer = $('#speech-to-text').dxSpeechToText('instance');
        recognizer.option('speechRecognitionConfig.continuous', value);
    }

    // Animation switch toggled: the "animation-disabled" class is present while the switch is off.
    function onAnimationChanged({ value }) {
        const isAnimationOff = !value;
        $('#speech-to-text').toggleClass('animation-disabled', isAnimationOff);
    }
</script>

using Microsoft.AspNetCore.Mvc;

namespace DevExtreme.NETCore.Demos.Controllers
{
    /// <summary>
    /// MVC controller for the SpeechToText demo.
    /// </summary>
    public class SpeechToTextController : Controller
    {
        /// <summary>
        /// Handles the Overview action.
        /// </summary>
        /// <returns>The result of <c>View()</c> called without arguments.</returns>
        public ActionResult Overview() => View();
    }
}

/* Demo root: main area and options panel side by side. */
.speech-to-text-demo {
    display: flex;
    height: 640px;
    gap: 20px;
}

/* Main area: a centered column that takes the remaining width. */
.speech-to-text-container {
    display: flex;
    flex-direction: column;
    flex-grow: 1;
    justify-content: center;
    align-items: center;
    row-gap: 16px;
}

#text-area {
    margin-top: 16px;
}

/* Options panel: fixed-width column with a tinted background. */
.options {
    display: flex;
    flex-direction: column;
    flex-shrink: 0;
    gap: 16px;
    box-sizing: border-box;
    width: 300px;
    padding: 20px;
    background-color: rgba(191, 191, 191, 0.15);
}

.caption {
    font-size: 18px;
    font-weight: 500;
}

/* A label stacked above its editor. */
.option {
    display: flex;
    flex-direction: column;
    row-gap: 4px;
}

/* A switch followed by its caption on one line. */
.switch {
    display: flex;
    align-items: center;
    column-gap: 8px;
}

.option-separator {
    border-bottom: 1px solid var(--dx-color-border);
}

/* Applied by the Animation switch handler while animation is turned off. */
#speech-to-text.animation-disabled {
    animation: none;
}

/* Applied in the "Custom" display mode. */
#speech-to-text.custom-button {
    border-radius: 2rem;
}

You can integrate SpeechToText with any text input, including other DevExtreme components. To introduce this capability, assign the transcribed text to the target component's value property. SpeechToText returns transcribed text in the onResult handler as users speak. When speech stops, the component calls the onEnd handler and switches from the "listening" state to the initial state. SpeechToText exposes separate icon (startIcon/stopIcon), text (startText/stopText), and click handler (onStartClick/onStopClick) properties for each component state.

For a complete overview of SpeechToText options (including Web Speech API options), refer to the following topic: SpeechToText API Reference.