The Development Blog of Tim Graupmann
Documentation: [UnityWebGLSpeechDetection] [private]
Documentation: [UnityWebGLSpeechSynthesis] [private]
To be able to call out “fire” and “stop”, I made some edits to the `F3DPlayerTurretController.cs` script.
```csharp
using UnityEngine;
using System.Collections;
using UnityWebGLSpeechDetection;

namespace Forge3D
{
    public class F3DPlayerTurretController : MonoBehaviour
    {
        RaycastHit hitInfo; // Raycast structure
        public F3DTurret turret;
        bool isFiring; // Is turret currently in firing state
        public F3DFXController fxController;

        // reference to the proxy
        private ISpeechDetectionPlugin _mSpeechDetectionPlugin = null;

        enum FireState
        {
            IDLE,
            DETECTED_FIRE,
            FIRE_ONCE,
            FIRE_IDLE,
            DETECTED_STOP,
            STOP_ONCE
        }

        // detect the word once in all updates
        private static FireState _sFireState = FireState.IDLE;

        // make sure all turrets detect the async word in their update event
        private static bool _sReadyForLateUpdate = false;

        // init the speech proxy
        private IEnumerator Start()
        {
            // get the singleton instance
            _mSpeechDetectionPlugin = ProxySpeechDetectionPlugin.GetInstance();

            // check the reference to the plugin
            if (null == _mSpeechDetectionPlugin)
            {
                Debug.LogError("Proxy Speech Detection Plugin is not set!");
                yield break;
            }

            // wait for plugin to become available
            while (!_mSpeechDetectionPlugin.IsAvailable())
            {
                yield return null;
            }

            // subscribe to events
            _mSpeechDetectionPlugin.AddListenerOnDetectionResult(HandleDetectionResult);

            // abort and clear existing words
            _mSpeechDetectionPlugin.Abort();
        }

        // Handler for speech detection events
        void HandleDetectionResult(object sender, SpeechDetectionEventArgs args)
        {
            if (null == args.detectionResult)
            {
                return;
            }
            SpeechRecognitionResult[] results = args.detectionResult.results;
            if (null == results)
            {
                return;
            }
            bool doAbort = false;
            foreach (SpeechRecognitionResult result in results)
            {
                SpeechRecognitionAlternative[] alternatives = result.alternatives;
                if (null == alternatives)
                {
                    continue;
                }
                foreach (SpeechRecognitionAlternative alternative in alternatives)
                {
                    if (string.IsNullOrEmpty(alternative.transcript))
                    {
                        continue;
                    }
                    string lower = alternative.transcript.ToLower();
                    Debug.LogFormat("Detected: {0}", lower);
                    if (lower.Contains("fire"))
                    {
                        if (_sFireState == FireState.IDLE)
                        {
                            _sFireState = FireState.DETECTED_FIRE;
                        }
                        doAbort = true;
                    }
                    if (lower.Contains("stop"))
                    {
                        if (_sFireState == FireState.FIRE_IDLE)
                        {
                            _sFireState = FireState.DETECTED_STOP;
                        }
                        doAbort = true;
                    }
                }
            }
            // abort detection on match for faster matching on words instead of complete sentences
            if (doAbort)
            {
                _mSpeechDetectionPlugin.Abort();
            }
        }

        // make the async detected word detectable at the start of all the update events
        void LateUpdate()
        {
            if (_sReadyForLateUpdate)
            {
                _sReadyForLateUpdate = false;
                switch (_sFireState)
                {
                    case FireState.DETECTED_FIRE:
                        _sFireState = FireState.FIRE_ONCE;
                        break;
                    case FireState.FIRE_ONCE:
                        _sFireState = FireState.FIRE_IDLE;
                        break;
                    case FireState.DETECTED_STOP:
                        _sFireState = FireState.STOP_ONCE;
                        break;
                    case FireState.STOP_ONCE:
                        _sFireState = FireState.IDLE;
                        break;
                }
            }
        }

        void Update()
        {
            CheckForTurn();
            CheckForFire();

            // After update, use one late update to detect the async word
            _sReadyForLateUpdate = true;
        }

        void CheckForFire()
        {
            // Fire turret
            //if (!isFiring && Input.GetKeyDown(KeyCode.Mouse0))
            if (!isFiring && _sFireState == FireState.FIRE_ONCE)
            {
                isFiring = true;
                fxController.Fire();
            }

            // Stop firing
            //if (isFiring && Input.GetKeyUp(KeyCode.Mouse0))
            if (isFiring && _sFireState == FireState.STOP_ONCE)
            {
                isFiring = false;
                fxController.Stop();
            }
        }
```
To be able to call out the names of weapons and to add speech, I made some edits to the `F3DFXController` script.
```csharp
using System.Collections;
using System;
using UnityEngine;
using UnityEngine.UI;
using UnityWebGLSpeechDetection;
using UnityWebGLSpeechSynthesis;

namespace Forge3D
{
    // Weapon types
    public enum F3DFXType
    {
        Vulcan,
        SoloGun,
        Sniper,
        ShotGun,
        Seeker,
        RailGun,
        PlasmaGun,
        PlasmaBeam,
        PlasmaBeamHeavy,
        LightningGun,
        FlameRed,
        LaserImpulse
    }

    public class F3DFXController : MonoBehaviour
    {
        /// <summary>
        /// Voices drop down
        /// </summary>
        public Dropdown _mDropdownVoices = null;

        /// <summary>
        /// Reference to the proxy
        /// </summary>
        private ISpeechDetectionPlugin _mSpeechDetectionPlugin = null;

        /// <summary>
        /// Reference to the proxy
        /// </summary>
        private ISpeechSynthesisPlugin _mSpeechSynthesisPlugin = null;

        /// <summary>
        /// Reference to the supported voices
        /// </summary>
        private VoiceResult _mVoiceResult = null;

        /// <summary>
        /// Reference to the utterance, voice, and text to speak
        /// </summary>
        private SpeechSynthesisUtterance _mSpeechSynthesisUtterance = null;

        /// <summary>
        /// Track when the utterance is created
        /// </summary>
        private bool _mUtteranceSet = false;

        /// <summary>
        /// Track when the voices are created
        /// </summary>
        private bool _mVoicesSet = false;

        enum WeaponState
        {
            IDLE,
            DETECTED_LEFT,
            LEFT_ONCE,
            DETECTED_RIGHT,
            RIGHT_ONCE
        }

        // detect the word once in all updates
        private static WeaponState _sWeaponState = WeaponState.IDLE;

        // make sure all turrets detect the async word in their update event
        private static bool _sReadyForLateUpdate = false;

        // Singleton instance
        public static F3DFXController instance;

        // init the speech proxy
        private IEnumerator Start()
        {
            // get the singleton instance
            _mSpeechDetectionPlugin = ProxySpeechDetectionPlugin.GetInstance();

            // check the reference to the plugin
            if (null == _mSpeechDetectionPlugin)
            {
                Debug.LogError("Proxy Speech Detection Plugin is not set!");
                yield break;
            }

            // wait for plugin to become available
            while (!_mSpeechDetectionPlugin.IsAvailable())
            {
                yield return null;
            }

            _mSpeechSynthesisPlugin = ProxySpeechSynthesisPlugin.GetInstance();
            if (null == _mSpeechSynthesisPlugin)
            {
                Debug.LogError("Proxy Speech Synthesis Plugin is not set!");
                yield break;
            }

            // wait for proxy to become available
            while (!_mSpeechSynthesisPlugin.IsAvailable())
            {
                yield return null;
            }

            // subscribe to events
            _mSpeechDetectionPlugin.AddListenerOnDetectionResult(HandleDetectionResult);

            // abort and clear existing words
            _mSpeechDetectionPlugin.Abort();

            // Get voices from proxy
            GetVoices();

            // Create an instance of SpeechSynthesisUtterance
            _mSpeechSynthesisPlugin.CreateSpeechSynthesisUtterance((utterance) =>
            {
                //Debug.LogFormat("Utterance created: {0}", utterance._mReference);
                _mSpeechSynthesisUtterance = utterance;

                // The utterance is set
                _mUtteranceSet = true;

                // Set the default voice if ready
                SetIfReadyForDefaultVoice();
            });
        }

        /// <summary>
        /// Get voices from the proxy
        /// </summary>
        private void GetVoices()
        {
            // get voices from the proxy
            _mSpeechSynthesisPlugin.GetVoices((voiceResult) =>
            {
                _mVoiceResult = voiceResult;

                // prepare the voices drop down items
                SpeechSynthesisUtils.PopulateVoicesDropdown(_mDropdownVoices, _mVoiceResult);

                // The voices are set
                _mVoicesSet = true;

                // Set the default voice if ready
                SetIfReadyForDefaultVoice();
            });
        }

        /// <summary>
        /// Set the default voice if voices and utterance are ready
        /// </summary>
        private void SetIfReadyForDefaultVoice()
        {
            if (_mVoicesSet && _mUtteranceSet)
            {
                // set the default voice
                SpeechSynthesisUtils.SetDefaultVoice(_mDropdownVoices);

                // enable voices dropdown
                SpeechSynthesisUtils.SetInteractable(true, _mDropdownVoices);

                Voice voice = SpeechSynthesisUtils.GetVoice(_mVoiceResult, SpeechSynthesisUtils.GetDefaultVoice());
                _mSpeechSynthesisPlugin.SetVoice(_mSpeechSynthesisUtterance, voice);

                // drop down reference must be set
                if (_mDropdownVoices)
                {
                    // set up the drop down change listener
                    _mDropdownVoices.onValueChanged.AddListener(delegate
                    {
                        // handle the voice change event, and set the voice on the utterance
                        SpeechSynthesisUtils.HandleVoiceChanged(_mDropdownVoices,
                            _mVoiceResult,
                            _mSpeechSynthesisUtterance,
                            _mSpeechSynthesisPlugin);
                    });
                }
            }
        }

        /// <summary>
        /// Speak the utterance
        /// </summary>
        private void Speak(string text)
        {
            if (!_mVoicesSet || !_mUtteranceSet)
            {
                // not ready
                return;
            }

            // Cancel if already speaking
            _mSpeechSynthesisPlugin.Cancel();

            // Set the text that will be spoken
            _mSpeechSynthesisPlugin.SetText(_mSpeechSynthesisUtterance, text);

            // Use the plugin to speak the utterance
            _mSpeechSynthesisPlugin.Speak(_mSpeechSynthesisUtterance);
        }

        // Handler for speech detection events
        void HandleDetectionResult(object sender, SpeechDetectionEventArgs args)
        {
            if (null == args.detectionResult)
            {
                return;
            }
            SpeechRecognitionResult[] results = args.detectionResult.results;
            if (null == results)
            {
                return;
            }
            bool doAbort = false;
            foreach (SpeechRecognitionResult result in results)
            {
                SpeechRecognitionAlternative[] alternatives = result.alternatives;
                if (null == alternatives)
                {
                    continue;
                }
                foreach (SpeechRecognitionAlternative alternative in alternatives)
                {
                    if (string.IsNullOrEmpty(alternative.transcript))
                    {
                        continue;
                    }
                    string lower = alternative.transcript.ToLower();
                    Debug.LogFormat("Detected: {0}", lower);
                    if (lower.Contains("left"))
                    {
                        if (_sWeaponState == WeaponState.IDLE)
                        {
                            _sWeaponState = WeaponState.DETECTED_LEFT;
                        }
                        doAbort = true;
                        break;
                    }
                    else if (lower.Contains("right"))
                    {
                        if (_sWeaponState == WeaponState.IDLE)
                        {
                            _sWeaponState = WeaponState.DETECTED_RIGHT;
                        }
                        doAbort = true;
                        break;
                    }
                    else if (lower.Contains("lightning"))
                    {
                        if (DefaultFXType != F3DFXType.LightningGun)
                        {
                            DefaultFXType = F3DFXType.LightningGun;
                            Speak(string.Format("{0} is active, sir", DefaultFXType));
                        }
                        doAbort = true;
                        break;
                    }
                    else if (lower.Contains("beam"))
                    {
                        if (DefaultFXType != F3DFXType.PlasmaBeam)
                        {
                            DefaultFXType = F3DFXType.PlasmaBeam;
                            Speak(string.Format("{0} is active, sir", DefaultFXType));
                        }
                        doAbort = true;
                        break;
                    }
                }
            }
            // abort detection on match for faster matching on words instead of complete sentences
            if (doAbort)
            {
                _mSpeechDetectionPlugin.Abort();
            }
        }

        // make the async detected word detectable at the start of all the update events
        void LateUpdate()
        {
            if (_sReadyForLateUpdate)
            {
                _sReadyForLateUpdate = false;
                switch (_sWeaponState)
                {
                    case WeaponState.DETECTED_LEFT:
                        _sWeaponState = WeaponState.LEFT_ONCE;
                        break;
                    case WeaponState.LEFT_ONCE:
                        _sWeaponState = WeaponState.IDLE;
                        break;
                    case WeaponState.DETECTED_RIGHT:
                        _sWeaponState = WeaponState.RIGHT_ONCE;
                        break;
                    case WeaponState.RIGHT_ONCE:
                        _sWeaponState = WeaponState.IDLE;
                        break;
                }
            }
        }

        void Update()
        {
            // Switch weapon types using keyboard keys
            //if (Input.GetKeyDown(KeyCode.RightArrow))
            if (_sWeaponState == WeaponState.LEFT_ONCE)
                NextWeapon();
            //else if (Input.GetKeyDown(KeyCode.LeftArrow))
            if (_sWeaponState == WeaponState.RIGHT_ONCE)
                PrevWeapon();

            // After update, use one late update to detect the async word
            _sReadyForLateUpdate = true;
        }
```
My WebGL Speech Synthesis package has been accepted into the Unity Asset Store.
The [Sci-Fi Effects] asset comes with some great-looking turrets and effects.
I used the [WebGL Speech Detection] package to add speech commands.
And to make speech work in the Unity editor, I added the [Chrome Speech Proxy].
To make Speech Detection work in the Turret example, I made some edits to the `F3DPlayerTurretController.cs` script.
```csharp
// reference to the proxy
private ProxySpeechDetectionPlugin _mProxySpeechDetectionPlugin = null;

enum FireState
{
    IDLE,
    DETECTED_FIRE,
    FIRE_ONCE,
    FIRE_IDLE,
    DETECTED_STOP,
    STOP_ONCE
}

// detect the word once in all updates
private static FireState _sFireState = FireState.IDLE;

// make sure all turrets detect the async word in their update event
private static bool _sReadyForLateUpdate = false;

// init the speech proxy
private IEnumerator Start()
{
    while (null == WebGLSpeechDetectionPlugin.GetInstance() ||
        null == ProxySpeechDetectionPlugin.GetInstance() ||
        !ProxySpeechDetectionPlugin.GetInstance().IsAvailable())
    {
        yield return null;
    }

    // reference to the plugin
    WebGLSpeechDetectionPlugin plugin = WebGLSpeechDetectionPlugin.GetInstance();

    // subscribe to events
    plugin.OnDetectionResult += HandleDetectionResult;

    // reference to the proxy
    _mProxySpeechDetectionPlugin = ProxySpeechDetectionPlugin.GetInstance();

    // abort and clear existing words
    _mProxySpeechDetectionPlugin.Abort();
}

// Handler for speech detection events
void HandleDetectionResult(object sender, WebGLSpeechDetectionPlugin.SpeechDetectionEventArgs args)
{
    if (null == args.detectionResult)
    {
        return;
    }
    WebGLSpeechDetectionPlugin.SpeechRecognitionResult[] results = args.detectionResult.results;
    if (null == results)
    {
        return;
    }
    bool doAbort = false;
    foreach (WebGLSpeechDetectionPlugin.SpeechRecognitionResult result in results)
    {
        WebGLSpeechDetectionPlugin.SpeechRecognitionAlternative[] alternatives = result.alternatives;
        if (null == alternatives)
        {
            continue;
        }
        foreach (WebGLSpeechDetectionPlugin.SpeechRecognitionAlternative alternative in alternatives)
        {
            if (string.IsNullOrEmpty(alternative.transcript))
            {
                continue;
            }
            string lower = alternative.transcript.ToLower();
            Debug.LogFormat("Detected: {0}", lower);
            if (lower.Contains("fire"))
            {
                if (_sFireState == FireState.IDLE)
                {
                    _sFireState = FireState.DETECTED_FIRE;
                }
                doAbort = true;
            }
            if (lower.Contains("stop"))
            {
                if (_sFireState == FireState.FIRE_IDLE)
                {
                    _sFireState = FireState.DETECTED_STOP;
                }
                doAbort = true;
            }
        }
    }
    // abort detection on match for faster matching on words instead of complete sentences
    if (doAbort)
    {
        _mProxySpeechDetectionPlugin.Abort();
    }
}

// make the async detected word detectable at the start of all the update events
void LateUpdate()
{
    if (_sReadyForLateUpdate)
    {
        _sReadyForLateUpdate = false;
        switch (_sFireState)
        {
            case FireState.DETECTED_FIRE:
                _sFireState = FireState.FIRE_ONCE;
                break;
            case FireState.FIRE_ONCE:
                _sFireState = FireState.FIRE_IDLE;
                break;
            case FireState.DETECTED_STOP:
                _sFireState = FireState.STOP_ONCE;
                break;
            case FireState.STOP_ONCE:
                _sFireState = FireState.IDLE;
                break;
        }
    }
}

void Update()
{
    CheckForTurn();
    CheckForFire();

    // After update, use one late update to detect the async word
    _sReadyForLateUpdate = true;
}

void CheckForFire()
{
    // Fire turret
    //if (!isFiring && Input.GetKeyDown(KeyCode.Mouse0))
    if (!isFiring && _sFireState == FireState.FIRE_ONCE)
    {
        isFiring = true;
        fxController.Fire();
    }

    // Stop firing
    //if (isFiring && Input.GetKeyUp(KeyCode.Mouse0))
    if (isFiring && _sFireState == FireState.STOP_ONCE)
    {
        isFiring = false;
        fxController.Stop();
    }
}
```
To be able to call out the names of weapons, I made some edits to the `F3DFXController` script.
```csharp
// reference to the proxy
private ProxySpeechDetectionPlugin _mProxySpeechDetectionPlugin = null;

enum WeaponState
{
    IDLE,
    DETECTED_LEFT,
    LEFT_ONCE,
    DETECTED_RIGHT,
    RIGHT_ONCE
}

// detect the word once in all updates
private static WeaponState _sWeaponState = WeaponState.IDLE;

// make sure all turrets detect the async word in their update event
private static bool _sReadyForLateUpdate = false;

// Singleton instance
public static F3DFXController instance;

// init the speech proxy
private IEnumerator Start()
{
    while (null == WebGLSpeechDetectionPlugin.GetInstance() ||
        null == ProxySpeechDetectionPlugin.GetInstance() ||
        !ProxySpeechDetectionPlugin.GetInstance().IsAvailable())
    {
        yield return null;
    }

    // reference to the plugin
    WebGLSpeechDetectionPlugin plugin = WebGLSpeechDetectionPlugin.GetInstance();

    // subscribe to events
    plugin.OnDetectionResult += HandleDetectionResult;

    // reference to the proxy
    _mProxySpeechDetectionPlugin = ProxySpeechDetectionPlugin.GetInstance();

    // abort and clear existing words
    _mProxySpeechDetectionPlugin.Abort();
}

// Handler for speech detection events
void HandleDetectionResult(object sender, WebGLSpeechDetectionPlugin.SpeechDetectionEventArgs args)
{
    if (null == args.detectionResult)
    {
        return;
    }
    WebGLSpeechDetectionPlugin.SpeechRecognitionResult[] results = args.detectionResult.results;
    if (null == results)
    {
        return;
    }
    bool doAbort = false;
    foreach (WebGLSpeechDetectionPlugin.SpeechRecognitionResult result in results)
    {
        WebGLSpeechDetectionPlugin.SpeechRecognitionAlternative[] alternatives = result.alternatives;
        if (null == alternatives)
        {
            continue;
        }
        foreach (WebGLSpeechDetectionPlugin.SpeechRecognitionAlternative alternative in alternatives)
        {
            if (string.IsNullOrEmpty(alternative.transcript))
            {
                continue;
            }
            string lower = alternative.transcript.ToLower();
            Debug.LogFormat("Detected: {0}", lower);
            if (lower.Contains("left"))
            {
                if (_sWeaponState == WeaponState.IDLE)
                {
                    _sWeaponState = WeaponState.DETECTED_LEFT;
                }
                doAbort = true;
            }
            else if (lower.Contains("right"))
            {
                if (_sWeaponState == WeaponState.IDLE)
                {
                    _sWeaponState = WeaponState.DETECTED_RIGHT;
                }
                doAbort = true;
            }
            else if (lower.Contains("lightning"))
            {
                DefaultFXType = F3DFXType.LightningGun;
                doAbort = true;
            }
            else if (lower.Contains("beam"))
            {
                DefaultFXType = F3DFXType.PlasmaBeam;
                doAbort = true;
            }
        }
    }
    // abort detection on match for faster matching on words instead of complete sentences
    if (doAbort)
    {
        _mProxySpeechDetectionPlugin.Abort();
    }
}

// make the async detected word detectable at the start of all the update events
void LateUpdate()
{
    if (_sReadyForLateUpdate)
    {
        _sReadyForLateUpdate = false;
        switch (_sWeaponState)
        {
            case WeaponState.DETECTED_LEFT:
                _sWeaponState = WeaponState.LEFT_ONCE;
                break;
            case WeaponState.LEFT_ONCE:
                _sWeaponState = WeaponState.IDLE;
                break;
            case WeaponState.DETECTED_RIGHT:
                _sWeaponState = WeaponState.RIGHT_ONCE;
                break;
            case WeaponState.RIGHT_ONCE:
                _sWeaponState = WeaponState.IDLE;
                break;
        }
    }
}

void Update()
{
    // Switch weapon types using keyboard keys
    //if (Input.GetKeyDown(KeyCode.RightArrow))
    if (_sWeaponState == WeaponState.LEFT_ONCE)
        NextWeapon();
    //else if (Input.GetKeyDown(KeyCode.LeftArrow))
    if (_sWeaponState == WeaponState.RIGHT_ONCE)
        PrevWeapon();

    // After update, use one late update to detect the async word
    _sReadyForLateUpdate = true;
}
```
The [Chrome Speech Proxy] uses the Chrome browser's Speech API to do real-time speech detection without any quotas. This makes Speech Detection available on Windows and in the Unity editor.
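For reference, here is the bare minimum needed to wire up the proxy in a standalone behaviour. It is a sketch that uses only the calls that appear in the turret edits above; the class name is my own, and I am assuming the package's `UnityWebGLSpeechDetection` namespace:

```csharp
using System.Collections;
using UnityEngine;
using UnityWebGLSpeechDetection;

// Minimal sketch: waits for the proxy connection, then logs every transcript.
public class ProxyDictationLogger : MonoBehaviour
{
    private IEnumerator Start()
    {
        // wait for both the plugin and the proxy connection to become available
        while (null == WebGLSpeechDetectionPlugin.GetInstance() ||
            null == ProxySpeechDetectionPlugin.GetInstance() ||
            !ProxySpeechDetectionPlugin.GetInstance().IsAvailable())
        {
            yield return null;
        }

        // subscribe to detection events
        WebGLSpeechDetectionPlugin.GetInstance().OnDetectionResult += HandleDetectionResult;

        // abort and clear any words detected before startup
        ProxySpeechDetectionPlugin.GetInstance().Abort();
    }

    void HandleDetectionResult(object sender, WebGLSpeechDetectionPlugin.SpeechDetectionEventArgs args)
    {
        if (null == args.detectionResult || null == args.detectionResult.results)
        {
            return;
        }
        foreach (WebGLSpeechDetectionPlugin.SpeechRecognitionResult result in args.detectionResult.results)
        {
            if (null == result.alternatives)
            {
                continue;
            }
            foreach (WebGLSpeechDetectionPlugin.SpeechRecognitionAlternative alternative in result.alternatives)
            {
                if (!string.IsNullOrEmpty(alternative.transcript))
                {
                    Debug.LogFormat("Detected: {0}", alternative.transcript);
                }
            }
        }
    }
}
```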
Emotiv has a free [Community SDK] for interacting with the Insight and Epoc headsets.
The developer community hangs out on the [forums] and in the [G+ Community].
I created repositories to hold some new Unreal projects.
Documentation: [UnrealHTML5SpeechDetection] [private repo]
Research:
Default Local Build: [localhost:8000]
[Unreal: HTML5 – Getting Started]
[How to reduce HTML5 package size?]
Files Required for Final Deployment:

- `*.js.gz` - compressed JavaScript files
- `*.data` - compressed game content
- `*.mem` - compressed memory initialization file
- `*.html` - uncompressed landing page
- `*.symbols` - uncompressed symbols, if necessary
[HTML5: Call C++ UFUNCTION from Webpage via JavaScript]
[HTML5Platform.Automation.cs] includes [GameX.html.template]
[Connecting C++ and HTML5] [info]
Video: [Getting started with Emscripten – Transpiling C / C++ to JavaScript / HTML5]
Issues:
Pull Request: [Add support for including project and plugin JS]
I created public repositories to hold the online documentation for the Unity speech-related packages.
[Demo 01 Unity Speech Dictation]
[Demo 02 Unity Speech Commands]
[Demo 01 Unity Speech Synthesis]
Documentation: [UnityWebGLSpeechDetection] [private]
Documentation: [UnityWebGLSpeechSynthesis] [private]
Documentation and Source: [UnityWebGLMicrophone]
Documentation: [WebGL: Interacting with browser scripting]
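The short version of browser scripting: a `.jslib` plugin under `Assets/Plugins/WebGL` exposes JavaScript functions, and C# imports them with `DllImport("__Internal")`. Here is a minimal sketch; the `ShowAlert` function and file layout are my own illustration, not from any of the packages above:

```csharp
using System.Runtime.InteropServices;
using UnityEngine;

public class BrowserAlert : MonoBehaviour
{
#if UNITY_WEBGL && !UNITY_EDITOR
    // Implemented in Assets/Plugins/WebGL/BrowserAlert.jslib:
    //
    // mergeInto(LibraryManager.library, {
    //     ShowAlert: function (msg) {
    //         window.alert(Pointer_stringify(msg));
    //     }
    // });
    //
    // (Pointer_stringify is the string helper in Unity versions of this era.)
    [DllImport("__Internal")]
    private static extern void ShowAlert(string msg);
#else
    // Fallback so the script still runs in the editor and on other platforms
    private static void ShowAlert(string msg)
    {
        Debug.Log(msg);
    }
#endif

    void Start()
    {
        ShowAlert("Hello from Unity WebGL!");
    }
}
```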
The [Chrome Speech Demo] supports multiple languages.
[UnityWebGLDropdown Test] [repo] [issue]
[LOW LEVEL PLUGINS IN UNITY WEBGL]
[Jasper] is an open source platform for developing always-on, voice-controlled applications.
Can a neural network learn to recognize doodles?
See how well it does with your drawings and help teach it, just by playing.
CodinGame has recently made the TensorFlow framework available for coding and designed a specific problem for it. It smartly enables Machine Learning beginners to practice the [TensorFlow tutorial]….
TechCrunch covers the [Microsoft Build 2016 Conference]. Xbox retail units can be used as dev kits, while Microsoft is investing in bots and machine learning.
Google has a free tier to try the [Google Cloud Vision Beta] that can do OCR, detect objects, and detect facial expressions.
Compete for $5M in the [IBM Watson AI XPrize] to solve a world problem with AI.
TechCrunch did an [article] on Microsoft moving the CNTK Machine Learning Toolkit from CodePlex to GitHub, which spiders off to a bunch of deep learning resources and AI history.