Unity: Drive Turrets With Speech

The [Sci-Fi Effects] assets comes with some great looking turrets and effects.

I used the [WebGL Speech Detection] package to add speech commands.

And to make speech work in the Unity editor, I added the [Chrome Speech Proxy].

To make Speech Detection work in the Turret example, I made some edits to the `F3DPlayerTurretController.cs` script.

        // reference to the proxy
        private ProxySpeechDetectionPlugin _mProxySpeechDetectionPlugin = null;

        enum FireState
        {
            IDLE,
            DETECTED_FIRE,
            FIRE_ONCE,
            FIRE_IDLE,
            DETECTED_STOP,
            STOP_ONCE
        }

        // detect the word once in all updates
        private static FireState _sFireState = FireState.IDLE;

        // make sure all turrets detect the async word in their update event
        private static bool _sReadyForLateUpdate = false;

        // init the speech proxy
        private IEnumerator Start()
        {
            while (null == WebGLSpeechDetectionPlugin.GetInstance() ||
                null == ProxySpeechDetectionPlugin.GetInstance() ||
                !ProxySpeechDetectionPlugin.GetInstance().IsAvailable())
            {
                yield return null;
            }

            // reference to the plugin
            WebGLSpeechDetectionPlugin plugin = WebGLSpeechDetectionPlugin.GetInstance();

            // subscribe to events
            plugin.OnDetectionResult += HandleDetectionResult;

            // reference to the proxy
            _mProxySpeechDetectionPlugin = ProxySpeechDetectionPlugin.GetInstance();

            // abort and clear existing words
            _mProxySpeechDetectionPlugin.Abort();
        }

        // Handler for speech detection events
        void HandleDetectionResult(object sender, WebGLSpeechDetectionPlugin.SpeechDetectionEventArgs args)
        {
            if (null == args.detectionResult)
            {
                return;
            }
            WebGLSpeechDetectionPlugin.SpeechRecognitionResult[] results = args.detectionResult.results;
            if (null == results)
            {
                return;
            }
            bool doAbort = false;
            foreach (WebGLSpeechDetectionPlugin.SpeechRecognitionResult result in results)
            {
                WebGLSpeechDetectionPlugin.SpeechRecognitionAlternative[] alternatives = result.alternatives;
                if (null == alternatives)
                {
                    continue;
                }
                foreach (WebGLSpeechDetectionPlugin.SpeechRecognitionAlternative alternative in alternatives)
                {
                    if (string.IsNullOrEmpty(alternative.transcript))
                    {
                        continue;
                    }
                    string lower = alternative.transcript.ToLower();
                    Debug.LogFormat("Detected: {0}", lower);
                    if (lower.Contains("fire"))
                    {
                        if (_sFireState == FireState.IDLE)
                        {
                            _sFireState = FireState.DETECTED_FIRE;
                        }
                        doAbort = true;
                    }

                    if (lower.Contains("stop"))
                    {
                        if (_sFireState == FireState.FIRE_IDLE)
                        {
                            _sFireState = FireState.DETECTED_STOP;
                        }
                        doAbort = true;
                    }
                }
            }

            // abort detection on match for faster matching on words instead of complete sentences
            if (doAbort)
            {
                _mProxySpeechDetectionPlugin.Abort();
            }
        }

        // make the async detected word, detectable at the start of all the update events
        void LateUpdate()
        {
            if (_sReadyForLateUpdate)
            {
                _sReadyForLateUpdate = false;
                switch (_sFireState)
                {
                    case FireState.DETECTED_FIRE:
                        _sFireState = FireState.FIRE_ONCE;
                        break;
                    case FireState.FIRE_ONCE:
                        _sFireState = FireState.FIRE_IDLE;
                        break;
                    case FireState.DETECTED_STOP:
                        _sFireState = FireState.STOP_ONCE;
                        break;
                    case FireState.STOP_ONCE:
                        _sFireState = FireState.IDLE;
                        break;
                }
            }
        }

        void Update()
        {
            CheckForTurn();
            CheckForFire();

            // After update, use one late update to detect the async word
            _sReadyForLateUpdate = true;
        }

        void CheckForFire()
        {
            // Fire turret
            //if (!isFiring && Input.GetKeyDown(KeyCode.Mouse0))
            if (!isFiring && _sFireState == FireState.FIRE_ONCE)
            {
                isFiring = true;
                fxController.Fire();
            }

            // Stop firing
            //if (isFiring && Input.GetKeyUp(KeyCode.Mouse0))
            if (isFiring && _sFireState == FireState.STOP_ONCE)
            {
                isFiring = false;
                fxController.Stop();
            }
        }

To be able to call out the names of weapons, I made some edits to the `F3DFXController` script.

        // reference to the proxy
        private ProxySpeechDetectionPlugin _mProxySpeechDetectionPlugin = null;

        enum WeaponState
        {
            IDLE,
            DETECTED_LEFT,
            LEFT_ONCE,
            DETECTED_RIGHT,
            RIGHT_ONCE
        }

        // detect the word once in all updates
        private static WeaponState _sWeaponState = WeaponState.IDLE;

        // make sure all turrets detect the async word in their update event
        private static bool _sReadyForLateUpdate = false;

        // Singleton instance
        public static F3DFXController instance;

        // init the speech proxy
        private IEnumerator Start()
        {
            while (null == WebGLSpeechDetectionPlugin.GetInstance() ||
                null == ProxySpeechDetectionPlugin.GetInstance() ||
                !ProxySpeechDetectionPlugin.GetInstance().IsAvailable())
            {
                yield return null;
            }

            // reference to the plugin
            WebGLSpeechDetectionPlugin plugin = WebGLSpeechDetectionPlugin.GetInstance();

            // subscribe to events
            plugin.OnDetectionResult += HandleDetectionResult;

            // reference to the proxy
            _mProxySpeechDetectionPlugin = ProxySpeechDetectionPlugin.GetInstance();

            // abort and clear existing words
            _mProxySpeechDetectionPlugin.Abort();
        }

        // Handler for speech detection events
        void HandleDetectionResult(object sender, WebGLSpeechDetectionPlugin.SpeechDetectionEventArgs args)
        {
            if (null == args.detectionResult)
            {
                return;
            }
            WebGLSpeechDetectionPlugin.SpeechRecognitionResult[] results = args.detectionResult.results;
            if (null == results)
            {
                return;
            }
            bool doAbort = false;
            foreach (WebGLSpeechDetectionPlugin.SpeechRecognitionResult result in results)
            {
                WebGLSpeechDetectionPlugin.SpeechRecognitionAlternative[] alternatives = result.alternatives;
                if (null == alternatives)
                {
                    continue;
                }
                foreach (WebGLSpeechDetectionPlugin.SpeechRecognitionAlternative alternative in alternatives)
                {
                    if (string.IsNullOrEmpty(alternative.transcript))
                    {
                        continue;
                    }
                    string lower = alternative.transcript.ToLower();
                    Debug.LogFormat("Detected: {0}", lower);
                    if (lower.Contains("left"))
                    {
                        if (_sWeaponState == WeaponState.IDLE)
                        {
                            _sWeaponState = WeaponState.DETECTED_LEFT;
                        }
                        doAbort = true;
                    }

                    else if (lower.Contains("right"))
                    {
                        if (_sWeaponState == WeaponState.IDLE)
                        {
                            _sWeaponState = WeaponState.DETECTED_RIGHT;
                        }
                        doAbort = true;
                    }

                    else if (lower.Contains("lightning"))
                    {
                        DefaultFXType = F3DFXType.LightningGun;
                        doAbort = true;
                    }

                    else if (lower.Contains("beam"))
                    {
                        DefaultFXType = F3DFXType.PlasmaBeam;
                        doAbort = true;
                    }
                }
            }

            // abort detection on match for faster matching on words instead of complete sentences
            if (doAbort)
            {
                _mProxySpeechDetectionPlugin.Abort();
            }
        }

        // make the async detected word, detectable at the start of all the update events
        void LateUpdate()
        {
            if (_sReadyForLateUpdate)
            {
                _sReadyForLateUpdate = false;
                switch (_sWeaponState)
                {
                    case WeaponState.DETECTED_LEFT:
                        _sWeaponState = WeaponState.LEFT_ONCE;
                        break;
                    case WeaponState.LEFT_ONCE:
                        _sWeaponState = WeaponState.IDLE;
                        break;
                    case WeaponState.DETECTED_RIGHT:
                        _sWeaponState = WeaponState.RIGHT_ONCE;
                        break;
                    case WeaponState.RIGHT_ONCE:
                        _sWeaponState = WeaponState.IDLE;
                        break;
                }
            }
        }

        void Update()
        {
            // Switch weapon types using keyboard keys
            //if (Input.GetKeyDown(KeyCode.RightArrow))
            if (_sWeaponState == WeaponState.LEFT_ONCE)
                NextWeapon();
            //else if (Input.GetKeyDown(KeyCode.LeftArrow))
            if (_sWeaponState == WeaponState.RIGHT_ONCE)
                PrevWeapon();

            // After update, use one late update to detect the async word
            _sReadyForLateUpdate = true;
        }

Unreal: Speech Plugins

I created repositories to hold some new Unreal projects.

Documentation: [UnrealHTML5SpeechDetection] [private repo]

[Demo 01 – Speech Dictation]

Research:

Default Local Build: [localhost:8000]

[Unreal: HTML5 – Getting Started]

[How to reduce HTML5 package size?]

Files Required for Final Deployment
-----------------------------------

*.js.gz   - compressed JavaScript files.
*.data    - compressed game content.
*.mem     - compressed memory initialization file.
*.html    - uncompressed landing page.
*.symbols - uncompressed symbols, if necessary.

[HTML5: Call C++ UFUNCTION from Webpage via JavaScript]

[HTML5Platform.Automation.cs] includes [GameX.html.template]

[Connecting C++ and HTML5] [info]

Video: [Getting started with Emscripten – Transpiling C / C++ to JavaScript / HTML5]

[Unreal.js]

[Unreal.js-core]

Issues:

[UE4 Improvements]

[AnswerHub]

Pull Request: [Add support for including project and plugin JS]

Unity – Speech Plugins

I created a public repositories to hold the online documentation for Unity speech related packages.

[Demo 01 Unity Speech Dictation]

[Demo 02 Unity Speech Commands]

[Demo 01 Unity Speech Synthesis]

Documentation: [UnityWebGLSpeechDetection] [private]

Documentation: [UnityWebGLSpeechSynthesis] [private]

Documentation and Source: [UnityWebGLMicrophone]

Documentation: [WebGL: Interacting with browser scripting]

[Browser Speech Test]

The [Chrome Speech Demo] supports multiple languages.

[UnityWebGLDropdown Test] [repo] [issue]

[LOW LEVEL PLUGINS IN UNITY WEBGL]

[Firefox and the Web Speech API]