My WebGL Speech Synthesis package has been accepted into the Unity Asset Store.
Category: Artificial Intelligence
Unity: Switch Weapons With Speech
Unity: Drive Turrets With Speech
The [Sci-Fi Effects] asset comes with some great-looking turrets and effects.
I used the [WebGL Speech Detection] package to add speech commands.
And to make speech work in the Unity editor, I added the [Chrome Speech Proxy].
To make Speech Detection work in the Turret example, I made some edits to the `F3DPlayerTurretController.cs` script.
// Reference to the speech proxy; assigned in Start() once the proxy reports it is available.
private ProxySpeechDetectionPlugin _mProxySpeechDetectionPlugin = null;

// Steps of the spoken "fire"/"stop" commands:
//   HandleDetectionResult() sets the DETECTED_* values,
//   LateUpdate() advances DETECTED_* -> *_ONCE -> FIRE_IDLE / IDLE,
//   CheckForFire() reacts to the *_ONCE values.
enum FireState
{
    IDLE,
    DETECTED_FIRE,
    FIRE_ONCE,
    FIRE_IDLE,
    DETECTED_STOP,
    STOP_ONCE
}

// Detect the word once in all updates (static, so the state is shared by every instance).
private static FireState _sFireState = FireState.IDLE;

// Make sure all turrets detect the async word in their update event:
// Update() sets this flag and LateUpdate() consumes it before advancing the state.
private static bool _sReadyForLateUpdate = false;

// Init the speech proxy.
// Runs as a coroutine: yields one frame at a time until both plugin singletons exist
// and the proxy reports that it is available, then subscribes to detection results.
private IEnumerator Start()
{
    while (null == WebGLSpeechDetectionPlugin.GetInstance() ||
           null == ProxySpeechDetectionPlugin.GetInstance() ||
           !ProxySpeechDetectionPlugin.GetInstance().IsAvailable())
    {
        yield return null;
    }

    // reference to the plugin
    WebGLSpeechDetectionPlugin plugin = WebGLSpeechDetectionPlugin.GetInstance();

    // subscribe to events
    // NOTE(review): this handler is never unsubscribed in the code shown here;
    // consider removing it when the component is destroyed.
    plugin.OnDetectionResult += HandleDetectionResult;

    // reference to the proxy
    _mProxySpeechDetectionPlugin = ProxySpeechDetectionPlugin.GetInstance();

    // abort and clear existing words
    _mProxySpeechDetectionPlugin.Abort();
}

// Handler for speech detection events.
// Scans every alternative transcript of every result for the words "fire" and "stop"
// and records the match in the shared fire state.
void HandleDetectionResult(object sender, WebGLSpeechDetectionPlugin.SpeechDetectionEventArgs args)
{
    if (null == args.detectionResult)
    {
        return;
    }

    WebGLSpeechDetectionPlugin.SpeechRecognitionResult[] results = args.detectionResult.results;
    if (null == results)
    {
        return;
    }

    bool doAbort = false;
    foreach (WebGLSpeechDetectionPlugin.SpeechRecognitionResult result in results)
    {
        WebGLSpeechDetectionPlugin.SpeechRecognitionAlternative[] alternatives = result.alternatives;
        if (null == alternatives)
        {
            continue;
        }

        foreach (WebGLSpeechDetectionPlugin.SpeechRecognitionAlternative alternative in alternatives)
        {
            if (string.IsNullOrEmpty(alternative.transcript))
            {
                continue;
            }

            // Invariant lower-casing: the culture-sensitive ToLower() maps 'I' to a
            // dotless 'ı' under Turkish/Azeri cultures, which would stop "fire" matching.
            string lower = alternative.transcript.ToLowerInvariant();
            Debug.LogFormat("Detected: {0}", lower);

            if (lower.Contains("fire"))
            {
                // only start firing from the idle state
                if (_sFireState == FireState.IDLE)
                {
                    _sFireState = FireState.DETECTED_FIRE;
                }
                doAbort = true;
            }

            if (lower.Contains("stop"))
            {
                // only stop once the fire command has been fully processed
                if (_sFireState == FireState.FIRE_IDLE)
                {
                    _sFireState = FireState.DETECTED_STOP;
                }
                doAbort = true;
            }
        }
    }

    // abort detection on match for faster matching on words instead of complete sentences
    if (doAbort)
    {
        _mProxySpeechDetectionPlugin.Abort();
    }
}

// Make the async detected word detectable at the start of all the update events.
// The state advances by one step, and only after an Update() has set the ready flag.
void LateUpdate()
{
    if (_sReadyForLateUpdate)
    {
        _sReadyForLateUpdate = false;
        switch (_sFireState)
        {
            case FireState.DETECTED_FIRE:
                _sFireState = FireState.FIRE_ONCE;
                break;
            case FireState.FIRE_ONCE:
                _sFireState = FireState.FIRE_IDLE;
                break;
            case FireState.DETECTED_STOP:
                _sFireState = FireState.STOP_ONCE;
                break;
            case FireState.STOP_ONCE:
                _sFireState = FireState.IDLE;
                break;
        }
    }
}

// Per-frame update: handle turning and firing, then arm the next LateUpdate().
void Update()
{
    CheckForTurn();
    CheckForFire();

    // After update, use one late update to detect the async word
    _sReadyForLateUpdate = true;
}

// Starts or stops firing based on the shared speech state instead of the mouse button.
void CheckForFire()
{
    // Fire turret
    // (mouse version: if (!isFiring && Input.GetKeyDown(KeyCode.Mouse0)))
    if (!isFiring && _sFireState == FireState.FIRE_ONCE)
    {
        isFiring = true;
        fxController.Fire();
    }

    // Stop firing
    // (mouse version: if (isFiring && Input.GetKeyUp(KeyCode.Mouse0)))
    if (isFiring && _sFireState == FireState.STOP_ONCE)
    {
        isFiring = false;
        fxController.Stop();
    }
}
To be able to call out the names of weapons, I made some edits to the `F3DFXController` script.
// Reference to the speech proxy; assigned in Start() once the proxy reports it is available.
private ProxySpeechDetectionPlugin _mProxySpeechDetectionPlugin = null;

// Steps of the spoken "left"/"right" commands:
//   HandleDetectionResult() sets the DETECTED_* values,
//   LateUpdate() advances DETECTED_* -> *_ONCE -> IDLE,
//   Update() reacts to the *_ONCE values.
enum WeaponState
{
    IDLE,
    DETECTED_LEFT,
    LEFT_ONCE,
    DETECTED_RIGHT,
    RIGHT_ONCE
}

// Detect the word once in all updates (static, so the state is shared by every instance).
private static WeaponState _sWeaponState = WeaponState.IDLE;

// Make sure all turrets detect the async word in their update event:
// Update() sets this flag and LateUpdate() consumes it before advancing the state.
private static bool _sReadyForLateUpdate = false;

// Singleton instance
public static F3DFXController instance;

// Init the speech proxy.
// Runs as a coroutine: yields one frame at a time until both plugin singletons exist
// and the proxy reports that it is available, then subscribes to detection results.
private IEnumerator Start()
{
    while (null == WebGLSpeechDetectionPlugin.GetInstance() ||
           null == ProxySpeechDetectionPlugin.GetInstance() ||
           !ProxySpeechDetectionPlugin.GetInstance().IsAvailable())
    {
        yield return null;
    }

    // reference to the plugin
    WebGLSpeechDetectionPlugin plugin = WebGLSpeechDetectionPlugin.GetInstance();

    // subscribe to events
    // NOTE(review): this handler is never unsubscribed in the code shown here;
    // consider removing it when the component is destroyed.
    plugin.OnDetectionResult += HandleDetectionResult;

    // reference to the proxy
    _mProxySpeechDetectionPlugin = ProxySpeechDetectionPlugin.GetInstance();

    // abort and clear existing words
    _mProxySpeechDetectionPlugin.Abort();
}

// Handler for speech detection events.
// Scans every alternative transcript of every result. "left"/"right" are recorded in the
// shared weapon state; "lightning"/"beam" select a weapon type directly. Only the first
// matching keyword of a transcript is acted on (else-if chain).
void HandleDetectionResult(object sender, WebGLSpeechDetectionPlugin.SpeechDetectionEventArgs args)
{
    if (null == args.detectionResult)
    {
        return;
    }

    WebGLSpeechDetectionPlugin.SpeechRecognitionResult[] results = args.detectionResult.results;
    if (null == results)
    {
        return;
    }

    bool doAbort = false;
    foreach (WebGLSpeechDetectionPlugin.SpeechRecognitionResult result in results)
    {
        WebGLSpeechDetectionPlugin.SpeechRecognitionAlternative[] alternatives = result.alternatives;
        if (null == alternatives)
        {
            continue;
        }

        foreach (WebGLSpeechDetectionPlugin.SpeechRecognitionAlternative alternative in alternatives)
        {
            if (string.IsNullOrEmpty(alternative.transcript))
            {
                continue;
            }

            // Invariant lower-casing: the culture-sensitive ToLower() maps 'I' to a
            // dotless 'ı' under Turkish/Azeri cultures, which would stop "right" and
            // "lightning" matching.
            string lower = alternative.transcript.ToLowerInvariant();
            Debug.LogFormat("Detected: {0}", lower);

            if (lower.Contains("left"))
            {
                // ignore the word while a previous command is still being processed
                if (_sWeaponState == WeaponState.IDLE)
                {
                    _sWeaponState = WeaponState.DETECTED_LEFT;
                }
                doAbort = true;
            }
            else if (lower.Contains("right"))
            {
                if (_sWeaponState == WeaponState.IDLE)
                {
                    _sWeaponState = WeaponState.DETECTED_RIGHT;
                }
                doAbort = true;
            }
            else if (lower.Contains("lightning"))
            {
                DefaultFXType = F3DFXType.LightningGun;
                doAbort = true;
            }
            else if (lower.Contains("beam"))
            {
                DefaultFXType = F3DFXType.PlasmaBeam;
                doAbort = true;
            }
        }
    }

    // abort detection on match for faster matching on words instead of complete sentences
    if (doAbort)
    {
        _mProxySpeechDetectionPlugin.Abort();
    }
}

// Make the async detected word detectable at the start of all the update events.
// The state advances by one step, and only after an Update() has set the ready flag.
void LateUpdate()
{
    if (_sReadyForLateUpdate)
    {
        _sReadyForLateUpdate = false;
        switch (_sWeaponState)
        {
            case WeaponState.DETECTED_LEFT:
                _sWeaponState = WeaponState.LEFT_ONCE;
                break;
            case WeaponState.LEFT_ONCE:
                _sWeaponState = WeaponState.IDLE;
                break;
            case WeaponState.DETECTED_RIGHT:
                _sWeaponState = WeaponState.RIGHT_ONCE;
                break;
            case WeaponState.RIGHT_ONCE:
                _sWeaponState = WeaponState.IDLE;
                break;
        }
    }
}

// Per-frame update: switch weapon types from the shared speech state instead of the
// keyboard, then arm the next LateUpdate().
void Update()
{
    // Switch weapon types using speech (previously keyboard keys)
    // (keyboard version: if (Input.GetKeyDown(KeyCode.RightArrow)))
    // NOTE(review): the keyboard version mapped RightArrow to NextWeapon(), while here
    // the spoken word "left" does - confirm the direction is intended.
    if (_sWeaponState == WeaponState.LEFT_ONCE)
    {
        NextWeapon();
    }

    // (keyboard version: else if (Input.GetKeyDown(KeyCode.LeftArrow)))
    if (_sWeaponState == WeaponState.RIGHT_ONCE)
    {
        PrevWeapon();
    }

    // After update, use one late update to detect the async word
    _sReadyForLateUpdate = true;
}
Chrome Speech Proxy
The [Chrome Speech Proxy] uses the Chrome Browser for the Speech API to do real-time speech detection without any quotas. This makes Speech Detection available on Windows and in the Unity editor.
Emotiv: Community SDK
Emotiv has a free [Community SDK] for interacting with the Insight and Epoc headsets.
The developer community hangs out on the [forums] and in the [G+ Community].
Unreal: Speech Plugins
I created repositories to hold some new Unreal projects.
Documentation: [UnrealHTML5SpeechDetection] [private repo]
Research:
Default Local Build: [localhost:8000]
[Unreal: HTML5 – Getting Started]
[How to reduce HTML5 package size?]
Files Required for Final Deployment ----------------------------------- *.js.gz - compressed JavaScript files. *.data - compressed game content. *.mem - compressed memory initialization file. *.html - uncompressed landing page. *.symbols - uncompressed symbols, if necessary.
[HTML5: Call C++ UFUNCTION from Webpage via JavaScript]
[HTML5Platform.Automation.cs] includes [GameX.html.template]
[Connecting C++ and HTML5] [info]
Video: [Getting started with Emscripten – Transpiling C / C++ to JavaScript / HTML5]
Issues:
Pull Request: [Add support for including project and plugin JS]
Microsoft Debuts Customizable Speech-To-Text Tech, Releases Some Cognitive Services Tools To Developers
Unity – Speech Plugins
I created public repositories to hold the online documentation for Unity speech-related packages.
[Demo 01 Unity Speech Dictation]
[Demo 02 Unity Speech Commands]
[Demo 01 Unity Speech Synthesis]
Documentation: [UnityWebGLSpeechDetection] [private]
Documentation: [UnityWebGLSpeechSynthesis] [private]
Documentation and Source: [UnityWebGLMicrophone]
Documentation: [WebGL: Interacting with browser scripting]
The [Chrome Speech Demo] supports multiple languages.
[UnityWebGLDropdown Test] [repo] [issue]
[LOW LEVEL PLUGINS IN UNITY WEBGL]
Jasper Voice Control
[Jasper] is an open source platform for developing always-on, voice-controlled applications.
Google AI Wrote Code To Do Better Translation
Unite Europe 2016 – Next Generation AI for Unity
Apple Publishes Its First AI Research Paper
Quick Draw With Google
Can a neural network learn to recognize doodles?
See how well it does with your drawings and help teach it, just by playing.
Lip Reading AI More Accurate Than Humans
Adobe experiment slips new words into your voice recordings
Amazon to spend $2.5M on university competition to build “socialbot”
Microsoft reorganizes to create a dedicated AI division
Google opens up its machine learning tricks to all
Machine Learning comes to CodinGame
CodinGame has recently made the TensorFlow framework available for coding and designed a specific problem for it. It smartly enables Machine Learning beginners to practice the [TensorFlow tutorial]….
Microsoft Build 2016 Coverage
Techcrunch covers the [Microsoft Build 2016 Conference]. XBOX retail units can be used as dev kits while Microsoft is investing in bots and machine learning.
Cloud Vision API Beta
Google has a free tier to try the [Google Cloud Vision Beta] that can do OCR, detect objects, and detect facial expressions.
XPrize AI Prize
Compete for $5M in the [IBM Watson AI XPrize] to solve a world problem with AI.
Microsoft CNTK Machine Learning Toolkit
Techcrunch did an [article] on MSFT moving the CNTK Machine Learning Toolkit from Codeplex to Github, which spiders off to a bunch of deep learning resources and AI history.
Google Deep Learning
Google created a free [deep-learning] course on Udacity that should take about 3 months to complete.
Machine Learning Via Coursera
Stanford University offers a [Machine Learning Course] with an optional certificate upon completion. The free option gives immediate access to the first week of course materials. The full course is available on scheduled sessions.