MQTT-enabled (unintelligent) IoT Speaker using Amazon Polly

My home is full of ESPs and various server-side MQTT-enabled bridges. I wanted to enable this network of sensors and nodes to produce intelligent and rich TTS output.

Use cases were:

    • Motion detection when blinds are closed should lead to a decent message
    • Summary of doorbell rings during the day, including their timestamps
    • TTS to my family, like a good morning message when I am away
    • Take the Dark Sky weather forecast and announce when the weather is about to turn bad within the next hour or so
    • Detect presence and provide intelligent daily summary of events
    • Voice output when there is an entry in my calendar, including the description of the calendar event
    • Voice output during evening time if certain lights, locks, blinds are not switched correctly
    • and much more …
    • Finally I wanted to have a neat Star Trek pager sound (like a signature sound) right before text is played
    • Multiple messages shall be queued FIFO so that no simultaneous messages are played
    • Amazon Polly Text To Speech was used

Hardware:

Dependencies:

# NodeJS
curl -sL https://deb.nodesource.com/setup_10.x | sudo -E bash -
sudo apt install -y nodejs

# Dependencies
sudo apt-get install libasound2-dev

npm install speaker
npm install play-sound
npm install aws-sdk
npm install mqtt

# Make AWS SDK work, do this on boot
export AWS_ACCESS_KEY_ID=FOOBAR
export AWS_SECRET_ACCESS_KEY=FOOBAR
export AWS_DEFAULT_REGION=eu-west-1
The code:
const AWS = require('aws-sdk')
const Stream = require('stream')
const Speaker = require('speaker')

var mqtt = require('mqtt')
// MQTT connection to the broker at host "mqtt"; handlers are attached below.
const client = mqtt.connect('mqtt://mqtt');

// Sound-file player used for the signature chime. Pass a plain empty options
// object: the previous `opts = {}` argument assigned to an undeclared variable,
// creating an implicit global (and a ReferenceError under strict mode).
const player = require('play-sound')({});

// Pending messages, oldest first (push at the end, shift from the front).
const queue = [];

// Amazon Polly client used to turn queued text into audio.
const Polly = new AWS.Polly({
  signatureVersion: 'v4',
  region: 'eu-west-1',
});

// As soon as the broker connection is established, subscribe to the speech
// request topic and publish a greeting on the base topic.
client.on('connect', () => {
  client.subscribe('jarvis/speak');
  client.publish('jarvis', 'Hello mqtt');
});

// Every incoming MQTT payload is treated as text to be spoken: log it, append
// it to the queue and try to start playback (with the signature chime).
client.on('message', (topic, message) => {
  const text = message.toString();
  console.log(`Speak: ${text}`);

  queue.push(text);
  processQueue(true);
});

// True from the moment a message is taken off the queue until its playback
// has ended; prevents two messages from being spoken at the same time.
let speaking = false;

/**
 * Starts playback of the oldest queued message, unless the queue is empty or
 * another message is still being spoken.
 *
 * The message is removed from the queue, `speaking` is set, and the text is
 * handed to `speak()` as plain text.
 *
 * @param {boolean} beep - When truthy, play the `hail.wav` signature sound
 *   first and start speaking one second later; otherwise speak immediately.
 */
function processQueue(beep) {
  if (queue.length === 0 || speaking) {
    return;
  }

  const value = queue.shift();
  speaking = true;

  if (!beep) {
    speak(value, 'text');
    return;
  }

  player.play('hail.wav', (err) => {
    // The chime is optional decoration. Throwing from this asynchronous
    // callback would be an uncaught exception and kill the whole process,
    // so a missing file or audio player is only reported.
    if (err) {
      console.error(`Could not play hail.wav: ${err.message || err}`);
    }
  });

  // Speech starts after a fixed delay; it does not wait for the chime's
  // completion callback.
  setTimeout(() => {
    speak(value, 'text');
  }, 1000);
}

/**
 * Synthesizes `text` with Amazon Polly and plays the result on the local
 * audio output.
 *
 * Whichever way the utterance ends (playback finished, Polly request failed,
 * no audio returned, or the audio output reported an error), `speaking` is
 * cleared and `processQueue(false)` is called exactly once so the next queued
 * message can start. Previously a failed request left `speaking` set forever
 * and the queue never advanced again.
 *
 * @param {string} text - The text (or SSML markup) to speak.
 * @param {string} type - Polly `TextType`, e.g. "text" or "ssml".
 */
function speak(text, type) {
  // Guarded so that an 'error' followed by a 'close' does not advance the
  // queue twice and cause overlapping playback.
  let released = false;
  const release = () => {
    if (released) {
      return;
    }
    released = true;
    speaking = false;
    processQueue(false);
  };

  const params = {
    TextType: type,
    Text: text,
    OutputFormat: 'pcm',
    VoiceId: 'Vicki',
  };

  Polly.synthesizeSpeech(params, (err, data) => {
    if (err) {
      console.error(`Polly synthesizeSpeech failed: ${err.code || err.message || err}`);
      release();
      return;
    }

    if (!data || !(data.AudioStream instanceof Buffer)) {
      console.error('Polly returned no audio buffer');
      release();
      return;
    }

    // Open the audio output only once there is audio to play, so a failed
    // request does not leave an unused, never-closed output behind.
    // The format matches Polly's PCM output: 16-bit mono at 16 kHz.
    const output = new Speaker({
      channels: 1,
      bitDepth: 16,
      sampleRate: 16000,
    });

    output.on('close', release);
    output.on('error', (playbackErr) => {
      console.error(`Audio playback failed: ${playbackErr.message || playbackErr}`);
      release();
    });

    // Wrap the in-memory buffer in a stream so it can be piped to the output.
    const bufferStream = new Stream.PassThrough();
    bufferStream.end(data.AudioStream);
    bufferStream.pipe(output);
  });
}