Exploring the hidden potential of sound data
👩🏻 Charlie Gerard
👩🏻‍💻 @devdevcharlie
👩🏻 Charlie Gerard
👩🏻‍💻 @devdevcharlie
👩🏻 Charlie Gerard
👩🏻‍💻 @devdevcharlie
Restaurant
Home
👩🏻 Charlie Gerard
👩🏻‍💻 @devdevcharlie
Restaurant
Home
Eating
Eating
Cooking
👩🏻 Charlie Gerard
👩🏻‍💻 @devdevcharlie
Restaurant
Home
Eating
Eating
Cooking
Breakfast / Lunch / Dinner
Breakfast / Lunch / Dinner
👩🏻 Charlie Gerard
👩🏻‍💻 @devdevcharlie
Restaurant
Home
Eating
Eating
Cooking
Breakfast / Lunch / Dinner
Breakfast / Lunch / Dinner
LOCATION
👩🏻 Charlie Gerard
👩🏻‍💻 @devdevcharlie
Restaurant
Home
Eating
Eating
Cooking
Breakfast / Lunch / Dinner
Breakfast / Lunch / Dinner
ACTIVITY
LOCATION
👩🏻 Charlie Gerard
👩🏻‍💻 @devdevcharlie
Restaurant
Home
Eating
Eating
Cooking
Breakfast / Lunch / Dinner
Breakfast / Lunch / Dinner
SUB-ACTIVITY
ACTIVITY
LOCATION
👩🏻 Charlie Gerard
👩🏻‍💻 @devdevcharlie
Front-end developer
Google Developer Expert & Mozilla Tech speaker
Charlie Gerard
👩🏻 Charlie Gerard
👩🏻‍💻 @devdevcharlie
ACOUSTIC ACTIVITY RECOGNITION
👩🏻 Charlie Gerard
👩🏻‍💻 @devdevcharlie
Using the rich properties of sound to gain insights about an activity or environment
ACOUSTIC ACTIVITY RECOGNITION
👩🏻 Charlie Gerard
👩🏻‍💻 @devdevcharlie
👩🏻 Charlie Gerard
👩🏻‍💻 @devdevcharlie
👩🏻 Charlie Gerard
👩🏻‍💻 @devdevcharlie
Web audio API
👩🏻 Charlie Gerard
👩🏻‍💻 @devdevcharlie
Visualizations
👩🏻 Charlie Gerard
👩🏻‍💻 @devdevcharlie
Spectrogram
👩🏻 Charlie Gerard
👩🏻‍💻 @devdevcharlie
Time
Spectrogram
👩🏻 Charlie Gerard
👩🏻‍💻 @devdevcharlie
Frequencies
Spectrogram
Time
👩🏻 Charlie Gerard
👩🏻‍💻 @devdevcharlie
Frequencies
Amplitude
Spectrogram
Time
👩🏻 Charlie Gerard
👩🏻‍💻 @devdevcharlie
👩🏻 Charlie Gerard
👩🏻‍💻 @devdevcharlie
👩🏻 Charlie Gerard
👩🏻‍💻 @devdevcharlie
👩🏻 Charlie Gerard
👩🏻‍💻 @devdevcharlie
[
[ 204, 10, 34,
11, 0, 3, 56,
78, 23, 89,
56, 67, …
],
[ 204, 10, 34,
11, 0, 3, 56,
78, 23, 89,
56, 67, …
],
[ 204, 10, 34,
11, 0, 3, 56,
78, 23, 89,
56, 67, …
],
[ 204, 10, 34,
11, 0, 3, 56,
78, 23, 89,
56, 67, …
],
...
]
Collecting data
[
[ 204, 10, 34,
11, 0, 3, 56,
78, 23, 89,
56, 67, β¦
],
[ 204, 10, 34,
11, 0, 3, 56,
78, 23, 89,
56, 67, β¦
],
[ 204, 10, 34,
11, 0, 3, 56,
78, 23, 89,
56, 67, β¦
],
[ 204, 10, 34,
11, 0, 3, 56,
78, 23, 89,
56, 67, β¦
],
...
]
[{
label: 0,
features:
[
[ 204, 10, 34,
11, 0, 3, 56,
78, 23, 89,
56, 67, β¦
],
[ 204, 10, 34,
11, 0, 3, 56,
78, 23, 89,
56, 67, β¦
],
β¦
]
},
{
label: 1,
features:
[
[ 204, 10, 34,
11, 0, 3, 56,
78, 23, 89,
56, 67, β¦
],
[ 204, 10, 34,
11, 0, 3, 56,
78, 23, 89,
56, 67, β¦
],
β¦
]
},
...]
Collecting data
Data transformation
[
[ 204, 10, 34,
11, 0, 3, 56,
78, 23, 89,
56, 67, β¦
],
[ 204, 10, 34,
11, 0, 3, 56,
78, 23, 89,
56, 67, β¦
],
[ 204, 10, 34,
11, 0, 3, 56,
78, 23, 89,
56, 67, β¦
],
[ 204, 10, 34,
11, 0, 3, 56,
78, 23, 89,
56, 67, β¦
],
...
]
[{
label: 0,
features:
[
[ 204, 10, 34,
11, 0, 3, 56,
78, 23, 89,
56, 67, β¦
],
[ 204, 10, 34,
11, 0, 3, 56,
78, 23, 89,
56, 67, β¦
],
β¦
]
},
{
label: 1,
features:
[
[ 204, 10, 34,
11, 0, 3, 56,
78, 23, 89,
56, 67, β¦
],
[ 204, 10, 34,
11, 0, 3, 56,
78, 23, 89,
56, 67, β¦
],
β¦
]
},
...]
Collecting data
// labels
[
[0,0,0,0,0],
[1,1,1,1,1],
…
]
// features
[
[
[204, 10, …],
[25, 45, …],
…
],
[
[45, 37, …],
[23, 67, …],
…
],
...
]
Data transformation
[
[ 204, 10, 34,
11, 0, 3, 56,
78, 23, 89,
56, 67, β¦
],
[ 204, 10, 34,
11, 0, 3, 56,
78, 23, 89,
56, 67, β¦
],
[ 204, 10, 34,
11, 0, 3, 56,
78, 23, 89,
56, 67, β¦
],
[ 204, 10, 34,
11, 0, 3, 56,
78, 23, 89,
56, 67, β¦
],
...
]
[{
label: 0,
features:
[
[ 204, 10, 34,
11, 0, 3, 56,
78, 23, 89,
56, 67, β¦
],
[ 204, 10, 34,
11, 0, 3, 56,
78, 23, 89,
56, 67, β¦
],
β¦
]
},
{
label: 1,
features:
[
[ 204, 10, 34,
11, 0, 3, 56,
78, 23, 89,
56, 67, β¦
],
[ 204, 10, 34,
11, 0, 3, 56,
78, 23, 89,
56, 67, β¦
],
β¦
]
},
...]
Collecting data
// labels
[
[0,0,0,0,0],
[1,1,1,1,1],
β¦
]
// features
[
[
[204, 10, β¦],
[25, 45, β¦],
β¦
],
[
[45, 37, β¦],
[23, 67, β¦],
β¦
],
...
]
Tensors
Data transformation
[
[ 204, 10, 34,
11, 0, 3, 56,
78, 23, 89,
56, 67, β¦
],
[ 204, 10, 34,
11, 0, 3, 56,
78, 23, 89,
56, 67, β¦
],
[ 204, 10, 34,
11, 0, 3, 56,
78, 23, 89,
56, 67, β¦
],
[ 204, 10, 34,
11, 0, 3, 56,
78, 23, 89,
56, 67, β¦
],
...
]
[{
label: 0,
features:
[
[ 204, 10, 34,
11, 0, 3, 56,
78, 23, 89,
56, 67, β¦
],
[ 204, 10, 34,
11, 0, 3, 56,
78, 23, 89,
56, 67, β¦
],
β¦
]
},
{
label: 1,
features:
[
[ 204, 10, 34,
11, 0, 3, 56,
78, 23, 89,
56, 67, β¦
],
[ 204, 10, 34,
11, 0, 3, 56,
78, 23, 89,
56, 67, β¦
],
β¦
]
},
...]
Collecting data
// labels
[
[0,0,0,0,0],
[1,1,1,1,1],
β¦
]
// features
[
[
[204, 10, β¦],
[25, 45, β¦],
β¦
],
[
[45, 37, β¦],
[23, 67, β¦],
β¦
],
...
]
Tensors
Algorithm
Data transformation
Training
[
[ 204, 10, 34,
11, 0, 3, 56,
78, 23, 89,
56, 67, β¦
],
[ 204, 10, 34,
11, 0, 3, 56,
78, 23, 89,
56, 67, β¦
],
[ 204, 10, 34,
11, 0, 3, 56,
78, 23, 89,
56, 67, β¦
],
[ 204, 10, 34,
11, 0, 3, 56,
78, 23, 89,
56, 67, β¦
],
...
]
[{
label: 0,
features:
[
[ 204, 10, 34,
11, 0, 3, 56,
78, 23, 89,
56, 67, β¦
],
[ 204, 10, 34,
11, 0, 3, 56,
78, 23, 89,
56, 67, β¦
],
β¦
]
},
{
label: 1,
features:
[
[ 204, 10, 34,
11, 0, 3, 56,
78, 23, 89,
56, 67, β¦
],
[ 204, 10, 34,
11, 0, 3, 56,
78, 23, 89,
56, 67, β¦
],
β¦
]
},
...]
Collecting data
// labels
[
[0,0,0,0,0],
[1,1,1,1,1],
β¦
]
// features
[
[
[204, 10, β¦],
[25, 45, β¦],
β¦
],
[
[45, 37, β¦],
[23, 67, β¦],
β¦
],
...
]
Tensors
Algorithm
Output / prediction
Data transformation
Training
<script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs@1.3.1/dist/tf.min.js">
</script>
<script src="https://cdn.jsdelivr.net/npm/@tensorflow-models/speech-commands@0.4.0/dist/speech-commands.min.js">
</script>
👩🏻 Charlie Gerard
👩🏻‍💻 @devdevcharlie
/**
 * Creates a speech-commands recognizer from a hosted model and starts
 * listening, handing every prediction's scores to the supplied callback.
 *
 * Writes to the outer-scope variables `model` and `predictionCallback`.
 *
 * @param {string} URL - Base location; `/model.json` and `/metadata.json` are appended to it.
 * @param {Function} predictionCB - Receives `prediction.scores` each time a prediction arrives.
 * @returns {Promise<void>} Settles after `model.listen` has been invoked.
 */
async function setupModel(URL, predictionCB) {
  // Keep the handler in the outer variable; the listener looks it up at call time.
  predictionCallback = predictionCB;

  const checkpointLocation = `${URL}/model.json`;
  const metadataLocation = `${URL}/metadata.json`;

  model = window.speechCommands.create(
    'BROWSER_FFT',
    undefined,
    checkpointLocation,
    metadataLocation,
  );
  await model.ensureModelLoaded();

  const listenOptions = {
    // Keep reporting even when only background noise is detected.
    invokeCallbackOnNoiseAndUnknown: true,
    // Expose the numerical audio data alongside each prediction.
    includeSpectrogram: true,
    // Sampling cadence; 0.5 means roughly twice per second.
    overlapFactor: 0.5,
  };

  // Runs once per prediction and passes only the scores along.
  const forwardScores = ({ scores }) => {
    predictionCallback(scores);
  };

  model.listen(forwardScores, listenOptions);
}
/**
 * Creates a speech-commands recognizer from a hosted model and starts
 * listening, handing every prediction's scores to the supplied callback.
 *
 * Writes to the outer-scope variables `model` and `predictionCallback`.
 *
 * @param {string} URL - Base location; `/model.json` and `/metadata.json` are appended to it.
 * @param {Function} predictionCB - Receives `prediction.scores` each time a prediction arrives.
 * @returns {Promise<void>} Settles after `model.listen` has been invoked.
 */
async function setupModel(URL, predictionCB) {
  // Keep the handler in the outer variable; the listener looks it up at call time.
  predictionCallback = predictionCB;

  const checkpointLocation = `${URL}/model.json`;
  const metadataLocation = `${URL}/metadata.json`;

  model = window.speechCommands.create(
    'BROWSER_FFT',
    undefined,
    checkpointLocation,
    metadataLocation,
  );
  await model.ensureModelLoaded();

  const listenOptions = {
    // Keep reporting even when only background noise is detected.
    invokeCallbackOnNoiseAndUnknown: true,
    // Expose the numerical audio data alongside each prediction.
    includeSpectrogram: true,
    // Sampling cadence; 0.5 means roughly twice per second.
    overlapFactor: 0.5,
  };

  // Runs once per prediction and passes only the scores along.
  const forwardScores = ({ scores }) => {
    predictionCallback(scores);
  };

  model.listen(forwardScores, listenOptions);
}
/**
 * Creates a speech-commands recognizer from a hosted model and starts
 * listening, handing every prediction's scores to the supplied callback.
 *
 * Writes to the outer-scope variables `model` and `predictionCallback`.
 *
 * @param {string} URL - Base location; `/model.json` and `/metadata.json` are appended to it.
 * @param {Function} predictionCB - Receives `prediction.scores` each time a prediction arrives.
 * @returns {Promise<void>} Settles after `model.listen` has been invoked.
 */
async function setupModel(URL, predictionCB) {
  // Keep the handler in the outer variable; the listener looks it up at call time.
  predictionCallback = predictionCB;

  const checkpointLocation = `${URL}/model.json`;
  const metadataLocation = `${URL}/metadata.json`;

  model = window.speechCommands.create(
    'BROWSER_FFT',
    undefined,
    checkpointLocation,
    metadataLocation,
  );
  await model.ensureModelLoaded();

  const listenOptions = {
    // Keep reporting even when only background noise is detected.
    invokeCallbackOnNoiseAndUnknown: true,
    // Expose the numerical audio data alongside each prediction.
    includeSpectrogram: true,
    // Sampling cadence; 0.5 means roughly twice per second.
    overlapFactor: 0.5,
  };

  // Runs once per prediction and passes only the scores along.
  const forwardScores = ({ scores }) => {
    predictionCallback(scores);
  };

  model.listen(forwardScores, listenOptions);
}
/**
 * Creates a speech-commands recognizer from a hosted model and starts
 * listening, handing every prediction's scores to the supplied callback.
 *
 * Writes to the outer-scope variables `model` and `predictionCallback`.
 *
 * @param {string} URL - Base location; `/model.json` and `/metadata.json` are appended to it.
 * @param {Function} predictionCB - Receives `prediction.scores` each time a prediction arrives.
 * @returns {Promise<void>} Settles after `model.listen` has been invoked.
 */
async function setupModel(URL, predictionCB) {
  // Keep the handler in the outer variable; the listener looks it up at call time.
  predictionCallback = predictionCB;

  const checkpointLocation = `${URL}/model.json`;
  const metadataLocation = `${URL}/metadata.json`;

  model = window.speechCommands.create(
    'BROWSER_FFT',
    undefined,
    checkpointLocation,
    metadataLocation,
  );
  await model.ensureModelLoaded();

  const listenOptions = {
    // Keep reporting even when only background noise is detected.
    invokeCallbackOnNoiseAndUnknown: true,
    // Expose the numerical audio data alongside each prediction.
    includeSpectrogram: true,
    // Sampling cadence; 0.5 means roughly twice per second.
    overlapFactor: 0.5,
  };

  // Runs once per prediction and passes only the scores along.
  const forwardScores = ({ scores }) => {
    predictionCallback(scores);
  };

  model.listen(forwardScores, listenOptions);
}
// Class names; presumably in the same order as the scores the model emits (confirm against metadata.json).
let labels = ["Clapping","Speaking","_background_noise_"];
// Start listening and translate each score array into the name of the winning class.
setupModel(URL, data => {
  // data holds one score per label, e.g. [0.87689, 0.21456, 0.56789]
  // Switching on the highest score selects the first entry that equals it.
  switch (Math.max(...data)) {
    case data[0]:
      currentPrediction = labels[0];
      break;
    case data[1]:
      currentPrediction = labels[1];
      break;
    default:
      // Any other winner (including background noise) clears the prediction.
      currentPrediction = "";
      break;
  }
  // The return now sits inside the callback; a stray closing brace used to
  // end the function before it, which made the snippet a syntax error.
  return currentPrediction;
});
DEMO
👩🏻 Charlie Gerard
👩🏻‍💻 @devdevcharlie
(⚠️ Early prototype optimised for Chrome desktop)
👩🏻 Charlie Gerard
👩🏻‍💻 @devdevcharlie
👩🏻 Charlie Gerard
👩🏻‍💻 @devdevcharlie
BENEFITS
👩🏻 Charlie Gerard
👩🏻‍💻 @devdevcharlie
1 sensor to rule them all
👩🏻 Charlie Gerard
👩🏻‍💻 @devdevcharlie
APPLICATIONS
👩🏻 Charlie Gerard
👩🏻‍💻 @devdevcharlie
Applications
Smart home / office
👩🏻 Charlie Gerard
👩🏻‍💻 @devdevcharlie
Smart home / office
Interactive storytelling
Applications
👩🏻 Charlie Gerard
👩🏻‍💻 @devdevcharlie
Smart home / office
Interactive storytelling
Health tracking
Applications
👩🏻 Charlie Gerard
👩🏻‍💻 @devdevcharlie
Automatic Youtube video captions of sound effects
Applications
👩🏻 Charlie Gerard
👩🏻‍💻 @devdevcharlie
Applications
👩🏻 Charlie Gerard
👩🏻‍💻 @devdevcharlie
LIMITATIONS
👩🏻 Charlie Gerard
👩🏻‍💻 @devdevcharlie
Limitations
👩🏻 Charlie Gerard
👩🏻‍💻 @devdevcharlie
General purpose synthetic sensors
👩🏻 Charlie Gerard
👩🏻‍💻 @devdevcharlie
👩🏻 Charlie Gerard
👩🏻‍💻 @devdevcharlie
RESOURCES
👩🏻 Charlie Gerard
👩🏻‍💻 @devdevcharlie
Resources
👩🏻 Charlie Gerard
👩🏻‍💻 @devdevcharlie
♥️
THANK YOU!
👩🏻 Charlie Gerard
👩🏻‍💻 @devdevcharlie