ABCDEFGHIJKLMNOPQRSTUVWXYZ
1
Original audio: Take a picture
2
3
4
1. [149, 95, 142, 102, 98, 141].wav 2. [144, 67, 96, 138, 109, 149].wav 3. [145, 68, 120, 197, 192, 146].wav 4. [148, 106, 112, 122, 190, 179].wav 5. [144, 67, 96, 138, 109, 149].wav
5
Acoustic model: take a picture
Language model: a picture
Acoustic model: take a picture
Language model: a picture
Acoustic model: take a picture
Language model: a picture
Acoustic model: take a picture
Language model: a picture
Acoustic model: take a picture
Language model: a picture
6
"transcripts": [ "transcripts": [ "transcripts": [ "transcripts": [ "transcripts": [ "transcripts": [ "transcripts": [ "transcripts": [ "transcripts": [ "transcripts": [
7
{ { { { { { { { { {
8
"confidence": -2.8685550689697266,
"confidence": -14.260931968688965,
"confidence": -3.338918447494507,
"confidence": -15.177739143371582,
"confidence": -3.160142660140991,
"confidence": -15.258406639099121,
"confidence": -2.9251599311828613,
"confidence": -14.71735954284668,
"confidence": -3.338918447494507,
"confidence": -15.177739143371582,
9
"words": [ "words": [ "words": [ "words": [ "words": [ "words": [ "words": [ "words": [ "words": [ "words": [
10
{ { { { { { { { { {
11
"word": "take", "word": "a", "word": "take", "word": "a", "word": "take", "word": "a", "word": "take", "word": "a", "word": "take", "word": "a",
12
"start_time ": 0.18, "start_time ": 0.36, "start_time ": 0.2, "start_time ": 0.22, "start_time ": 0.2, "start_time ": 0.22, "start_time ": 0.18, "start_time ": 0.36, "start_time ": 0.2, "start_time ": 0.22,
13
"duration": 0.14 "duration": 0.04 "duration": 0.16 "duration": 0.28 "duration": 0.16 "duration": 0.28 "duration": 0.12 "duration": 0.06 "duration": 0.16 "duration": 0.28
14
}, }, }, }, }, }, }, }, }, },
15
{ { { { { { { { { {
16
"word": "a", "word": "picture", "word": "a", "word": "picture", "word": "a", "word": "picture", "word": "a", "word": "picture", "word": "a", "word": "picture",
17
"start_time ": 0.36, "start_time ": 0.46, "start_time ": 0.42, "start_time ": 0.56, "start_time ": 0.42, "start_time ": 0.54, "start_time ": 0.36, "start_time ": 0.46, "start_time ": 0.42, "start_time ": 0.56,
18
"duration": 0.04 "duration": 0.26 "duration": 0.08 "duration": 0.3 "duration": 0.08 "duration": 0.24 "duration": 0.06 "duration": 0.28 "duration": 0.08 "duration": 0.3
19
}, } }, } }, } }, } }, }
20
{ ] { ] { ] { ] { ]
21
"word": "picture", }, "word": "picture", }, "word": "picture", }, "word": "picture", }, "word": "picture", },
22
"start_time ": 0.46, { "start_time ": 0.56, { "start_time ": 0.54, { "start_time ": 0.46, { "start_time ": 0.56, {
23
"duration": 0.26
"confidence": -14.978187561035156,
"duration": 0.3
"confidence": -15.459689140319824,
"duration": 0.24
"confidence": -15.272602081298828,
"duration": 0.28
"confidence": -15.042463302612305,
"duration": 0.3
"confidence": -15.459689140319824,
24
} "words": [ } "words": [ } "words": [ } "words": [ } "words": [
25
] { ] { ] { ] { ] {
26
}, "word": "take", }, "word": "take", }, "word": "take", }, "word": "take", }, "word": "take",
27
{ "start_time ": 0.18, { "start_time ": 0.2, { "start_time ": 0.2, { "start_time ": 0.18, { "start_time ": 0.2,
28
"confidence": -2.955415725708008,
"duration": 0.14
"confidence": -3.381296396255493,
"duration": 0.16
"confidence": -3.3245456218719482,
"duration": 0.16
"confidence": -3.2468247413635254,
"duration": 0.12
"confidence": -3.381296396255493,
"duration": 0.16
29
"words": [ }, "words": [ }, "words": [ }, "words": [ }, "words": [ },
30
{ { { { { { { { { {
31
"word": "takea", "word": "a", "word": "takea", "word": "a", "word": "take", "word": "a", "word": "takea", "word": "a", "word": "takea", "word": "a",
32
"start_time ": 0.18, "start_time ": 0.36, "start_time ": 0.2, "start_time ": 0.42, "start_time ": 0.2, "start_time ": 0.42, "start_time ": 0.18, "start_time ": 0.36, "start_time ": 0.2, "start_time ": 0.42,
33
"duration": 0.22 "duration": 0.04 "duration": 0.3 "duration": 0.08 "duration": 0.16 "duration": 0.08 "duration": 0.24 "duration": 0.06 "duration": 0.3 "duration": 0.08
34
}, }, }, }, }, }, }, }, }, },
35
{ { { { { { { { { {
36
"word": "picture", "word": "picture", "word": "picture", "word": "picture", "word": "u", "word": "picture", "word": "picture", "word": "picture", "word": "picture", "word": "picture",
37
"start_time ": 0.46, "start_time ": 0.46, "start_time ": 0.56, "start_time ": 0.56, "start_time ": 0.42, "start_time ": 0.54, "start_time ": 0.46, "start_time ": 0.46, "start_time ": 0.56, "start_time ": 0.56,
38
"duration": 0.26 "duration": 0.26 "duration": 0.3 "duration": 0.3 "duration": 0.08 "duration": 0.24 "duration": 0.28 "duration": 0.28 "duration": 0.3 "duration": 0.3
39
} } } } }, } } } } }
40
] ] ] ] { ] ] ] ] ]
41
}, }, }, }, "word": "picture", }, }, }, }, },
42
{ { { { "start_time ": 0.54, { { { { {
43
"confidence": -3.3442156314849854,
"confidence": -19.87220573425293,
"confidence": -3.4766082763671875,
"confidence": -20.00784683227539,
"duration": 0.24
"confidence": -20.213722229003906,
"confidence": -3.3635449409484863,
"confidence": -18.749309539794922,
"confidence": -3.4766082763671875,
"confidence": -20.00784683227539,
44
"words": [ "words": [ "words": [ "words": [ } "words": [ "words": [ "words": [ "words": [ "words": [
45
{ { { { ] { { { { {
46
"word": "tke", "word": "picture", "word": "take", "word": "picture", }, "word": "picture", "word": "tke", "word": "to", "word": "take", "word": "picture",
47
"start_time ": 0.18, "start_time ": 0.46, "start_time ": 0.2, "start_time ": 0.56, { "start_time ": 0.54, "start_time ": 0.18, "start_time ": 0.18, "start_time ": 0.2, "start_time ": 0.56,
48
"duration": 0.14 "duration": 0.26 "duration": 0.16 "duration": 0.3
"confidence": -3.389983892440796,
"duration": 0.24 "duration": 0.12 "duration": 0.24 "duration": 0.16 "duration": 0.3
49
}, } }, } "words": [ } }, }, }, }
50
{ ] { ] { ] { { { ]
51
"word": "a", }, "word": "u", }, "word": "takea", }, "word": "a", "word": "picture", "word": "u", },
52
"start_time ": 0.36, { "start_time ": 0.42, { "start_time ": 0.2, { "start_time ": 0.36, "start_time ": 0.46, "start_time ": 0.42, {
53
"duration": 0.04
"confidence": -20.1541748046875,
"duration": 0.08
"confidence": -20.351593017578125,
"duration": 0.3
"confidence": -20.468137741088867,
"duration": 0.06 "duration": 0.28 "duration": 0.08
"confidence": -20.351593017578125,
54
}, "words": [ }, "words": [ }, "words": [ }, } }, "words": [
55
{ { { { { { { ] { {
56
"word": "picture", "word": "take", "word": "picture", "word": "take", "word": "picture", "word": "take", "word": "picture", }, "word": "picture", "word": "take",
57
"start_time ": 0.46, "start_time ": 0.18, "start_time ": 0.56, "start_time ": 0.2, "start_time ": 0.54, "start_time ": 0.2, "start_time ": 0.46, { "start_time ": 0.56, "start_time ": 0.2,
58
"duration": 0.26 "duration": 0.22 "duration": 0.3 "duration": 0.3 "duration": 0.24 "duration": 0.3 "duration": 0.28
"confidence": -19.599260330200195,
"duration": 0.3 "duration": 0.3
59
} }, } }, } }, } "words": [ } },
60
] { ] { ] { ] { ] {
61
}, "word": "picture", }, "word": "picture", }, "word": "picture", }, "word": "picture", }, "word": "picture",
62
{ "start_time ": 0.46, { "start_time ": 0.56, { "start_time ": 0.54, { "start_time ": 0.46, { "start_time ": 0.56,
63
"confidence": -3.431145429611206,
"duration": 0.26
"confidence": -3.5200886726379395,
"duration": 0.3
"confidence": -3.556391477584839,
"duration": 0.24
"confidence": -3.650211811065674,
"duration": 0.28
"confidence": -3.5200886726379395,
"duration": 0.3
64
"words": [ } "words": [ } "words": [ } "words": [ } "words": [ }
65
{ ] { ] { ] { ] { ]
66
"word": "tkea", }, "word": "takeu", }, "word": "takeu", }, "word": "take", }, "word": "takeu", },
67
"start_time ": 0.18, { "start_time ": 0.2, { "start_time ": 0.2, { "start_time ": 0.18, { "start_time ": 0.2, {
68
"duration": 0.22
"confidence": -20.483951568603516,
"duration": 0.3
"confidence": -27.306528091430664,
"duration": 0.3
"confidence": -22.41034698486328,
"duration": 0.12
"confidence": -19.80386734008789,
"duration": 0.3
"confidence": -27.306528091430664,
69
}, "words": [ }, "words": [ }, "words": [ }, "words": [ }, "words": [
70
{ { { { { { { { { {
71
"word": "picture", "word": "a", "word": "picture", "word": "a", "word": "picture", "word": "i", "word": "", "word": "take", "word": "picture", "word": "a",
72
"start_time ": 0.46, "start_time ": 0.22, "start_time ": 0.56, "start_time ": 0.22, "start_time ": 0.54, "start_time ": 0.22, "start_time ": 0, "start_time ": 0.18, "start_time ": 0.56, "start_time ": 0.22,
73
"duration": 0.26 "duration": 0.1 "duration": 0.3 "duration": 0.28 "duration": 0.24 "duration": 0.28 "duration": 0.42 "duration": 0.24 "duration": 0.3 "duration": 0.28
74
} }, } }, } }, }, }, } },
75
] { ] { ] { { { ] {
76
}, "word": "a", }, "word": "picture", }, "word": "picture", "word": "picture", "word": "picture", }, "word": "picture",
77
{ "start_time ": 0.36, { "start_time ": 0.56, { "start_time ": 0.54, "start_time ": 0.46, "start_time ": 0.46, { "start_time ": 0.56,
78
"confidence": -3.6645984649658203,
"duration": 0.06
"confidence": -3.7095844745635986,
"duration": 0.34
"confidence": -3.6506288051605225,
"duration": 0.24 "duration": 0.28 "duration": 0.28
"confidence": -3.7095844745635986,
"duration": 0.34
79
"words": [ }, "words": [ } "words": [ } } } "words": [ }
80
{ { { ] { ] ] ] { ]
81
"word": "tae", "word": "picture", "word": "ake", } "word": "ake", } }, } "word": "ake", }
82
"start_time ": 0.18, "start_time ": 0.46, "start_time ": 0.22, ] "start_time ": 0.22, ] { ] "start_time ": 0.22, ]
83
"duration": 0.14 "duration": 0.26 "duration": 0.14 "duration": 0.14}
"confidence": -3.687258243560791,
} "duration": 0.14}
84
}, } }, }, "words": [ },
85
{ ] { { { {
86
"word": "a", } "word": "a", "word": "a", "word": "tkea", "word": "a",
87
"start_time ": 0.36, ] "start_time ": 0.42, "start_time ": 0.42, "start_time ": 0.18, "start_time ": 0.42,
88
"duration": 0.04} "duration": 0.08 "duration": 0.08 "duration": 0.24 "duration": 0.08
89
}, }, }, }, },
90
{ { { { {
91
"word": "picture", "word": "picture", "word": "picture", "word": "picture", "word": "picture",
92
"start_time ": 0.46, "start_time ": 0.56, "start_time ": 0.54, "start_time ": 0.46, "start_time ": 0.56,
93
"duration": 0.26 "duration": 0.3 "duration": 0.24 "duration": 0.28 "duration": 0.3
94
} } } } }
95
] ] ] ] ]
96
} } } } }
97
] ] ] ] ]
98
}}}}
99
100