wav2vec update
This commit is contained in:
20
README.MD
20
README.MD
@@ -23,9 +23,16 @@ from IPython.display import Audio
|
|||||||
from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer
|
from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer
|
||||||
</code></pre>
|
</code></pre>
|
||||||
|
|
||||||
|
<pre><code>
|
||||||
tokenizer = Wav2Vec2Tokenizer.from_pretrained("facebook/wav2vec2-base-960h")
|
tokenizer = Wav2Vec2Tokenizer.from_pretrained("facebook/wav2vec2-base-960h")
|
||||||
model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
|
model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
|
||||||
|
</code></pre>
|
||||||
|
|
||||||
|
<pre><code>
|
||||||
file_name = 'my-audio.wav'
|
file_name = 'my-audio.wav'
|
||||||
|
</code></pre>
|
||||||
|
|
||||||
|
<pre><code>
|
||||||
data = wavfile.read(file_name)
|
data = wavfile.read(file_name)
|
||||||
framerate = data[0]
|
framerate = data[0]
|
||||||
sounddata = data[1]
|
sounddata = data[1]
|
||||||
@@ -36,12 +43,15 @@ logits = model(input_values).logits
|
|||||||
predicted_ids = torch.argmax(logits, dim=-1)
|
predicted_ids = torch.argmax(logits, dim=-1)
|
||||||
transcription = tokenizer.batch_decode(predicted_ids)[0]
|
transcription = tokenizer.batch_decode(predicted_ids)[0]
|
||||||
print(transcription)
|
print(transcription)
|
||||||
Before we begin
|
</code></pre>
|
||||||
Make sure to check the full source code of this tutorial in this Github repo.
|
|
||||||
|
## Before we begin
|
||||||
|
Make sure to check the full source code of this tutorial in [this GitHub repo](https://github.com/psavarmattas/SpeechToText).
|
||||||
|
|
||||||
|
## Wav2Vec: A Revolutionary Model
|
||||||
|
|
||||||
|
![Image 2](assets/image2.png)
|
||||||
|
|
||||||
Wav2Vec: A Revolutionary Model
|
|
||||||
wave2vec | speech to text
|
|
||||||
Image 2
|
|
||||||
We will be using Wav2Vec — a state-of-the-art speech recognition approach by Facebook.
|
We will be using Wav2Vec — a state-of-the-art speech recognition approach by Facebook.
|
||||||
|
|
||||||
The researchers at Facebook describe this approach as:
|
The researchers at Facebook describe this approach as:
|
||||||
|
|||||||
BIN
assets/image2.png
Normal file
BIN
assets/image2.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 450 KiB |
Reference in New Issue
Block a user