# Speech API

Reference (FastAPI/uvicorn not logging errors): https://stackoverflow.com/questions/66602480/fastapi-uvicorn-not-logging-errors

The Speech API is based on the work that Bil did with the XR collab.

# overview

- TODO: rename the speech service to `api-speech`
- TODO: update `cold-start.sh`


# selectable names

```py
# Speaker names accepted by the `speaker` query parameter of the TTS endpoint.
# Any value not in this list falls back to "Tammie Ema".
selectableNames = [
    "Aaron Dreschner",
    "Abrahan Mack",
    "Adde Michal",
    "Alexandra Hisakawa",
    "Alison Dietlinde",
    "Alma María",
    "Ana Florence",
    "Andrew Chipper",
    "Annmarie Nele",
    "Asya Anara",
    "Badr Odhiambo",
    "Baldur Sanjin +",  # NOTE(review): the " +" suffix is part of the string - confirm it matches the model's speaker name
    "Barbora MacLean",
    "Brenda Stern",
    "Camilla Holmström",
    "Chandra MacFarland",
    "Claribel Dervla",
    "Craig Gutsy",
    "Daisy Studious ",  # NOTE(review): trailing space inside the quotes - confirm it is intended
    "Damian Black +",  # NOTE(review): the " +" suffix is part of the string - confirm it matches the model's speaker name
    "Damjan Chapman",
    "Dionisio Schuyler",
    "Eugenio Mataracı",
    "Ferran Simen",
    "Filip Traverse",
    "Gilberto Mathias",
    "Gitta Nikolina",
    "Gracie Wise",
    "Henriette Usha",
    "Ige Behringer",
    "Ilkin Urbano",
    "Kazuhiko Atallah",
    "Kumar Dahl",
    "Lidiya Szekeres",
    "Lilya Stainthorpe",
    "Ludvig Milivoj",
    "Luis Moray",
    "Maja Ruoho",
    "Marcos Rudaski",
    "Narelle Moon",
    "Nova Hogarth",
    "Royston Min",
    "Rosemary Okafor",
    "Sofia Hellen",
    "Suad Qasim",
    "Szofi Granger",
    "Tammie Ema",
    "Tammy Grit",
    "Tanja Adelina",
    "Torcull Diarmuid",
    "Uta Obando",
    "Viktor Eka",
    "Viktor Menelaos",
    "Vjollca Johnnie",
    "Wulf Carlevaro +",  # NOTE(review): the " +" suffix is part of the string - confirm it matches the model's speaker name
    "Xavier Hayasaka",
    "Zacharie Aimilios",
    "Zofija Kendrick",
]
```


# original reference code
```py
# The speech API covers both STT and TTS workflows.

# Announce startup before the imports below run.
# NOTE(review): presumably printed first because importing whisper / TTS is
# slow - confirm.
print(f">>>> Starting API")
import falcon.asgi
import whisper
from TTS.api import TTS
import numpy


# Speaker names accepted by the `speaker` query parameter of the TTS endpoint.
# Any value not in this list falls back to "Tammie Ema".
selectableNames = [
    "Aaron Dreschner",
    "Abrahan Mack",
    "Adde Michal",
    "Alexandra Hisakawa",
    "Alison Dietlinde",
    "Alma María",
    "Ana Florence",
    "Andrew Chipper",
    "Annmarie Nele",
    "Asya Anara",
    "Badr Odhiambo",
    "Baldur Sanjin +",  # NOTE(review): the " +" suffix is part of the string - confirm it matches the model's speaker name
    "Barbora MacLean",
    "Brenda Stern",
    "Camilla Holmström",
    "Chandra MacFarland",
    "Claribel Dervla",
    "Craig Gutsy",
    "Daisy Studious ",  # NOTE(review): trailing space inside the quotes - confirm it is intended
    "Damian Black +",  # NOTE(review): the " +" suffix is part of the string - confirm it matches the model's speaker name
    "Damjan Chapman",
    "Dionisio Schuyler",
    "Eugenio Mataracı",
    "Ferran Simen",
    "Filip Traverse",
    "Gilberto Mathias",
    "Gitta Nikolina",
    "Gracie Wise",
    "Henriette Usha",
    "Ige Behringer",
    "Ilkin Urbano",
    "Kazuhiko Atallah",
    "Kumar Dahl",
    "Lidiya Szekeres",
    "Lilya Stainthorpe",
    "Ludvig Milivoj",
    "Luis Moray",
    "Maja Ruoho",
    "Marcos Rudaski",
    "Narelle Moon",
    "Nova Hogarth",
    "Royston Min",
    "Rosemary Okafor",
    "Sofia Hellen",
    "Suad Qasim",
    "Szofi Granger",
    "Tammie Ema",
    "Tammy Grit",
    "Tanja Adelina",
    "Torcull Diarmuid",
    "Uta Obando",
    "Viktor Eka",
    "Viktor Menelaos",
    "Vjollca Johnnie",
    "Wulf Carlevaro +",  # NOTE(review): the " +" suffix is part of the string - confirm it matches the model's speaker name
    "Xavier Hayasaka",
    "Zacharie Aimilios",
    "Zofija Kendrick",
]

# Language codes accepted by the `lang` query parameter of the TTS endpoint.
# Any value not in this list falls back to "en".
selectableLanguages = [
    "en", "es", "de", "fr", "it", "pt",
    "pl", "tr", "ru", "nl", "cs", "ar",
    "zh-cn", "ja", "hu", "ko", "hi",
]


# Module-level startup: build the ASGI app and load both models once, at
# import time, so every request reuses the same instances.
print(f">>>> Loading Models")
# Falcon ASGI application with CORS enabled; routes are registered on it
# further down.
api = falcon.asgi.App(cors_enable=True)

# Coqui XTTS v2 text-to-speech model, moved to the "cuda" device.
ttsModel = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to("cuda")
print(f">>>> ... TTS Model Loaded")
# Whisper "turbo" speech-to-text model, loaded on the "cuda" device.
sttModel = whisper.load_model("turbo", device="cuda")
print(f">>>> ... STT Model Loaded")


class STT:
    """Speech-to-text resource: status on GET, transcription on POST."""

    uri = "/stt"

    async def on_get(self, request, response):
        """Report that the server is running and which model backs it."""
        status = {"Status": "Server Running", "Model": "Whisper/turbo"}
        response.media = status

    async def on_post(self, req, res):
        """Transcribe the samples found under ``audioData`` in the request media.

        The samples are converted to a float32 NumPy array and handed to
        ``sttModel.transcribe``; the ``"text"`` entry of its result is sent
        back as ``text/plain``.
        """
        print("\n>>>> STT Started")
        payload = await req.get_media()
        samples = numpy.asarray(payload["audioData"])
        samples = samples.astype(numpy.float32)
        transcription = sttModel.transcribe(samples)
        res.content_type = "text/plain"
        res.text = transcription["text"]
        print("... STT complete")


class TTS:
    """Text-to-speech resource: status on GET, speech synthesis on POST.

    NOTE(review): this class reuses the name ``TTS`` that the file imports
    from ``TTS.api``. ``ttsModel`` is built before this definition runs, so
    it still works, but any later use of ``TTS`` resolves to this class
    rather than the library one.
    """

    uri = "/tts"

    # Fallbacks used when a query parameter is missing or not selectable.
    _DEFAULT_SPEAKER = "Tammie Ema"
    _DEFAULT_LANGUAGE = "en"

    @staticmethod
    def _select_option(params, key, allowed, fallback):
        """Return ``params[key]`` when present and in ``allowed``, else ``fallback``.

        Uses ``dict.get`` instead of a bare ``except`` so that only a missing
        key triggers the fallback and unrelated errors are not swallowed.
        Membership is tested with ``in`` on ``allowed`` as given, so an
        unhashable value (e.g. a list from a repeated parameter) simply fails
        the check rather than raising.
        """
        value = params.get(key, fallback)
        return value if value in allowed else fallback

    async def on_get(self, req, res):
        """Report that the server is running and which model backs it."""
        res.media = {"Status": "Server Running", "Model": "Coqui/xtts2"}

    async def on_post(self, req, res):
        """Synthesize speech for the ``transcript`` field of the request media.

        Query parameters:
            speaker: one of ``selectableNames``; otherwise "Tammie Ema".
            lang: one of ``selectableLanguages``; otherwise "en".

        The output of ``ttsModel.tts`` is converted with ``str()`` and set as
        the response media with content type ``application/json``.

        Raises:
            KeyError: if the request media has no ``transcript`` key.
        """
        print("\n>>>> TTS Started")
        print(f"\n{req.headers}\n")
        print(f"\n{req.params}\n")
        data = await req.get_media()
        params = req.params

        selectedSpeaker = self._select_option(
            params, "speaker", selectableNames, self._DEFAULT_SPEAKER
        )
        print(f'person is {selectedSpeaker}')

        selectedLanguage = self._select_option(
            params, "lang", selectableLanguages, self._DEFAULT_LANGUAGE
        )
        # Both log lines are kept so the console output matches the original.
        print('lang from param loaded')
        print(f'lang is {selectedLanguage}')

        wav = ttsModel.tts(
            text=data["transcript"], speaker=selectedSpeaker, language=selectedLanguage
        )
        res.content_type = "application/json"
        # NOTE(review): the payload is the str() of the model output, sent as
        # a JSON string; clients presumably parse it themselves - confirm
        # before changing the format.
        res.media = str(wav)
        print("... TTS complete")


class Root:
    """Landing resource that advertises the server and its endpoints."""

    uri = "/"

    async def on_get(self, req, res):
        """Answer GET with a status banner naming the available APIs."""
        banner = {"Status": "XRC Speech Server Running", "API": "stt, tts"}
        res.media = banner


# Register one instance of each resource under its own `uri`, in the same
# order as before: Root, STT, TTS.
for _resource_cls in (Root, STT, TTS):
    api.add_route(_resource_cls.uri, _resource_cls())

```