Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ require (
github.com/nats-io/jwt/v2 v2.8.1
github.com/nats-io/nats.go v1.51.0
github.com/nats-io/nkeys v0.4.15
github.com/openai/openai-go/v3 v3.33.0
github.com/pion/webrtc/v4 v4.2.11
github.com/redis/go-redis/v9 v9.18.0
github.com/sirupsen/logrus v1.9.4
Expand Down Expand Up @@ -109,6 +110,10 @@ require (
github.com/prometheus/common v0.67.5 // indirect
github.com/prometheus/procfs v0.20.1 // indirect
github.com/puzpuzpuz/xsync/v3 v3.5.1 // indirect
github.com/tidwall/gjson v1.18.0 // indirect
github.com/tidwall/match v1.1.1 // indirect
github.com/tidwall/pretty v1.2.1 // indirect
github.com/tidwall/sjson v1.2.5 // indirect
github.com/twitchtv/twirp v8.1.3+incompatible // indirect
github.com/valyala/bytebufferpool v1.0.0 // indirect
github.com/valyala/fasthttp v1.69.0 // indirect
Expand Down
12 changes: 12 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,8 @@ github.com/nats-io/nkeys v0.4.15 h1:JACV5jRVO9V856KOapQ7x+EY8Jo3qw1vJt/9Jpwzkk4=
github.com/nats-io/nkeys v0.4.15/go.mod h1:CpMchTXC9fxA5zrMo4KpySxNjiDVvr8ANOSZdiNfUrs=
github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw=
github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c=
github.com/openai/openai-go/v3 v3.33.0 h1:aiETRPoLxnk6y3sIakXAdRCvtcLhdhBqHqIvEdOkEuc=
github.com/openai/openai-go/v3 v3.33.0/go.mod h1:cdufnVK14cWcT9qA1rRtrXx4FTRsgbDPW7Ia7SS5cZo=
github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040=
Expand Down Expand Up @@ -280,6 +282,16 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY=
github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA=
github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4=
github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY=
github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28=
github.com/twitchtv/twirp v8.1.3+incompatible h1:+F4TdErPgSUbMZMwp13Q/KgDVuI7HJXP61mNV3/7iuU=
github.com/twitchtv/twirp v8.1.3+incompatible/go.mod h1:RRJoFSAmTEh2weEqWtpPE3vFK5YBhA6bqp2l1kfCC5A=
github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
Expand Down
168 changes: 168 additions & 0 deletions pkg/insights/providers/openai/client.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
// Package openai implements insights.Provider against the OpenAI API and any
// OpenAI-compatible HTTP backend (LocalAI, vLLM, llama.cpp-server, whisper.cpp,
// etc.). The base_url provider option lets operators point this provider at a
// self-hosted endpoint while keeping the same Go code path as for OpenAI cloud.
package openai

import (
"context"
"encoding/json"
"fmt"
"io"

openaisdk "github.com/openai/openai-go/v3"
"github.com/openai/openai-go/v3/option"

"github.com/mynaparrot/plugnmeet-protocol/plugnmeet"
"github.com/mynaparrot/plugnmeet-server/pkg/config"
"github.com/mynaparrot/plugnmeet-server/pkg/insights"
"github.com/sirupsen/logrus"
)

// Fallback values used when the provider configuration leaves a setting unset.
const (
	// defaultChatModel is the fallback model TranslateText passes to
	// serviceModel.
	defaultChatModel = "gpt-4o-mini"

	// defaultTranscriptionModel is the fallback model CreateTranscription
	// passes to serviceModel.
	defaultTranscriptionModel = "whisper-1"

	// defaultChunkSeconds is what chunkSeconds returns when the chunk_seconds
	// option is absent or not a positive number.
	defaultChunkSeconds = 5.0

	// transcriptionSampleRate is not referenced in this file; presumably the
	// audio sample rate in Hz used by the chunked stream — confirm at its
	// point of use.
	transcriptionSampleRate = 16000
)

// Provider implements insights.Provider for OpenAI and OpenAI-compatible APIs.
// Instances are built by NewProvider.
type Provider struct {
	// account holds the credentials and the provider-wide options
	// (base_url, model, chunk_seconds) read by this package.
	account *config.ProviderAccount
	// service holds per-service options; serviceModel tolerates it being nil.
	service *config.ServiceConfig
	// client is the SDK client configured in NewProvider.
	client openaisdk.Client
	// logger is the caller-supplied entry tagged with service=openai.
	logger *logrus.Entry
}

// NewProvider builds a Provider for the given account and service config.
//
// providerAccount must be non-nil and carry a non-empty credentials.api_key.
// The base_url account option is optional and overrides the SDK's default
// endpoint. When base_url is set it must be a string: a value of any other
// type is rejected with an error rather than ignored, because ignoring it
// would silently send requests (and the API key) to the default endpoint.
// serviceConfig may be nil. A nil log falls back to the logrus standard
// logger.
func NewProvider(providerAccount *config.ProviderAccount, serviceConfig *config.ServiceConfig, log *logrus.Entry) (insights.Provider, error) {
	if providerAccount == nil {
		return nil, fmt.Errorf("openai: provider account is nil")
	}
	if providerAccount.Credentials.APIKey == "" {
		return nil, fmt.Errorf("openai: credentials.api_key is required")
	}
	if log == nil {
		// A nil entry cannot be used with WithField below.
		log = logrus.NewEntry(logrus.StandardLogger())
	}

	opts := []option.RequestOption{
		option.WithAPIKey(providerAccount.Credentials.APIKey),
	}
	// A missing or null base_url means "use the SDK default".
	if raw := providerAccount.Options["base_url"]; raw != nil {
		baseURL, ok := raw.(string)
		if !ok {
			return nil, fmt.Errorf("openai: options.base_url must be a string, got %T", raw)
		}
		if baseURL != "" {
			opts = append(opts, option.WithBaseURL(baseURL))
		}
	}

	return &Provider{
		account: providerAccount,
		service: serviceConfig,
		client:  openaisdk.NewClient(opts...),
		logger:  log.WithField("service", "openai"),
	}, nil
}

// CreateTranscription starts a chunked transcription stream for one user in
// one room. The options payload, when non-empty, is decoded as JSON into
// insights.TranscriptionOptions; a decode failure is returned wrapped. The
// model comes from serviceModel (falling back to defaultTranscriptionModel)
// and the chunk length from chunkSeconds; stream construction itself is
// delegated to newChunkedStream.
func (p *Provider) CreateTranscription(ctx context.Context, roomId, userId string, options []byte) (insights.TranscriptionStream, error) {
	var parsed insights.TranscriptionOptions
	if len(options) != 0 {
		err := json.Unmarshal(options, &parsed)
		if err != nil {
			return nil, fmt.Errorf("openai: failed to unmarshal transcription options: %w", err)
		}
	}

	return newChunkedStream(
		ctx,
		p.client,
		p.serviceModel(defaultTranscriptionModel),
		p.chunkSeconds(),
		roomId,
		userId,
		&parsed,
		p.logger,
	)
}

// TranslateText translates text from sourceLang into every language in
// targetLangs by delegating to translateViaChatCompletions with the model
// chosen by serviceModel (falling back to defaultChatModel). An empty
// targetLangs is rejected with an error before any request is made.
func (p *Provider) TranslateText(ctx context.Context, text, sourceLang string, targetLangs []string) (*plugnmeet.InsightsTextTranslationResult, error) {
	if len(targetLangs) < 1 {
		return nil, fmt.Errorf("openai: at least one target language is required")
	}

	return translateViaChatCompletions(
		ctx,
		p.client,
		p.serviceModel(defaultChatModel),
		text,
		sourceLang,
		targetLangs,
		p.logger,
	)
}

// SynthesizeText is deliberately unimplemented for now. It ignores its
// arguments and always returns a nil reader with a descriptive error, so
// callers learn that speech synthesis is unavailable from this provider.
func (p *Provider) SynthesizeText(context.Context, []byte) (io.ReadCloser, error) {
	errNotImplemented := fmt.Errorf("openai: speech synthesis not implemented")
	return nil, errNotImplemented
}

// GetSupportedLanguages looks up serviceType in the package-level
// supportedLanguages table and returns one pointer per entry, or nil when the
// service type has no entry. The pointers refer to the elements of the shared
// table rather than to copies, so callers should treat them as read-only.
func (p *Provider) GetSupportedLanguages(serviceType insights.ServiceType) []*plugnmeet.InsightsSupportedLangInfo {
	entries, found := supportedLanguages[serviceType]
	if !found {
		return nil
	}

	result := make([]*plugnmeet.InsightsSupportedLangInfo, 0, len(entries))
	for idx := range entries {
		result = append(result, &entries[idx])
	}
	return result
}

// AITextChatStream is not supported by this provider in its current scope.
// It ignores its arguments and always returns a nil channel together with a
// non-nil error, mirroring SynthesizeText. Returning (nil, nil) would hand
// callers a nil channel, and a receive from a nil channel blocks forever.
func (p *Provider) AITextChatStream(_ context.Context, _ string, _ []*plugnmeet.InsightsAITextChatContent) (<-chan *plugnmeet.InsightsAITextChatStreamResult, error) {
	return nil, fmt.Errorf("openai: AI text chat streaming not supported")
}

// AIChatTextSummarize is not supported by this provider in its current scope.
// It ignores its arguments and always returns zero values together with a
// non-nil error, mirroring SynthesizeText, so that an empty summary is never
// mistaken for a successful result.
func (p *Provider) AIChatTextSummarize(_ context.Context, _ string, _ []*plugnmeet.InsightsAITextChatContent) (string, uint32, uint32, error) {
	return "", 0, 0, fmt.Errorf("openai: AI chat summarization not supported")
}

// StartBatchSummarizeAudioFile is not supported by this provider. It ignores
// its arguments and always returns empty strings together with a non-nil
// error, mirroring SynthesizeText, so that callers do not proceed with empty
// identifiers as if a batch job had been started.
func (p *Provider) StartBatchSummarizeAudioFile(_ context.Context, _, _, _ string) (string, string, error) {
	return "", "", fmt.Errorf("openai: batch audio summarization not supported")
}

// CheckBatchJobStatus is not supported by this provider. It ignores its
// arguments and always returns a nil response together with a non-nil error,
// mirroring SynthesizeText. Returning (nil, nil) would invite callers to
// dereference the nil response after seeing a nil error.
func (p *Provider) CheckBatchJobStatus(_ context.Context, _ string) (*insights.BatchJobResponse, error) {
	return nil, fmt.Errorf("openai: batch job status not supported")
}

// DeleteUploadedFile is a no-op for this provider: it ignores both arguments
// and always reports success.
func (*Provider) DeleteUploadedFile(context.Context, string) error {
	return nil
}

// serviceModel resolves the model name to use. Precedence, highest first:
// a non-empty string "model" option on the service config, then a non-empty
// string "model" option on the provider account, then fallback. A nil service
// config or account is skipped, as is a "model" value that is not a string.
func (p *Provider) serviceModel(fallback string) string {
	var fromService, fromAccount string
	if p.service != nil {
		fromService, _ = p.service.Options["model"].(string)
	}
	if p.account != nil {
		fromAccount, _ = p.account.Options["model"].(string)
	}

	switch {
	case fromService != "":
		return fromService
	case fromAccount != "":
		return fromAccount
	default:
		return fallback
	}
}

// chunkSeconds returns the chunk length, in seconds, configured through the
// provider account's chunk_seconds option. Only a float64 or int value greater
// than zero is honoured; a nil account, a missing option, a value of any other
// type, or a non-positive number yields defaultChunkSeconds.
func (p *Provider) chunkSeconds() float64 {
	seconds := float64(defaultChunkSeconds)
	if p.account == nil {
		return seconds
	}

	raw := p.account.Options["chunk_seconds"]
	if f, isFloat := raw.(float64); isFloat && f > 0 {
		seconds = f
	} else if n, isInt := raw.(int); isInt && n > 0 {
		seconds = float64(n)
	}
	return seconds
}
79 changes: 79 additions & 0 deletions pkg/insights/providers/openai/languages.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
package openai

import (
"github.com/mynaparrot/plugnmeet-protocol/plugnmeet"
"github.com/mynaparrot/plugnmeet-server/pkg/insights"
)

// supportedLanguages maps each service type this provider serves to the
// languages it advertises. Transcription and translation both use the list
// produced by whisperLanguages; each entry gets its own slice from a separate
// call, so the two lists do not share storage.
var supportedLanguages = map[insights.ServiceType][]plugnmeet.InsightsSupportedLangInfo{
	insights.ServiceTypeTranslation:   whisperLanguages(),
	insights.ServiceTypeTranscription: whisperLanguages(),
}

// whisperLanguages returns a freshly allocated list of the languages this
// provider advertises, ordered by language code. Every entry uses its
// language code as its locale as well.
func whisperLanguages() []plugnmeet.InsightsSupportedLangInfo {
	// Each row is {code, display name}.
	rows := [...][2]string{
		{"af", "Afrikaans"}, {"ar", "Arabic"}, {"az", "Azerbaijani"},
		{"be", "Belarusian"}, {"bg", "Bulgarian"}, {"bn", "Bengali"},
		{"bs", "Bosnian"}, {"ca", "Catalan"}, {"cs", "Czech"},
		{"cy", "Welsh"}, {"da", "Danish"}, {"de", "German"},
		{"el", "Greek"}, {"en", "English"}, {"es", "Spanish"},
		{"et", "Estonian"}, {"fa", "Persian"}, {"fi", "Finnish"},
		{"fr", "French"}, {"gl", "Galician"}, {"he", "Hebrew"},
		{"hi", "Hindi"}, {"hr", "Croatian"}, {"hu", "Hungarian"},
		{"hy", "Armenian"}, {"id", "Indonesian"}, {"is", "Icelandic"},
		{"it", "Italian"}, {"ja", "Japanese"}, {"kk", "Kazakh"},
		{"kn", "Kannada"}, {"ko", "Korean"}, {"lt", "Lithuanian"},
		{"lv", "Latvian"}, {"mi", "Maori"}, {"mk", "Macedonian"},
		{"mr", "Marathi"}, {"ms", "Malay"}, {"ne", "Nepali"},
		{"nl", "Dutch"}, {"no", "Norwegian"}, {"pl", "Polish"},
		{"pt", "Portuguese"}, {"ro", "Romanian"}, {"ru", "Russian"},
		{"sk", "Slovak"}, {"sl", "Slovenian"}, {"sr", "Serbian"},
		{"sv", "Swedish"}, {"sw", "Swahili"}, {"ta", "Tamil"},
		{"th", "Thai"}, {"tl", "Tagalog"}, {"tr", "Turkish"},
		{"uk", "Ukrainian"}, {"ur", "Urdu"}, {"vi", "Vietnamese"},
		{"zh", "Chinese"},
	}

	langs := make([]plugnmeet.InsightsSupportedLangInfo, len(rows))
	for i := range rows {
		langs[i].Code = rows[i][0]
		langs[i].Name = rows[i][1]
		langs[i].Locale = rows[i][0]
	}
	return langs
}
Comment on lines +13 to +79
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

To improve efficiency and avoid allocating two identical slices, you can define the list of languages as a single package-level variable and reuse it in the supportedLanguages map. This also makes the code slightly more readable by removing the whisperLanguages() function.

var (
	// whisperLangs is the single list of languages advertised for
	// transcription; the translation entry below reuses the very same slice.
	// Every entry uses its language code as its locale as well.
	whisperLangs = []plugnmeet.InsightsSupportedLangInfo{
		{Code: "af", Name: "Afrikaans", Locale: "af"},
		{Code: "ar", Name: "Arabic", Locale: "ar"},
		{Code: "az", Name: "Azerbaijani", Locale: "az"},
		{Code: "be", Name: "Belarusian", Locale: "be"},
		{Code: "bg", Name: "Bulgarian", Locale: "bg"},
		{Code: "bn", Name: "Bengali", Locale: "bn"},
		{Code: "bs", Name: "Bosnian", Locale: "bs"},
		{Code: "ca", Name: "Catalan", Locale: "ca"},
		{Code: "cs", Name: "Czech", Locale: "cs"},
		{Code: "cy", Name: "Welsh", Locale: "cy"},
		{Code: "da", Name: "Danish", Locale: "da"},
		{Code: "de", Name: "German", Locale: "de"},
		{Code: "el", Name: "Greek", Locale: "el"},
		{Code: "en", Name: "English", Locale: "en"},
		{Code: "es", Name: "Spanish", Locale: "es"},
		{Code: "et", Name: "Estonian", Locale: "et"},
		{Code: "fa", Name: "Persian", Locale: "fa"},
		{Code: "fi", Name: "Finnish", Locale: "fi"},
		{Code: "fr", Name: "French", Locale: "fr"},
		{Code: "gl", Name: "Galician", Locale: "gl"},
		{Code: "he", Name: "Hebrew", Locale: "he"},
		{Code: "hi", Name: "Hindi", Locale: "hi"},
		{Code: "hr", Name: "Croatian", Locale: "hr"},
		{Code: "hu", Name: "Hungarian", Locale: "hu"},
		{Code: "hy", Name: "Armenian", Locale: "hy"},
		{Code: "id", Name: "Indonesian", Locale: "id"},
		{Code: "is", Name: "Icelandic", Locale: "is"},
		{Code: "it", Name: "Italian", Locale: "it"},
		{Code: "ja", Name: "Japanese", Locale: "ja"},
		{Code: "kk", Name: "Kazakh", Locale: "kk"},
		{Code: "kn", Name: "Kannada", Locale: "kn"},
		{Code: "ko", Name: "Korean", Locale: "ko"},
		{Code: "lt", Name: "Lithuanian", Locale: "lt"},
		{Code: "lv", Name: "Latvian", Locale: "lv"},
		{Code: "mi", Name: "Maori", Locale: "mi"},
		{Code: "mk", Name: "Macedonian", Locale: "mk"},
		{Code: "mr", Name: "Marathi", Locale: "mr"},
		{Code: "ms", Name: "Malay", Locale: "ms"},
		{Code: "ne", Name: "Nepali", Locale: "ne"},
		{Code: "nl", Name: "Dutch", Locale: "nl"},
		{Code: "no", Name: "Norwegian", Locale: "no"},
		{Code: "pl", Name: "Polish", Locale: "pl"},
		{Code: "pt", Name: "Portuguese", Locale: "pt"},
		{Code: "ro", Name: "Romanian", Locale: "ro"},
		{Code: "ru", Name: "Russian", Locale: "ru"},
		{Code: "sk", Name: "Slovak", Locale: "sk"},
		{Code: "sl", Name: "Slovenian", Locale: "sl"},
		{Code: "sr", Name: "Serbian", Locale: "sr"},
		{Code: "sv", Name: "Swedish", Locale: "sv"},
		{Code: "sw", Name: "Swahili", Locale: "sw"},
		{Code: "ta", Name: "Tamil", Locale: "ta"},
		{Code: "th", Name: "Thai", Locale: "th"},
		{Code: "tl", Name: "Tagalog", Locale: "tl"},
		{Code: "tr", Name: "Turkish", Locale: "tr"},
		{Code: "uk", Name: "Ukrainian", Locale: "uk"},
		{Code: "ur", Name: "Urdu", Locale: "ur"},
		{Code: "vi", Name: "Vietnamese", Locale: "vi"},
		{Code: "zh", Name: "Chinese", Locale: "zh"},
	}

	// supportedLanguages enumerates the languages we surface for transcription
	// and translation. Both keys hold the same whisperLangs slice, so the two
	// lists share one backing array: pointers that GetSupportedLanguages takes
	// into it (&langs[i]) refer to the same elements for either service type,
	// and a mutation through one is visible through the other.
	supportedLanguages = map[insights.ServiceType][]plugnmeet.InsightsSupportedLangInfo{
		insights.ServiceTypeTranscription: whisperLangs,
		insights.ServiceTypeTranslation:   whisperLangs,
	}
)

Loading