1 change: 1 addition & 0 deletions package-lock.json

Some generated files are not rendered by default.

6 changes: 2 additions & 4 deletions src/handlers/responseHandlers.ts
@@ -59,10 +59,8 @@ export async function responseHandler(
let providerTransformers = Providers[provider]?.responseTransforms;

if (providerConfig?.getConfig) {
providerTransformers = providerConfig.getConfig({
params: gatewayRequest,
providerOptions,
}).responseTransforms;
providerTransformers =
providerConfig.getConfig(gatewayRequest).responseTransforms;
}

// Checking status 200 so that errors are not considered as stream mode.
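The responseHandlers.ts hunk drops the `{ params, providerOptions }` wrapper and passes the gateway request straight into `getConfig`. A minimal sketch of the call shape this implies (the types are illustrative stand-ins, not the gateway's real interfaces):

```ts
// Illustrative types only; the real ProviderConfigs interfaces live in the gateway.
type ResponseTransforms = Record<string, (response: unknown) => unknown>;

interface ProviderConfigSketch {
  getConfig?: (gatewayRequest: Record<string, unknown>) => {
    responseTransforms?: ResponseTransforms;
  };
}

// Mirrors the updated call in responseHandler: the request params are passed
// directly instead of being wrapped in { params, providerOptions }.
const resolveTransforms = (
  providerConfig: ProviderConfigSketch,
  gatewayRequest: Record<string, unknown>
) => providerConfig.getConfig?.(gatewayRequest)?.responseTransforms;
```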
18 changes: 15 additions & 3 deletions src/index.ts
@@ -40,22 +40,34 @@ import modelResponsesHandler from './handlers/modelResponsesHandler';
import { logger } from './apm';
// Config
import conf from '../conf.json';
import { createCacheBackendsRedis } from './shared/services/cache';
import {
createCacheBackendsCF,
createCacheBackendsLocal,
createCacheBackendsRedis,
} from './shared/services/cache';
import { Environment } from './utils/env';

// Create a new Hono server instance
const app = new Hono();
const runtime = getRuntimeKey();

if (runtime === 'node' && process.env.REDIS_CONNECTION_STRING) {
createCacheBackendsRedis(process.env.REDIS_CONNECTION_STRING);
if (Environment().CACHE_BACKEND === 'redis') {
createCacheBackendsRedis(Environment().REDIS_CONNECTION_STRING);
} else if (Environment().CACHE_BACKEND === 'memcache') {
createCacheBackendsLocal();
}

/**
 * Middleware that conditionally applies compression based on the runtime.
 * Compression is handled automatically by the lagon and workerd runtimes.
 * This checks that the runtime is not one of those before applying the compress middleware, to avoid double compression.
*/
app.use('*', (c, next) => {
const runtimesThatDontNeedCompression = ['lagon', 'workerd', 'node'];
if (runtime === 'workerd') {
if (Environment(c).KV_STORE_WORKER_BASEPATH) return next();
createCacheBackendsCF(Environment(c));
}
if (runtimesThatDontNeedCompression.includes(runtime)) {
return next();
}
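The index.ts hunk replaces the Redis-only bootstrap with an explicit `CACHE_BACKEND` switch and, on workerd, registers Cloudflare-backed caches unless `KV_STORE_WORKER_BASEPATH` is set. A rough sketch of that selection in isolation; the env keys mirror the diff, while the return values and the `'none'` fallback are assumptions for illustration:

```ts
// Standalone sketch of the backend selection wired up above. The env keys
// (CACHE_BACKEND, REDIS_CONNECTION_STRING, KV_STORE_WORKER_BASEPATH) come from
// the diff; everything else is a stand-in for the real cache factories.
type CacheBackend = 'redis' | 'in-memory' | 'cloudflare-kv' | 'none';

function pickCacheBackend(
  runtime: string,
  env: {
    CACHE_BACKEND?: string;
    REDIS_CONNECTION_STRING?: string;
    KV_STORE_WORKER_BASEPATH?: string;
  }
): CacheBackend {
  if (env.CACHE_BACKEND === 'redis' && env.REDIS_CONNECTION_STRING) {
    return 'redis'; // shared cache across gateway instances
  }
  if (env.CACHE_BACKEND === 'memcache') {
    return 'in-memory'; // per-instance, process-local cache
  }
  if (runtime === 'workerd' && !env.KV_STORE_WORKER_BASEPATH) {
    return 'cloudflare-kv'; // mirrors the createCacheBackendsCF branch
  }
  return 'none';
}
```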
19 changes: 13 additions & 6 deletions src/providers/anthropic/api.ts
@@ -1,24 +1,31 @@
import { ProviderAPIConfig } from '../types';
import { Params } from '../../types/requestBody';

const AnthropicAPIConfig: ProviderAPIConfig = {
getBaseURL: () => 'https://api.anthropic.com/v1',
headers: ({ providerOptions, fn, gatewayRequestBody }) => {
headers: ({
providerOptions,
fn,
headers: requestHeaders,
gatewayRequestBody,
}) => {
const apiKey =
providerOptions.apiKey || requestHeaders?.['x-api-key'] || '';
const headers: Record<string, string> = {
'X-API-Key': `${providerOptions.apiKey}`,
'X-API-Key': apiKey,
};

    // Accept anthropic_beta and anthropic_version in the body to support environments that cannot send them in headers.
const betaHeader =
providerOptions?.['anthropicBeta'] ??
gatewayRequestBody?.['anthropic_beta'] ??
(gatewayRequestBody as Params)?.['anthropic_beta'] ??
'messages-2023-12-15';
const version =
providerOptions?.['anthropicVersion'] ??
gatewayRequestBody?.['anthropic_version'] ??
(gatewayRequestBody as Params)?.['anthropic_version'] ??
'2023-06-01';

if (fn === 'chatComplete') {
headers['anthropic-beta'] = betaHeader;
headers['anthropic-beta'] = betaHeader as string;
}
headers['anthropic-version'] = version;
return headers;
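The anthropic/api.ts hunk adds two fallbacks: the API key may come from the incoming `x-api-key` header when `providerOptions.apiKey` is absent, and `anthropic_beta` / `anthropic_version` may be carried in the request body for environments that cannot set custom headers. An illustrative request body relying on those body-level fields (the model id and values are examples only):

```ts
// Hypothetical gateway request body exercising the new body-level fallbacks.
const gatewayRequestBody = {
  model: 'claude-3-5-sonnet-20241022',
  max_tokens: 1024,
  messages: [{ role: 'user', content: 'Summarize this PR in one line.' }],
  // Read by headers() above when the caller cannot set the
  // anthropic-beta / anthropic-version HTTP headers itself:
  anthropic_beta: 'messages-2023-12-15',
  anthropic_version: '2023-06-01',
};
```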
15 changes: 7 additions & 8 deletions src/providers/anthropic/chatComplete.ts
@@ -3,8 +3,8 @@ import {
Params,
Message,
ContentType,
SYSTEM_MESSAGE_ROLES,
PromptCache,
SYSTEM_MESSAGE_ROLES,
} from '../../types/requestBody';
import {
ChatCompletionResponse,
@@ -18,7 +18,6 @@ import {
ANTHROPIC_STOP_REASON,
} from './types';
import {
generateErrorResponse,
generateInvalidProviderResponseError,
transformFinishReason,
} from '../utils';
@@ -151,7 +150,7 @@ const transformAssistantMessage = (msg: Message): AnthropicMessage => {
type: 'tool_use',
name: toolCall.function.name,
id: toolCall.id,
input: toolCall.function.arguments?.length
input: toolCall.function.arguments?.length // we need to send an empty object if the arguments are not provided
? JSON.parse(toolCall.function.arguments)
: {},
...(toolCall.cache_control && {
@@ -263,7 +262,7 @@ export const AnthropicChatCompleteConfig: ProviderConfig = {
transform: (params: Params) => {
let messages: AnthropicMessage[] = [];
// Transform the chat messages into a simple prompt
if (!!params.messages) {
if (params.messages) {
params.messages.forEach((msg: Message & PromptCache) => {
if (SYSTEM_MESSAGE_ROLES.includes(msg.role)) return;

@@ -315,7 +314,7 @@ export const AnthropicChatCompleteConfig: ProviderConfig = {
transform: (params: Params) => {
let systemMessages: AnthropicMessageContentItem[] = [];
// Transform the chat messages into a simple prompt
if (!!params.messages) {
if (params.messages) {
params.messages.forEach((msg: Message & PromptCache) => {
if (
SYSTEM_MESSAGE_ROLES.includes(msg.role) &&
@@ -494,13 +493,13 @@ export interface AnthropicChatCompleteStreamResponse {
cache_read_input_tokens?: number;
};
message?: {
model?: string;
usage?: {
output_tokens?: number;
input_tokens?: number;
cache_creation_input_tokens?: number;
cache_read_input_tokens?: number;
};
model?: string;
};
error?: AnthropicErrorObject;
}
@@ -527,7 +526,7 @@ export const AnthropicChatCompleteResponseTransform: (
output_tokens = 0,
cache_creation_input_tokens,
cache_read_input_tokens,
} = response?.usage;
} = response?.usage ?? {};

const shouldSendCacheUsage =
cache_creation_input_tokens || cache_read_input_tokens;
@@ -602,7 +601,7 @@ export const AnthropicChatCompleteStreamChunkTransform: (
response: string,
fallbackId: string,
streamState: AnthropicStreamState,
_strictOpenAiCompliance: boolean
strictOpenAiCompliance: boolean
) => string | undefined = (
responseChunk,
fallbackId,
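One of the chatComplete.ts hunks adds `?? {}` when destructuring `response?.usage`. A small self-contained illustration of why the guard matters (the interface is a simplified stand-in for Anthropic's usage object):

```ts
// Destructuring `undefined` throws a TypeError; destructuring an empty object
// simply falls back to the declared defaults.
interface UsageSketch {
  input_tokens?: number;
  output_tokens?: number;
}

const toOpenAIUsage = (usage?: UsageSketch) => {
  const { input_tokens = 0, output_tokens = 0 } = usage ?? {};
  return {
    prompt_tokens: input_tokens,
    completion_tokens: output_tokens,
    total_tokens: input_tokens + output_tokens,
  };
};

toOpenAIUsage(undefined); // => { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 }
```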
9 changes: 3 additions & 6 deletions src/providers/anthropic/complete.ts
@@ -1,6 +1,7 @@
import { ANTHROPIC } from '../../globals';
import { Params } from '../../types/requestBody';
import { CompletionResponse, ErrorResponse, ProviderConfig } from '../types';
import { AnthropicErrorResponseTransform } from './utils';
import {
generateInvalidProviderResponseError,
transformFinishReason,
@@ -10,7 +11,6 @@ import {
AnthropicStreamState,
AnthropicErrorResponse,
} from './types';
import { AnthropicErrorResponseTransform } from './utils';

// TODO: this configuration does not enforce the maximum token limit for the input parameter. If you want to enforce this, you might need to add a custom validation function or a max property to the ParameterConfig interface, and then use it in the input configuration. However, this might be complex because the token count is not a simple length check, but depends on the specific tokenization method used by the model.

@@ -84,11 +84,8 @@ export const AnthropicCompleteResponseTransform: (
_responseHeaders,
strictOpenAiCompliance
) => {
if (responseStatus !== 200) {
const errorResposne = AnthropicErrorResponseTransform(
response as AnthropicErrorResponse
);
if (errorResposne) return errorResposne;
if (responseStatus !== 200 && 'error' in response) {
return AnthropicErrorResponseTransform(response);
}

if ('completion' in response) {
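The complete.ts hunk replaces the unconditional cast with an `'error' in response` check, so the error transform only runs when the payload actually carries an error object. A sketch of the narrowing pattern, with shapes simplified from `AnthropicErrorResponse` and the completion response:

```ts
// Simplified stand-ins for the provider response shapes.
interface ErrorShape {
  error: { type: string; message: string };
}
interface CompletionShape {
  completion: string;
}

function transform(response: ErrorShape | CompletionShape, status: number) {
  if (status !== 200 && 'error' in response) {
    // `in` narrows `response` to ErrorShape here; no `as` cast needed.
    return { message: response.error.message, type: response.error.type };
  }
  if ('completion' in response) {
    return { text: response.completion };
  }
  return undefined;
}
```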
2 changes: 1 addition & 1 deletion src/providers/anthropic/index.ts
@@ -25,8 +25,8 @@ const AnthropicConfig: ProviderConfigs = {
'stream-complete': AnthropicCompleteStreamChunkTransform,
complete: AnthropicCompleteResponseTransform,
chatComplete: AnthropicChatCompleteResponseTransform,
'stream-chatComplete': AnthropicChatCompleteStreamChunkTransform,
messages: AnthropicMessagesResponseTransform,
'stream-chatComplete': AnthropicChatCompleteStreamChunkTransform,
},
};

19 changes: 10 additions & 9 deletions src/providers/anthropic/types.ts
@@ -1,4 +1,5 @@
export type AnthropicStreamState = {
containsChainOfThoughtMessage?: boolean;
toolIndex?: number;
usage?: {
prompt_tokens?: number;
@@ -9,6 +10,15 @@ export type AnthropicStreamState = {
model?: string;
};

// https://docs.anthropic.com/en/api/messages#response-stop-reason
export enum ANTHROPIC_STOP_REASON {
max_tokens = 'max_tokens',
stop_sequence = 'stop_sequence',
tool_use = 'tool_use',
end_turn = 'end_turn',
pause_turn = 'pause_turn',
}

export interface AnthropicErrorObject {
type: string;
message: string;
@@ -18,12 +28,3 @@ export interface AnthropicErrorResponse {
type: string;
error: AnthropicErrorObject;
}

// https://docs.anthropic.com/en/api/messages#response-stop-reason
export enum ANTHROPIC_STOP_REASON {
max_tokens = 'max_tokens',
stop_sequence = 'stop_sequence',
tool_use = 'tool_use',
end_turn = 'end_turn',
pause_turn = 'pause_turn',
}
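The `ANTHROPIC_STOP_REASON` enum is only relocated in this file, but for context these stop reasons typically map onto OpenAI-style finish reasons roughly as below. This is an illustrative mapping, not necessarily the exact behaviour of the gateway's `transformFinishReason` helper:

```ts
// Rough, assumed mapping from Anthropic stop reasons to OpenAI finish reasons.
const STOP_REASON_TO_FINISH_REASON: Record<string, string> = {
  end_turn: 'stop',
  stop_sequence: 'stop',
  max_tokens: 'length',
  tool_use: 'tool_calls',
  pause_turn: 'stop', // assumption: surfaced to OpenAI clients as a normal stop
};
```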
4 changes: 2 additions & 2 deletions src/providers/anyscale/chatComplete.ts
@@ -1,5 +1,5 @@
import { ANYSCALE } from '../../globals';
import { Params } from '../../types/requestBody';
import { Message, Params } from '../../types/requestBody';
import {
ChatCompletionResponse,
ErrorResponse,
@@ -22,7 +22,7 @@ export const AnyscaleChatCompleteConfig: ProviderConfig = {
param: 'messages',
default: '',
transform: (params: Params) => {
return params.messages?.map((message) => {
return params.messages?.map((message: Message) => {
if (message.role === 'developer') return { ...message, role: 'system' };
return message;
});
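Both this hunk and the azure-ai-inference one below apply the same transform: the OpenAI `developer` role is rewritten to `system` for providers that do not recognise it. A tiny usage illustration (the messages are made-up examples):

```ts
const incoming = [
  { role: 'developer', content: 'Answer in one sentence.' },
  { role: 'user', content: 'What does this gateway do?' },
];

// Same mapping as the transform above.
const outgoing = incoming.map((message) =>
  message.role === 'developer' ? { ...message, role: 'system' } : message
);
// => [{ role: 'system', ... }, { role: 'user', ... }]
```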
64 changes: 47 additions & 17 deletions src/providers/azure-ai-inference/api.ts
@@ -33,10 +33,9 @@ const AzureAIInferenceAPI: ProviderAPIConfig = {
if (azureFoundryUrl) {
return azureFoundryUrl;
}

return '';
},
headers: async ({ providerOptions, fn }) => {
headers: async ({ providerOptions, fn, c }) => {
const {
apiKey,
azureExtraParameters,
@@ -91,6 +90,7 @@ const AzureAIInferenceAPI: ProviderAPIConfig = {
const { azureManagedClientId } = providerOptions;
const resource = 'https://cognitiveservices.azure.com/';
const accessToken = await getAzureManagedIdentityToken(
c,
resource,
azureManagedClientId
);
@@ -167,24 +167,54 @@ const AzureAIInferenceAPI: ProviderAPIConfig = {
? ENDPOINT_MAPPING[mappedFn]
: `${ENDPOINT_MAPPING[mappedFn]}?${searchParamsString}`;
}
case 'realtime':
case 'imageGenerate':
case 'imageEdit':
case 'createSpeech':
case 'createTranscription':
case 'createTranslation':
case 'cancelBatch':
case 'createBatch':
case 'getBatchOutput':
case 'retrieveBatch':
case 'listBatches':
case 'retrieveFile':
case 'listFiles':
case 'deleteFile':
case 'realtime': {
return `${ENDPOINT_MAPPING[mappedFn]}?${searchParamsString}`;
}
case 'imageGenerate': {
return `${ENDPOINT_MAPPING[mappedFn]}?${searchParamsString}`;
}
case 'imageEdit': {
return `${ENDPOINT_MAPPING[mappedFn]}?${searchParamsString}`;
}
case 'createSpeech': {
return `${ENDPOINT_MAPPING[mappedFn]}?${searchParamsString}`;
}
case 'createTranscription': {
return `${ENDPOINT_MAPPING[mappedFn]}?${searchParamsString}`;
}
case 'createTranslation': {
return `${ENDPOINT_MAPPING[mappedFn]}?${searchParamsString}`;
}
case 'uploadFile': {
return `${ENDPOINT_MAPPING[mappedFn]}?${searchParamsString}`;
}
case 'retrieveFile': {
return `${ENDPOINT_MAPPING[mappedFn]}?${searchParamsString}`;
}
case 'listFiles': {
return `${ENDPOINT_MAPPING[mappedFn]}?${searchParamsString}`;
}
case 'deleteFile': {
return `${ENDPOINT_MAPPING[mappedFn]}?${searchParamsString}`;
}
case 'retrieveFileContent': {
return `${ENDPOINT_MAPPING[mappedFn]}?${searchParamsString}`;
}

case 'listBatches': {
return `${ENDPOINT_MAPPING[mappedFn]}?${searchParamsString}`;
}
case 'retrieveBatch': {
return `${ENDPOINT_MAPPING[mappedFn]}?${searchParamsString}`;
}
case 'cancelBatch': {
return `${ENDPOINT_MAPPING[mappedFn]}?${searchParamsString}`;
}
case 'getBatchOutput': {
return `${ENDPOINT_MAPPING[mappedFn]}?${searchParamsString}`;
}
case 'createBatch': {
return `${ENDPOINT_MAPPING[mappedFn]}?${searchParamsString}`;
}
default:
return '';
}
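The api.ts hunk unrolls the previous fall-through cases so that every route explicitly returns `ENDPOINT_MAPPING[mappedFn]` plus the query string. A compact sketch of that resolution; the mapping values below are invented placeholders, not the provider's actual endpoints:

```ts
// Placeholder endpoint mapping; the real ENDPOINT_MAPPING lives in the provider.
const ENDPOINT_MAPPING_SKETCH: Record<string, string> = {
  imageGenerate: '/images/generations',
  createSpeech: '/audio/speech',
};

// Each unrolled case in the diff resolves to base + '?' + query string.
function getEndpointSketch(mappedFn: string, searchParamsString: string) {
  const base = ENDPOINT_MAPPING_SKETCH[mappedFn];
  return base ? `${base}?${searchParamsString}` : '';
}
```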
4 changes: 2 additions & 2 deletions src/providers/azure-ai-inference/chatComplete.ts
@@ -1,4 +1,4 @@
import { Params } from '../../types/requestBody';
import { Message, Params } from '../../types/requestBody';
import { OpenAIErrorResponseTransform } from '../openai/utils';
import {
ChatCompletionResponse,
@@ -16,7 +16,7 @@ export const AzureAIInferenceChatCompleteConfig: ProviderConfig = {
param: 'messages',
default: '',
transform: (params: Params) => {
return params.messages?.map((message) => {
return params.messages?.map((message: Message) => {
if (message.role === 'developer') return { ...message, role: 'system' };
return message;
});