1 change: 1 addition & 0 deletions package-lock.json

Some generated files are not rendered by default.

6 changes: 2 additions & 4 deletions src/handlers/responseHandlers.ts
@@ -59,10 +59,8 @@ export async function responseHandler(
let providerTransformers = Providers[provider]?.responseTransforms;

if (providerConfig?.getConfig) {
providerTransformers = providerConfig.getConfig({
params: gatewayRequest,
providerOptions,
}).responseTransforms;
providerTransformers =
providerConfig.getConfig(gatewayRequest).responseTransforms;
}

// Checking status 200 so that errors are not considered as stream mode.
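The responseHandlers.ts hunk drops the `{ params, providerOptions }` wrapper and passes the gateway request straight into `getConfig`. A minimal sketch of the call shape this implies (the types are illustrative stand-ins, not the gateway's real interfaces):

```ts
// Illustrative types only; the real ProviderConfigs interfaces live in the gateway.
type ResponseTransforms = Record<string, (response: unknown) => unknown>;

interface ProviderConfigSketch {
  getConfig?: (gatewayRequest: Record<string, unknown>) => {
    responseTransforms?: ResponseTransforms;
  };
}

// Mirrors the updated call in responseHandler: the request params are passed
// directly instead of being wrapped in { params, providerOptions }.
const resolveTransforms = (
  providerConfig: ProviderConfigSketch,
  gatewayRequest: Record<string, unknown>
) => providerConfig.getConfig?.(gatewayRequest)?.responseTransforms;
```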
18 changes: 15 additions & 3 deletions src/index.ts
@@ -40,22 +40,34 @@ import modelResponsesHandler from './handlers/modelResponsesHandler';
import { logger } from './apm';
// Config
import conf from '../conf.json';
import { createCacheBackendsRedis } from './shared/services/cache';
import {
createCacheBackendsCF,
createCacheBackendsLocal,
createCacheBackendsRedis,
} from './shared/services/cache';
import { Environment } from './utils/env';

// Create a new Hono server instance
const app = new Hono();
const runtime = getRuntimeKey();

if (runtime === 'node' && process.env.REDIS_CONNECTION_STRING) {
createCacheBackendsRedis(process.env.REDIS_CONNECTION_STRING);
if (Environment().CACHE_BACKEND === 'redis') {
createCacheBackendsRedis(Environment().REDIS_CONNECTION_STRING);
} else if (Environment().CACHE_BACKEND === 'memcache') {
createCacheBackendsLocal();
}

/**
 * Middleware that conditionally applies compression based on the runtime.
 * Compression is handled automatically by the lagon and workerd runtimes.
 * This checks that the runtime is not one of those before applying the compress middleware, to avoid double compression.
*/
app.use('*', (c, next) => {
const runtimesThatDontNeedCompression = ['lagon', 'workerd', 'node'];
if (runtime === 'workerd') {
if (Environment(c).KV_STORE_WORKER_BASEPATH) return next();
createCacheBackendsCF(Environment(c));
}
if (runtimesThatDontNeedCompression.includes(runtime)) {
return next();
}
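The index.ts hunk replaces the Redis-only bootstrap with an explicit `CACHE_BACKEND` switch and, on workerd, registers Cloudflare-backed caches unless `KV_STORE_WORKER_BASEPATH` is set. A rough sketch of that selection in isolation; the env keys mirror the diff, while the return values and the `'none'` fallback are assumptions for illustration:

```ts
// Standalone sketch of the backend selection wired up above. The env keys
// (CACHE_BACKEND, REDIS_CONNECTION_STRING, KV_STORE_WORKER_BASEPATH) come from
// the diff; everything else is a stand-in for the real cache factories.
type CacheBackend = 'redis' | 'in-memory' | 'cloudflare-kv' | 'none';

function pickCacheBackend(
  runtime: string,
  env: {
    CACHE_BACKEND?: string;
    REDIS_CONNECTION_STRING?: string;
    KV_STORE_WORKER_BASEPATH?: string;
  }
): CacheBackend {
  if (env.CACHE_BACKEND === 'redis' && env.REDIS_CONNECTION_STRING) {
    return 'redis'; // shared cache across gateway instances
  }
  if (env.CACHE_BACKEND === 'memcache') {
    return 'in-memory'; // per-instance, process-local cache
  }
  if (runtime === 'workerd' && !env.KV_STORE_WORKER_BASEPATH) {
    return 'cloudflare-kv'; // mirrors the createCacheBackendsCF branch
  }
  return 'none';
}
```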
19 changes: 13 additions & 6 deletions src/providers/anthropic/api.ts
@@ -1,24 +1,31 @@
import { ProviderAPIConfig } from '../types';
import { Params } from '../../types/requestBody';

const AnthropicAPIConfig: ProviderAPIConfig = {
getBaseURL: () => 'https://api.anthropic.com/v1',
headers: ({ providerOptions, fn, gatewayRequestBody }) => {
headers: ({
providerOptions,
fn,
headers: requestHeaders,
gatewayRequestBody,
}) => {
const apiKey =
providerOptions.apiKey || requestHeaders?.['x-api-key'] || '';
const headers: Record<string, string> = {
'X-API-Key': `${providerOptions.apiKey}`,
'X-API-Key': apiKey,
};

    // Accept anthropic_beta and anthropic_version in the body to support environments that cannot send them in headers.
const betaHeader =
providerOptions?.['anthropicBeta'] ??
gatewayRequestBody?.['anthropic_beta'] ??
(gatewayRequestBody as Params)?.['anthropic_beta'] ??
'messages-2023-12-15';
const version =
providerOptions?.['anthropicVersion'] ??
gatewayRequestBody?.['anthropic_version'] ??
(gatewayRequestBody as Params)?.['anthropic_version'] ??
'2023-06-01';

if (fn === 'chatComplete') {
headers['anthropic-beta'] = betaHeader;
headers['anthropic-beta'] = betaHeader as string;
}
headers['anthropic-version'] = version;
return headers;
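The anthropic/api.ts hunk adds two fallbacks: the API key may come from the incoming `x-api-key` header when `providerOptions.apiKey` is absent, and `anthropic_beta` / `anthropic_version` may be carried in the request body for environments that cannot set custom headers. An illustrative request body relying on those body-level fields (the model id and values are examples only):

```ts
// Hypothetical gateway request body exercising the new body-level fallbacks.
const gatewayRequestBody = {
  model: 'claude-3-5-sonnet-20241022',
  max_tokens: 1024,
  messages: [{ role: 'user', content: 'Summarize this PR in one line.' }],
  // Read by headers() above when the caller cannot set the
  // anthropic-beta / anthropic-version HTTP headers itself:
  anthropic_beta: 'messages-2023-12-15',
  anthropic_version: '2023-06-01',
};
```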
15 changes: 7 additions & 8 deletions src/providers/anthropic/chatComplete.ts
@@ -3,8 +3,8 @@ import {
Params,
Message,
ContentType,
SYSTEM_MESSAGE_ROLES,
PromptCache,
SYSTEM_MESSAGE_ROLES,
} from '../../types/requestBody';
import {
ChatCompletionResponse,
@@ -18,7 +18,6 @@ import {
ANTHROPIC_STOP_REASON,
} from './types';
import {
generateErrorResponse,
generateInvalidProviderResponseError,
transformFinishReason,
} from '../utils';
@@ -151,7 +150,7 @@ const transformAssistantMessage = (msg: Message): AnthropicMessage => {
type: 'tool_use',
name: toolCall.function.name,
id: toolCall.id,
input: toolCall.function.arguments?.length
input: toolCall.function.arguments?.length // we need to send an empty object if the arguments are not provided
? JSON.parse(toolCall.function.arguments)
: {},
...(toolCall.cache_control && {
@@ -263,7 +262,7 @@ export const AnthropicChatCompleteConfig: ProviderConfig = {
transform: (params: Params) => {
let messages: AnthropicMessage[] = [];
// Transform the chat messages into a simple prompt
if (!!params.messages) {
if (params.messages) {
params.messages.forEach((msg: Message & PromptCache) => {
if (SYSTEM_MESSAGE_ROLES.includes(msg.role)) return;

@@ -315,7 +314,7 @@ export const AnthropicChatCompleteConfig: ProviderConfig = {
transform: (params: Params) => {
let systemMessages: AnthropicMessageContentItem[] = [];
// Transform the chat messages into a simple prompt
if (!!params.messages) {
if (params.messages) {
params.messages.forEach((msg: Message & PromptCache) => {
if (
SYSTEM_MESSAGE_ROLES.includes(msg.role) &&
@@ -494,13 +493,13 @@ export interface AnthropicChatCompleteStreamResponse {
cache_read_input_tokens?: number;
};
message?: {
model?: string;
usage?: {
output_tokens?: number;
input_tokens?: number;
cache_creation_input_tokens?: number;
cache_read_input_tokens?: number;
};
model?: string;
};
error?: AnthropicErrorObject;
}
@@ -527,7 +526,7 @@ export const AnthropicChatCompleteResponseTransform: (
output_tokens = 0,
cache_creation_input_tokens,
cache_read_input_tokens,
} = response?.usage;
} = response?.usage ?? {};

const shouldSendCacheUsage =
cache_creation_input_tokens || cache_read_input_tokens;
@@ -602,7 +601,7 @@ export const AnthropicChatCompleteStreamChunkTransform: (
response: string,
fallbackId: string,
streamState: AnthropicStreamState,
_strictOpenAiCompliance: boolean
strictOpenAiCompliance: boolean
) => string | undefined = (
responseChunk,
fallbackId,
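One of the chatComplete.ts hunks adds `?? {}` when destructuring `response?.usage`. A small self-contained illustration of why the guard matters (the interface is a simplified stand-in for Anthropic's usage object):

```ts
// Destructuring `undefined` throws a TypeError; destructuring an empty object
// simply falls back to the declared defaults.
interface UsageSketch {
  input_tokens?: number;
  output_tokens?: number;
}

const toOpenAIUsage = (usage?: UsageSketch) => {
  const { input_tokens = 0, output_tokens = 0 } = usage ?? {};
  return {
    prompt_tokens: input_tokens,
    completion_tokens: output_tokens,
    total_tokens: input_tokens + output_tokens,
  };
};

toOpenAIUsage(undefined); // => { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 }
```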
9 changes: 3 additions & 6 deletions src/providers/anthropic/complete.ts
@@ -1,6 +1,7 @@
import { ANTHROPIC } from '../../globals';
import { Params } from '../../types/requestBody';
import { CompletionResponse, ErrorResponse, ProviderConfig } from '../types';
import { AnthropicErrorResponseTransform } from './utils';
import {
generateInvalidProviderResponseError,
transformFinishReason,
@@ -10,7 +11,6 @@ import {
AnthropicStreamState,
AnthropicErrorResponse,
} from './types';
import { AnthropicErrorResponseTransform } from './utils';

// TODO: this configuration does not enforce the maximum token limit for the input parameter. If you want to enforce this, you might need to add a custom validation function or a max property to the ParameterConfig interface, and then use it in the input configuration. However, this might be complex because the token count is not a simple length check, but depends on the specific tokenization method used by the model.

@@ -84,11 +84,8 @@ export const AnthropicCompleteResponseTransform: (
_responseHeaders,
strictOpenAiCompliance
) => {
if (responseStatus !== 200) {
const errorResposne = AnthropicErrorResponseTransform(
response as AnthropicErrorResponse
);
if (errorResposne) return errorResposne;
if (responseStatus !== 200 && 'error' in response) {
return AnthropicErrorResponseTransform(response);
}

if ('completion' in response) {
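The complete.ts hunk replaces the unconditional cast with an `'error' in response` check, so the error transform only runs when the payload actually carries an error object. A sketch of the narrowing pattern, with shapes simplified from `AnthropicErrorResponse` and the completion response:

```ts
// Simplified stand-ins for the provider response shapes.
interface ErrorShape {
  error: { type: string; message: string };
}
interface CompletionShape {
  completion: string;
}

function transform(response: ErrorShape | CompletionShape, status: number) {
  if (status !== 200 && 'error' in response) {
    // `in` narrows `response` to ErrorShape here; no `as` cast needed.
    return { message: response.error.message, type: response.error.type };
  }
  if ('completion' in response) {
    return { text: response.completion };
  }
  return undefined;
}
```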
2 changes: 1 addition & 1 deletion src/providers/anthropic/index.ts
@@ -25,8 +25,8 @@ const AnthropicConfig: ProviderConfigs = {
'stream-complete': AnthropicCompleteStreamChunkTransform,
complete: AnthropicCompleteResponseTransform,
chatComplete: AnthropicChatCompleteResponseTransform,
'stream-chatComplete': AnthropicChatCompleteStreamChunkTransform,
messages: AnthropicMessagesResponseTransform,
'stream-chatComplete': AnthropicChatCompleteStreamChunkTransform,
},
};

19 changes: 10 additions & 9 deletions src/providers/anthropic/types.ts
@@ -1,4 +1,5 @@
export type AnthropicStreamState = {
containsChainOfThoughtMessage?: boolean;
toolIndex?: number;
usage?: {
prompt_tokens?: number;
@@ -9,6 +10,15 @@ export type AnthropicStreamState = {
model?: string;
};

// https://docs.anthropic.com/en/api/messages#response-stop-reason
export enum ANTHROPIC_STOP_REASON {
max_tokens = 'max_tokens',
stop_sequence = 'stop_sequence',
tool_use = 'tool_use',
end_turn = 'end_turn',
pause_turn = 'pause_turn',
}

export interface AnthropicErrorObject {
type: string;
message: string;
@@ -18,12 +28,3 @@ export interface AnthropicErrorResponse {
type: string;
error: AnthropicErrorObject;
}

// https://docs.anthropic.com/en/api/messages#response-stop-reason
export enum ANTHROPIC_STOP_REASON {
max_tokens = 'max_tokens',
stop_sequence = 'stop_sequence',
tool_use = 'tool_use',
end_turn = 'end_turn',
pause_turn = 'pause_turn',
}
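The `ANTHROPIC_STOP_REASON` enum is only relocated in this file, but for context these stop reasons typically map onto OpenAI-style finish reasons roughly as below. This is an illustrative mapping, not necessarily the exact behaviour of the gateway's `transformFinishReason` helper:

```ts
// Rough, assumed mapping from Anthropic stop reasons to OpenAI finish reasons.
const STOP_REASON_TO_FINISH_REASON: Record<string, string> = {
  end_turn: 'stop',
  stop_sequence: 'stop',
  max_tokens: 'length',
  tool_use: 'tool_calls',
  pause_turn: 'stop', // assumption: surfaced to OpenAI clients as a normal stop
};
```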
4 changes: 2 additions & 2 deletions src/providers/anyscale/chatComplete.ts
@@ -1,5 +1,5 @@
import { ANYSCALE } from '../../globals';
import { Params } from '../../types/requestBody';
import { Message, Params } from '../../types/requestBody';
import {
ChatCompletionResponse,
ErrorResponse,
@@ -22,7 +22,7 @@ export const AnyscaleChatCompleteConfig: ProviderConfig = {
param: 'messages',
default: '',
transform: (params: Params) => {
return params.messages?.map((message) => {
return params.messages?.map((message: Message) => {
if (message.role === 'developer') return { ...message, role: 'system' };
return message;
});
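Both this hunk and the azure-ai-inference one below apply the same transform: the OpenAI `developer` role is rewritten to `system` for providers that do not recognise it. A tiny usage illustration (the messages are made-up examples):

```ts
const incoming = [
  { role: 'developer', content: 'Answer in one sentence.' },
  { role: 'user', content: 'What does this gateway do?' },
];

// Same mapping as the transform above.
const outgoing = incoming.map((message) =>
  message.role === 'developer' ? { ...message, role: 'system' } : message
);
// => [{ role: 'system', ... }, { role: 'user', ... }]
```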
64 changes: 47 additions & 17 deletions src/providers/azure-ai-inference/api.ts
@@ -33,10 +33,9 @@ const AzureAIInferenceAPI: ProviderAPIConfig = {
if (azureFoundryUrl) {
return azureFoundryUrl;
}

return '';
},
headers: async ({ providerOptions, fn }) => {
headers: async ({ providerOptions, fn, c }) => {
const {
apiKey,
azureExtraParameters,
@@ -91,6 +90,7 @@ const AzureAIInferenceAPI: ProviderAPIConfig = {
const { azureManagedClientId } = providerOptions;
const resource = 'https://cognitiveservices.azure.com/';
const accessToken = await getAzureManagedIdentityToken(
c,
resource,
azureManagedClientId
);
@@ -167,24 +167,54 @@ const AzureAIInferenceAPI: ProviderAPIConfig = {
? ENDPOINT_MAPPING[mappedFn]
: `${ENDPOINT_MAPPING[mappedFn]}?${searchParamsString}`;
}
case 'realtime':
case 'imageGenerate':
case 'imageEdit':
case 'createSpeech':
case 'createTranscription':
case 'createTranslation':
case 'cancelBatch':
case 'createBatch':
case 'getBatchOutput':
case 'retrieveBatch':
case 'listBatches':
case 'retrieveFile':
case 'listFiles':
case 'deleteFile':
case 'realtime': {
return `${ENDPOINT_MAPPING[mappedFn]}?${searchParamsString}`;
}
case 'imageGenerate': {
return `${ENDPOINT_MAPPING[mappedFn]}?${searchParamsString}`;
}
case 'imageEdit': {
return `${ENDPOINT_MAPPING[mappedFn]}?${searchParamsString}`;
}
case 'createSpeech': {
return `${ENDPOINT_MAPPING[mappedFn]}?${searchParamsString}`;
}
case 'createTranscription': {
return `${ENDPOINT_MAPPING[mappedFn]}?${searchParamsString}`;
}
case 'createTranslation': {
return `${ENDPOINT_MAPPING[mappedFn]}?${searchParamsString}`;
}
case 'uploadFile': {
return `${ENDPOINT_MAPPING[mappedFn]}?${searchParamsString}`;
}
case 'retrieveFile': {
return `${ENDPOINT_MAPPING[mappedFn]}?${searchParamsString}`;
}
case 'listFiles': {
return `${ENDPOINT_MAPPING[mappedFn]}?${searchParamsString}`;
}
case 'deleteFile': {
return `${ENDPOINT_MAPPING[mappedFn]}?${searchParamsString}`;
}
case 'retrieveFileContent': {
return `${ENDPOINT_MAPPING[mappedFn]}?${searchParamsString}`;
}

case 'listBatches': {
return `${ENDPOINT_MAPPING[mappedFn]}?${searchParamsString}`;
}
case 'retrieveBatch': {
return `${ENDPOINT_MAPPING[mappedFn]}?${searchParamsString}`;
}
case 'cancelBatch': {
return `${ENDPOINT_MAPPING[mappedFn]}?${searchParamsString}`;
}
case 'getBatchOutput': {
return `${ENDPOINT_MAPPING[mappedFn]}?${searchParamsString}`;
}
case 'createBatch': {
return `${ENDPOINT_MAPPING[mappedFn]}?${searchParamsString}`;
}
default:
return '';
}
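The api.ts hunk unrolls the previous fall-through cases so that every route explicitly returns `ENDPOINT_MAPPING[mappedFn]` plus the query string. A compact sketch of that resolution; the mapping values below are invented placeholders, not the provider's actual endpoints:

```ts
// Placeholder endpoint mapping; the real ENDPOINT_MAPPING lives in the provider.
const ENDPOINT_MAPPING_SKETCH: Record<string, string> = {
  imageGenerate: '/images/generations',
  createSpeech: '/audio/speech',
};

// Each unrolled case in the diff resolves to base + '?' + query string.
function getEndpointSketch(mappedFn: string, searchParamsString: string) {
  const base = ENDPOINT_MAPPING_SKETCH[mappedFn];
  return base ? `${base}?${searchParamsString}` : '';
}
```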
4 changes: 2 additions & 2 deletions src/providers/azure-ai-inference/chatComplete.ts
@@ -1,4 +1,4 @@
import { Params } from '../../types/requestBody';
import { Message, Params } from '../../types/requestBody';
import { OpenAIErrorResponseTransform } from '../openai/utils';
import {
ChatCompletionResponse,
@@ -16,7 +16,7 @@ export const AzureAIInferenceChatCompleteConfig: ProviderConfig = {
param: 'messages',
default: '',
transform: (params: Params) => {
return params.messages?.map((message) => {
return params.messages?.map((message: Message) => {
if (message.role === 'developer') return { ...message, role: 'system' };
return message;
});