@@ -362,17 +362,17 @@ def _generate(self, input: PromptType, max_out_len: int,
362362 if 'error' in response :
363363 if response ['error' ]['code' ] == 'rate_limit_exceeded' :
364364 time .sleep (10 )
365- self .logger .warn ('Rate limit exceeded, retrying...' )
365+ self .logger .warning ('Rate limit exceeded, retrying...' )
366366 continue
367367 elif response ['error' ]['code' ] == 'insufficient_quota' :
368368 self .invalid_keys .add (key )
369- self .logger .warn (f'insufficient_quota key: { key } ' )
369+ self .logger .warning (f'insufficient_quota key: { key } ' )
370370 continue
371371 elif response ['error' ]['code' ] == 'invalid_prompt' :
372- self .logger .warn ('Invalid prompt:' , str (input ))
372+ self .logger .warning ('Invalid prompt:' , str (input ))
373373 return ''
374374 elif response ['error' ]['type' ] == 'invalid_prompt' :
375- self .logger .warn ('Invalid prompt:' , str (input ))
375+ self .logger .warning ('Invalid prompt:' , str (input ))
376376 return ''
377377
378378 self .logger .error (
@@ -420,7 +420,7 @@ def _init_tokenizer(self):
420420 f'Successfully load hf tokenizer: { self .tokenizer_path } ' )
421421 return
422422 except Exception as e :
423- self .logger .warn (f'Failed to load hf tokenizer: { repr (e )} ' )
423+ self .logger .warning (f'Failed to load hf tokenizer: { repr (e )} ' )
424424
425425 # Fallback to gpt-4 tokenizer
426426 if self .verbose :
@@ -625,34 +625,47 @@ def __init__(
625625 verbose = verbose ,
626626 max_workers = max_workers ,
627627 )
628- from openai import OpenAI
629-
630628 # support multiple api_base for acceleration
631629 if isinstance (openai_api_base , List ):
632630 self .openai_api_base = random .choice (openai_api_base )
633631 else :
634632 self .openai_api_base = openai_api_base
635633
636- if self .proxy_url or http_client_cfg :
637- if self .proxy_url :
638- http_client_cfg ['proxies' ] = {
639- 'http://' : self .proxy_url ,
640- 'https://' : self .proxy_url ,
641- }
642-
643- self .openai_client = OpenAI (
644- base_url = self .openai_api_base ,
645- api_key = key ,
646- http_client = httpx .Client (
647- ** http_client_cfg ) if http_client_cfg else None ,
648- )
649634 self .timeout = timeout
635+ self .http_client_cfg = http_client_cfg
636+ self .openai_client = self ._create_fresh_client ()
637+
650638 if self .verbose :
651639 self .logger .info (f'Used openai_client: { self .openai_client } ' )
652640 self .status_code_mappings = status_code_mappings
653641 self .think_tag = think_tag
654642 self .openai_extra_kwargs = openai_extra_kwargs
655643
def _create_fresh_client(self):
    """Create a fresh OpenAI client backed by its own ``httpx.Client``.

    The API key comes from ``self._next_valid_key()`` and the base URL
    from ``self.openai_api_base``. The HTTP client is built from a copy of
    ``self.http_client_cfg``, so the stored configuration is never
    mutated. When ``self.proxy_url`` is set it is installed for both the
    ``http://`` and ``https://`` schemes, replacing any ``proxies`` entry
    in the copied configuration.

    ``timeout`` and ``limits`` are applied as defaults only: a value
    supplied by the caller through ``http_client_cfg`` takes precedence,
    instead of colliding with the keyword arguments passed here.

    Returns:
        OpenAI: A newly constructed client; nothing is cached or reused.
    """
    import httpx
    from openai import OpenAI

    # Get current key (with key rotation)
    current_key = self._next_valid_key()

    # Work on a copy, and tolerate a missing (None) config.
    http_client_cfg = dict(self.http_client_cfg or {})
    if self.proxy_url:
        # NOTE(review): recent httpx releases replaced the ``proxies``
        # argument with ``proxy``/``mounts`` -- confirm against the httpx
        # version this project pins before upgrading.
        http_client_cfg['proxies'] = {
            'http://': self.proxy_url,
            'https://': self.proxy_url,
        }

    # setdefault avoids "got multiple values for keyword argument" when
    # the user config already carries its own timeout or limits.
    http_client_cfg.setdefault('timeout', httpx.Timeout(self.timeout))
    http_client_cfg.setdefault(
        'limits',
        httpx.Limits(max_keepalive_connections=2048,
                     max_connections=4096))
    http_client = httpx.Client(**http_client_cfg)

    return OpenAI(base_url=self.openai_api_base,
                  api_key=current_key,
                  http_client=http_client)
668+
656669 def _generate (
657670 self ,
658671 input : PromptList | str ,
0 commit comments