Ray Serving on Kubernetes
1. Ray Serve
1.1 Installation
$ pip install transformers requests huggingface_hub sentencepiece
$ pip install "ray[serve]"
1.2 Serving Example
`app.py` 파일을 만들고 다음과 같이 코드를 작성합니다.
from ray import serve
from starlette.requests import Request
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
@serve.deployment(num_replicas=2, ray_actor_options={"num_cpus": 0.5, "num_gpus": 0})
class Translator:
    """Ray Serve deployment that translates text with an M2M100 model.

    The deployment is declared with two replicas, each requesting half a CPU
    and no GPU. With the default arguments it translates English to Korean
    using the ``facebook/m2m100_1.2B`` checkpoint.
    """

    def __init__(
        self,
        model_name: str = "facebook/m2m100_1.2B",
        src_lang: str = "en",
        dest_lang: str = "ko",
    ):
        """Load the tokenizer and model and fix the translation direction.

        Args:
            model_name: Checkpoint passed to ``from_pretrained`` for both the
                tokenizer and the model.
            src_lang: Language code assigned to the tokenizer's ``src_lang``.
            dest_lang: Language code of the translation output.
        """
        # Load model
        self.tokenizer = M2M100Tokenizer.from_pretrained(model_name)
        self.tokenizer.src_lang = src_lang
        self.model = M2M100ForConditionalGeneration.from_pretrained(model_name)
        self.model.eval()
        # The target language is fixed per deployment, so resolve its token id
        # once here instead of on every request.
        self.dest_lang_id = self.tokenizer.get_lang_id(dest_lang)

    def translate(self, text: str) -> str:
        """Translate ``text`` into the configured target language.

        Args:
            text: Text written in the configured source language.

        Returns:
            The first decoded sequence, with special tokens stripped.
        """
        encoded_src = self.tokenizer(text, return_tensors="pt")
        generated_tokens = self.model.generate(
            **encoded_src,
            # Forcing the first generated token to the target-language id is
            # how the output language is selected.
            forced_bos_token_id=self.dest_lang_id,
            max_length=200,
            use_cache=True,
        )
        decoded = self.tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
        return decoded[0]

    async def __call__(self, http_request: Request) -> str:
        """Handle an HTTP request whose JSON body is the text to translate.

        Args:
            http_request: Incoming request; its body is parsed as JSON and
                passed to ``translate`` as-is (expected to be a JSON string).

        Returns:
            The translated text.
        """
        # The body carries the *source* text (English by default), not Korean.
        source_text: str = await http_request.json()
        return self.translate(source_text)
# Bound application object; the `serve run app:translator` command refers to
# it by this module-level name.
translator = Translator.bind()
# Local smoke test without the HTTP layer, kept disabled.
# NOTE(review): presumably this only works with the @serve.deployment
# decorator removed, since the decorated name is no longer the plain class;
# verify before uncommenting.
# if __name__ == '__main__':
#     translator = Translator()
#     print(translator.translate('self-belief and hard work will always earn you success'))
Ray Serve 로 서빙하려면 다음과 같이 실행합니다.
# Server 올리기
$ serve run app:translator
# 테스트
$ curl localhost:8000 -H "Accept: application/json" \
-d '"self-belief and hard work will always earn you success"'
자신감과 열심히 일하면 항상 당신에게 성공을 가져올 것입니다.