LLM - UPSTAGE SOLAR 10.7B v1.0
Personal Experience
- 한국어는 잘 안 되는 듯합니다. (v1.0은 지시 학습이 되지 않은 베이스 모델로 보이며, 질문에 답하기보다 텍스트를 이어 쓰는 경향이 있습니다.)
Installation
$ pip install torch transformers bitsandbytes datasets accelerate peft trl
Quick Code
Import Libraries
import argparse
import os
import warnings
import torch
import transformers
from accelerate import init_empty_weights, load_checkpoint_and_dispatch
from transformers import (
AutoModelForCausalLM,
AutoTokenizer,
GenerationConfig,
StoppingCriteria,
StoppingCriteriaList,
)
# Suppress every Python warning so the notebook output stays readable.
warnings.simplefilter("ignore")

# Release any cached CUDA allocator blocks before the model is loaded.
torch.cuda.empty_cache()
Loading Model and Tokenizer
# Hugging Face Hub repository id of the checkpoint used throughout this file.
MODEL_ID = "Upstage/SOLAR-10.7B-v1.0"

# Tokenizer that matches the checkpoint's vocabulary.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Load the weights in float16. device_map="auto" leaves layer placement to
# the library, and anything that has to be offloaded to disk is written
# under ./offload. The model is switched to evaluation mode right away,
# since this script only runs inference.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16,
    device_map="auto",
    offload_folder="./offload",
    low_cpu_mem_usage=True,
).eval()
Inference
# Prompt passed to the model verbatim; the surrounding newlines are part of it.
text = '''
한글 가능해?
'''

# Tokenize the prompt and move the tensors to the device the model expects
# its inputs on. The model is loaded with device_map="auto", so a hard-coded
# "cuda" target breaks on CPU-only hosts and may not match the model's
# input device; model.device follows whatever placement was chosen.
inputs = tokenizer(text, return_tensors="pt").to(model.device)

# Sampling setup: low temperature with nucleus sampling (top_p) and a
# repetition penalty, capped at 256 newly generated tokens.
# NOTE(review): min_length counts prompt tokens as well, so with this prompt
# it is probably a no-op; min_new_tokens may be what was intended - confirm.
generation_config = GenerationConfig(
    temperature=0.1,
    max_new_tokens=256,
    eos_token_id=tokenizer.eos_token_id,
    repetition_penalty=1.2,
    do_sample=True,
    top_p=0.9,
    min_length=5,
    use_cache=True,
)

# Gradients are not needed for generation; disabling them saves memory.
with torch.no_grad():
    outputs = model.generate(**inputs, generation_config=generation_config)

# outputs[0] holds the prompt tokens followed by the generated continuation,
# so the printed text starts with the prompt itself.
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
일단 한국어는 안 됩니다. 아래는 위 코드를 실행한 실제 출력입니다.
한글 가능해?
#include <iostream>
using namespace std;
int main() {
cout << "Hello World!" << endl; //endl은 \n과같음
return 0;
}