FlagAI/examples/Aquila/Aquila-chat/generate_chat.py at master · FlagAI-Open/FlagAI · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# Copyright © 2023 BAAI. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License")
import os
import torch
from flagai.auto_model.auto_loader import AutoLoader
from flagai.model.predictor.predictor import Predictor
from flagai.model.predictor.aquila import aquila_generate
from flagai.data.tokenizer import Tokenizer

# Checkpoint root and the model to load from it.
# NOTE: despite its name, `state_dict` holds a directory path, not weights.
state_dict = "./checkpoints_in"
model_name = 'aquilachat-7b'
# Per-model subdirectory under the checkpoint root. It is computed here but
# not referenced again in the visible part of this script.
cache_dir = os.path.join(state_dict, model_name)

# Build the loader for the "lm" task, forwarding the checkpoint location and
# the cache / device / fp16 options as keyword arguments.
loader = AutoLoader(
    "lm",
    model_dir=state_dict,
    model_name=model_name,
    use_cache=True,
    device='cuda',
    fp16=True,
)
# Same evaluation order as fetching them one after the other: model first,
# tokenizer second.
model, tokenizer = loader.get_model(), loader.get_tokenizer()

# Prepare the model for inference: eval(), then half(), then cuda().
# NOTE(review): half()/cuda() look redundant with fp16=True / device='cuda'
# passed to the loader above -- confirm before removing either.
model.eval()
model.half()
model.cuda()

# Predictor wrapper around the model/tokenizer pair; the generation loop in
# this script calls aquila_generate directly rather than going through it.
predictor = Predictor(model, tokenizer)

# Sample prompts (mostly Chinese) that the script sends to the chat model,
# one generation call per entry. English glosses are given as comments only;
# the strings themselves are what the model receives.
texts = [
    "北京为什么是中国的首都？",  # "Why is Beijing the capital of China?"
    "1+1=",
    "为什么湘菜那么甜？",  # "Why is Hunan cuisine so sweet?"
    "东三省和海南岛的区别？",  # "How do the three northeastern provinces differ from Hainan Island?"
]

# The conversation template does not change between prompts, so import it
# once before the loop instead of re-executing the import on every iteration.
from cyg_conversation import default_conversation

# Maximum generation length handed to aquila_generate for every prompt.
# Bound with a plain assignment (rather than a `:=` expression buried in the
# call's argument list) so the value is easy to find and change.
max_gen_len = 200

# Run one generation per sample prompt and print the prompt and the result.
for text in texts:
    print('-' * 80)
    print(f"text is {text}")

    # Start from a fresh copy of the template and add a single turn: the
    # prompt under the first role, then an empty (None) message under the
    # second role.
    conv = default_conversation.copy()
    conv.append_message(conv.roles[0], text)
    conv.append_message(conv.roles[1], None)

    # Tokenize the full conversation prompt, not just the raw user text.
    tokens = tokenizer.encode_plus(f"{conv.get_prompt()}",
                                   None,
                                   max_length=None)['input_ids']
    # TODO: for few-shot inference, using the plain text as input gives
    # better results:
    #   tokens = tokenizer.encode_plus(f"{text}", None, max_length=None)['input_ids']

    # Drop the first and last token ids.
    # NOTE(review): presumably these are special tokens added by
    # encode_plus -- confirm against the tokenizer.
    tokens = tokens[1:-1]

    # Gradients are not needed for generation.
    with torch.no_grad():
        # max_gen_len is passed positionally, exactly as before.
        out = aquila_generate(tokenizer,
                              model, [text],
                              max_gen_len,
                              top_p=0.95,
                              prompts_tokens=[tokens])
    print(f"pred is {out}")