-
Notifications
You must be signed in to change notification settings - Fork 417
Expand file tree
/
Copy pathgenerate_chat.py
More file actions
executable file
·60 lines (50 loc) · 1.83 KB
/
generate_chat.py
File metadata and controls
executable file
·60 lines (50 loc) · 1.83 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# Copyright © 2023 BAAI. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License")
import os
import torch
from flagai.auto_model.auto_loader import AutoLoader
from flagai.model.predictor.predictor import Predictor
from flagai.model.predictor.aquila import aquila_generate
from flagai.data.tokenizer import Tokenizer
# NOTE: despite its name, `state_dict` is a directory path (handed to
# AutoLoader as `model_dir`), not a loaded state dict.
state_dict = "./checkpoints_in"
model_name = "aquilachat-7b"
# Folder of this specific model inside the checkpoint directory.
cache_dir = os.path.join(state_dict, model_name)

# Build the "lm" loader for the chosen model, requesting fp16 on CUDA.
loader = AutoLoader(
    "lm",
    model_dir=state_dict,
    model_name=model_name,
    use_cache=True,
    device="cuda",
    fp16=True,
)
model = loader.get_model()
tokenizer = loader.get_tokenizer()

# Prepare the model for inference: eval mode, half precision, on the GPU
# (assuming the usual torch.nn.Module semantics for these three calls).
model.eval()
model.half()
model.cuda()

predictor = Predictor(model, tokenizer)
# Sample user questions (in Chinese) that are sent to the chat model one by one.
texts = [
    "北京为什么是中国的首都?",
    "1+1=",
    "为什么湘菜那么甜?",
    "东三省和海南岛的区别?",
]

# Imported once, ahead of the loop, rather than on every iteration.
from cyg_conversation import default_conversation

# Generation length budget; handed to aquila_generate positionally below.
max_gen_len = 200

for text in texts:
    print('-' * 80)
    print(f"text is {text}")

    # Start from a fresh copy of the conversation template so that earlier
    # questions do not leak into this prompt. The question goes in under
    # roles[0]; roles[1] gets a None message (presumably the slot the model
    # is expected to fill in -- confirm against cyg_conversation).
    conv = default_conversation.copy()
    conv.append_message(conv.roles[0], text)
    conv.append_message(conv.roles[1], None)

    tokens = tokenizer.encode_plus(str(conv.get_prompt()),
                                   None,
                                   max_length=None)['input_ids']
    # TODO: for few-shot inference, encoding the plain text instead of the
    # conversation prompt gives better results:
    # tokens = tokenizer.encode_plus(f"{text}", None, max_length=None)['input_ids']

    # Drop the first and the last id -- presumably the begin/end-of-sequence
    # markers added by encode_plus; verify against the tokenizer.
    tokens = tokens[1:-1]

    # Inference only: no autograd bookkeeping is needed while generating.
    with torch.no_grad():
        out = aquila_generate(tokenizer,
                              model, [text],
                              max_gen_len,
                              top_p=0.95,
                              prompts_tokens=[tokens])
    print(f"pred is {out}")