Qwen2 (Qianwen) large language model inference
Model: Qwen2-1.5B-Instruct, downloaded from Hugging Face. The script below loads the model with transformers, wraps it in a thin SpeechLLM shell (the speech projector class is kept but unused for text-only decoding), and runs greedy decoding on a short Chinese prompt.
from dataclasses import dataclass, field
from typing import Optional

import torch
import torch.nn as nn
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM, PreTrainedModel
import numpy as np
import random

@dataclass
class DecodeArguments:
    llm_type: str = 'qwen2'
    decode_type: str = 'llm'
    max_new_tokens: int = 50
    num_beams: int = 1
    batch_size: int = 1
    result_path: str = "result.txt"


@dataclass
class ModelArguments:
    llm_model_name_or_path: Optional[str] = "D:/2-LearningCode/913_LM/wesr-main/Qwen2-1.5B-Instruct"
    projector_hidden_size: int = 2048
    projector_model_path: Optional[str] = field(default=None)

class ProjectorConv1d(nn.Module):
    """Downsamples speech-encoder features by a factor of k with a strided
    Conv1d, then maps them into the LLM embedding space. Not used in this
    text-only demo, but kept for completeness."""

    def __init__(self, config, encoder_dim, llm_dim):
        super().__init__()
        self.k = config.encoder_projector_ds_rate
        self.conv1d = nn.Conv1d(in_channels=encoder_dim,
                                out_channels=encoder_dim,
                                kernel_size=self.k,
                                stride=self.k,
                                padding=0)
        self.linear1 = nn.Linear(encoder_dim, config.projector_hidden_size)
        self.relu1 = nn.ReLU()
        self.linear2 = nn.Linear(config.projector_hidden_size, llm_dim)
        self.relu2 = nn.ReLU()

    def forward(self, x):
        # x: (batch, time, encoder_dim); Conv1d expects (batch, channels, time)
        x = x.transpose(1, 2)
        x = self.conv1d(x)   # time axis shrinks by a factor of k
        x = x.transpose(1, 2)
        x = self.relu1(x)
        x = self.linear1(x)
        x = self.relu2(x)
        x = self.linear2(x)  # (batch, time // k, llm_dim)
        return x

def freeze_model(model):
    # Freeze all parameters (used during projector training; unused here).
    for _, param in model.named_parameters():
        param.requires_grad = False

class SpeechLLM(PreTrainedModel):
    supports_gradient_checkpointing = True

    def __init__(
        self,
        llm: nn.Module,
        config,
        model_args: ModelArguments,
    ):
        super().__init__(config)
        self.llm = llm

    # Autocast to bfloat16 on CUDA; on CPU-only machines recent PyTorch
    # versions disable this context with a warning instead of failing.
    @torch.autocast(device_type="cuda", dtype=torch.bfloat16)
    def generate(
        self,
        input_ids: torch.LongTensor = None,
        attention_mask: Optional[torch.Tensor] = None,
        eos_token_id=None,
        decode_config=None,
    ):
        # Look up the token embeddings explicitly so that speech embeddings
        # could later be spliced in; generation is then driven by
        # inputs_embeds. With inputs_embeds (and no input_ids), recent
        # transformers versions return only the newly generated token ids.
        text_emb = self.llm.get_input_embeddings()(input_ids)
        model_outputs = self.llm.generate(
            inputs_embeds=text_emb,
            attention_mask=attention_mask,
            do_sample=False,  # greedy / beam search for reproducibility
            top_p=1.0,
            num_beams=decode_config.num_beams,
            max_new_tokens=decode_config.max_new_tokens,
            eos_token_id=eos_token_id,
        )
        return model_outputs

def init_model(model_args):
    # Load LLM model and tokenizer
    config = transformers.AutoConfig.from_pretrained(
        model_args.llm_model_name_or_path)
    # use_cache=True enables the KV cache during autoregressive decoding.
    # The original code set it to False, a training-time leftover that makes
    # generation much slower without changing the result.
    config.use_cache = True
    llm_model = AutoModelForCausalLM.from_pretrained(
        model_args.llm_model_name_or_path,
        config=config,
        torch_dtype='auto',
    )
    model = SpeechLLM(llm_model, config, model_args)

    # load_projector is not defined in this trimmed-down demo, so
    # projector_model_path must stay None for text-only inference.
    if model_args.projector_model_path is not None:
        model.load_projector(model_args.projector_model_path)

    return model

def set_seed(seed=42):
    # Seed PyTorch
    torch.manual_seed(seed)
    # When running on GPU, also seed CUDA
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)  # for multi-GPU setups
    # Make cuDNN pick deterministic convolution algorithms
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # Seed numpy
    np.random.seed(seed)
    # Seed Python's random module
    random.seed(seed)

def main():
    set_seed()
    model_args = ModelArguments()
    decode_args = DecodeArguments()

    model = init_model(model_args)
    tokenizer = AutoTokenizer.from_pretrained(model_args.llm_model_name_or_path)

    # Quick tokenizer round-trip sanity check
    text = "halcom.cn, welcome to halcom.cn"
    ids = tokenizer.encode(text)
    print("Encoded:", ids)
    print("Decoded:", tokenizer.decode(ids))

    if decode_args.llm_type == 'qwen2':
        # Qwen2 stops on either of these special tokens
        eos_token_id = tokenizer.convert_tokens_to_ids(['<|endoftext|>', '<|im_end|>'])
    else:
        # Llama-3 style special tokens
        tokenizer.pad_token = '<|finetune_right_pad_id|>'
        eos_token_id = tokenizer.convert_tokens_to_ids(['<|end_of_text|>', '<|eot_id|>'])

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    model.eval()

    prompt = '编写一段代码'  # "Write a piece of code"
    vocab_size = max(eos_token_id)
    max_seq_len = 50
    input_ids = tokenizer.encode(prompt)
    # Defensive: drop ids at or above the largest special-token id and cap the
    # prompt length. For an ordinary prompt this filter is a no-op.
    input_ids = [i for i in input_ids if i < vocab_size][:max_seq_len]
    input_tensor = torch.tensor([input_ids], dtype=torch.long).to(device)
    seq_len = input_tensor.size(1)
    # attention_mask is a padding mask (1 = real token); the causal mask is
    # applied inside the model. The original torch.tril(torch.ones(1, seq_len))
    # kept only the first position and masked out the rest of the prompt.
    mask = torch.ones(1, seq_len, dtype=torch.long).to(device)

    generated_ids = model.generate(input_ids=input_tensor,
                                   attention_mask=mask,
                                   eos_token_id=eos_token_id,
                                   decode_config=decode_args)
    # generated_ids holds only the newly generated tokens, because generation
    # was driven by inputs_embeds.
    generated_text = tokenizer.decode(generated_ids.squeeze(0), skip_special_tokens=True)

    print(f'Input: {prompt}\nOutput: {generated_text}')


if __name__ == "__main__":
    main()
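
Although the projector is unused in this text-only demo, a quick shape check makes the Conv1d downsampling concrete. This is a minimal sketch, assuming ProjectorConv1d from the script above is in scope; the config object is a hypothetical stand-in carrying only the two fields the class reads, and encoder_dim/llm_dim are illustrative values (1536 matches Qwen2-1.5B's hidden size).

import torch
from types import SimpleNamespace

# Hypothetical config stand-in with the two fields ProjectorConv1d reads.
cfg = SimpleNamespace(encoder_projector_ds_rate=4, projector_hidden_size=2048)
proj = ProjectorConv1d(cfg, encoder_dim=512, llm_dim=1536)

x = torch.randn(2, 100, 512)  # (batch, time, encoder_dim)
y = proj(x)
print(y.shape)                # torch.Size([2, 25, 1536]): time 100 -> 100 // 4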
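One caveat: the script feeds a raw prompt into an Instruct model. Qwen2-Instruct is trained with a ChatML-style template, so wrapping the prompt with tokenizer.apply_chat_template generally gives noticeably better answers. A minimal sketch, assuming the same local model path as above:

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(
    "D:/2-LearningCode/913_LM/wesr-main/Qwen2-1.5B-Instruct")
messages = [{"role": "user", "content": "编写一段代码"}]
# add_generation_prompt appends the assistant header so the model starts answering.
input_ids = tokenizer.apply_chat_template(messages,
                                          add_generation_prompt=True,
                                          return_tensors="pt")
# input_ids can then replace input_tensor in main() above.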