Qwen2 Large Language Model Inference

Qwen2-1.5B-Instruct, downloaded from Hugging Face.
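The checkpoint can be fetched ahead of time with the huggingface_hub library; a minimal sketch (assuming huggingface_hub is installed; the local_dir below is an example path and should match llm_model_name_or_path in the script):

from huggingface_hub import snapshot_download

# Download the Qwen/Qwen2-1.5B-Instruct checkpoint into a local folder.
snapshot_download(repo_id="Qwen/Qwen2-1.5B-Instruct",
                  local_dir="./Qwen2-1.5B-Instruct")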
The complete inference script:
from dataclasses import dataclass, field
from typing import Optional
import torch
import torch.nn as nn
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM, PreTrainedModel
import numpy as np
import random

@dataclass
class DecodeArguments:
    llm_type: str = 'qwen2'
    decode_type: str = 'llm'
    max_new_tokens: int = 50
    num_beams: int = 1
    batch_size: int = 1
    result_path: str = "result.txt"

@dataclass
class ModelArguments:
    llm_model_name_or_path: Optional[str] = "D:/2-LearningCode/913_LM/wesr-main/Qwen2-1.5B-Instruct"
    projector_hidden_size: int = 2048
    projector_model_path: Optional[str] = field(default=None)

class ProjectorConv1d(nn.Module):
    """Downsamples speech-encoder features with a strided Conv1d and
    projects them into the LLM embedding space (unused in this text-only demo)."""

    def __init__(self, config, encoder_dim, llm_dim):
        super().__init__()
        self.k = config.encoder_projector_ds_rate  # temporal downsampling rate
        self.conv1d = nn.Conv1d(in_channels=encoder_dim,
                                out_channels=encoder_dim,
                                kernel_size=self.k,
                                stride=self.k,
                                padding=0)
        self.linear1 = nn.Linear(encoder_dim, config.projector_hidden_size)
        self.relu1 = nn.ReLU()
        self.linear2 = nn.Linear(config.projector_hidden_size, llm_dim)
        self.relu2 = nn.ReLU()

    def forward(self, x):
        x = x.transpose(1, 2)   # (B, T, D) -> (B, D, T) for Conv1d
        x = self.conv1d(x)      # downsample T by a factor of k
        x = x.transpose(1, 2)   # back to (B, T', D)
        x = self.relu1(x)
        x = self.linear1(x)
        x = self.relu2(x)
        x = self.linear2(x)
        return x

def freeze_model(model):
    # Disable gradients on every parameter (the LLM stays frozen).
    for _, param in model.named_parameters():
        param.requires_grad = False

class SpeechLLM(PreTrainedModel):
    supports_gradient_checkpointing = True

    def __init__(
        self,
        llm: nn.Module,
        config,
        model_args: ModelArguments,
    ):
        super().__init__(config)
        self.llm = llm

    @torch.autocast(device_type="cuda", dtype=torch.bfloat16)
    def generate(
        self,
        input_ids: torch.LongTensor = None,
        attention_mask: Optional[torch.Tensor] = None,
        eos_token_id=None,
        decode_config=None,
    ):
        # Embed the token ids manually and pass inputs_embeds; this is the
        # hook where speech features would be concatenated in a full SpeechLLM.
        text_emb = self.llm.get_input_embeddings()(input_ids)
        model_outputs = self.llm.generate(
            inputs_embeds=text_emb,
            attention_mask=attention_mask,
            do_sample=False,  # greedy / beam search, no sampling
            top_p=1.0,
            num_beams=decode_config.num_beams,
            max_new_tokens=decode_config.max_new_tokens,
            eos_token_id=eos_token_id,
        )
        return model_outputs

def init_model(model_args):
    # Load the LLM (the tokenizer is loaded separately in main)
    config = transformers.AutoConfig.from_pretrained(
        model_args.llm_model_name_or_path)
    config.use_cache = False
    llm_model = AutoModelForCausalLM.from_pretrained(
        model_args.llm_model_name_or_path,
        config=config,
        torch_dtype='auto',
    )

    model = SpeechLLM(llm_model, config, model_args)

    # load_projector is not defined in this snippet; this branch is only
    # reached when a projector checkpoint path is supplied.
    if model_args.projector_model_path is not None:
        model.load_projector(model_args.projector_model_path)

    return model

def set_seed(seed=42):
    # Seed PyTorch's RNG
    torch.manual_seed(seed)
    # When a GPU is used, seed CUDA as well
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)  # for multi-GPU setups
        # Make cuDNN pick deterministic convolution algorithms
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    # Seed numpy's RNG
    np.random.seed(seed)
    # Seed Python's built-in RNG
    random.seed(seed)
  108.    
def main():
    set_seed()
    model_args = ModelArguments()
    decode_args = DecodeArguments()

    model = init_model(model_args)
    tokenizer = AutoTokenizer.from_pretrained(model_args.llm_model_name_or_path)

    # Quick round-trip sanity check of the tokenizer
    text = "halcom.cn, welcome to halcom.cn"
    ids = tokenizer.encode(text)
    print("Encoded:", ids)
    print("Decoded:", tokenizer.decode(ids))

    if decode_args.llm_type == 'qwen2':
        eos_token_id = tokenizer.convert_tokens_to_ids(['<|endoftext|>', '<|im_end|>'])
    else:
        # Llama-3-style special tokens
        tokenizer.pad_token = '<|finetune_right_pad_id|>'
        eos_token_id = tokenizer.convert_tokens_to_ids(['<|end_of_text|>', '<|eot_id|>'])

    device = torch.device('cuda:0')
    if torch.cuda.is_available():
        model = model.cuda()
    else:
        device = torch.device('cpu')

    model.eval()

    prompt = '编写一段代码'  # "Write a piece of code"

    vocab_size = max(eos_token_id)  # upper bound for valid ids (largest special-token id)
    max_seq_len = 50
    input_ids = tokenizer.encode(prompt)
    # Filter out any out-of-range token ids and cap the prompt length
    input_ids = [i for i in input_ids if i < vocab_size][:max_seq_len]
    input_tensor = torch.tensor([input_ids], dtype=torch.long).to(device)
    seq_len = input_tensor.size(1)
    # All-ones attention mask over the unpadded prompt; generate() builds
    # the causal mask internally.
    mask = torch.ones(1, seq_len, dtype=torch.long).to(device)

    generated_ids = model.generate(input_tensor, mask,
                                   eos_token_id=eos_token_id,
                                   decode_config=decode_args)
    # With inputs_embeds, generate() returns only the newly generated ids.
    # generated_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
    generated_text = tokenizer.decode(generated_ids.squeeze(0), skip_special_tokens=True)

    print(f'Input: {prompt}\nGenerated: {generated_text}')

if __name__ == "__main__":
    main()
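Note that the script feeds the raw prompt string to an Instruct model. Qwen2-1.5B-Instruct is trained on the ChatML conversation format, so wrapping the prompt with the tokenizer's chat template usually produces a proper assistant-style answer. A minimal sketch using the standard transformers apply_chat_template API, as a drop-in replacement for the tokenizer.encode(prompt) call in main above:

messages = [{"role": "user", "content": prompt}]
chat_text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,              # return the formatted string
    add_generation_prompt=True,  # append the assistant header
)
input_ids = tokenizer.encode(chat_text)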