Clean up the ollama interface and add a token-count limit interface

997146918 2025-07-02 17:49:02 +08:00
parent 260b014a84
commit 5705ab962a
3 changed files with 29 additions and 4 deletions

AIGC/AICore.py Normal file

@@ -0,0 +1,21 @@
import requests
from ollama import Client, ResponseError
import tiktoken

class AICore:
    modelMaxTokens = 128000
    # Initialize the tokenizer used by DeepSeek (cl100k_base)
    encoder = tiktoken.get_encoding("cl100k_base")

    def __init__(self, model):
        # Initialize the ollama client and keep it on the instance
        self.ollamaClient = Client(host='http://localhost:11434', headers={'x-some-header': 'some-value'})
        # Read the model's context window from the model metadata (key is specific to the qwen2 family)
        response = self.ollamaClient.show(model)
        self.modelMaxTokens = response.modelinfo['qwen2.context_length']

    def getPromptToken(self, prompt) -> int:
        tokens = self.encoder.encode(prompt)
        return len(tokens)
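
A minimal usage sketch (hypothetical, not part of this commit) of how the new helper could gate a prompt against the model's context window; the model name "qwen2" and the over-limit handling are assumptions:

# Hypothetical usage, not part of this commit; assumes a local qwen2 model is available
aicore = AICore("qwen2")
count = aicore.getPromptToken("Hello, world")  # token count via the cl100k_base encoding
if count > aicore.modelMaxTokens:
    # Reject prompts that would overflow the model's context window
    raise ValueError(f"prompt is {count} tokens, limit is {aicore.modelMaxTokens}")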


@@ -11,8 +11,9 @@ from fastapi import FastAPI, Request, HTTPException, WebSocket, WebSocketDisconn
from fastapi.websockets import WebSocketState
from h11 import ConnectionClosed
import uvicorn
from AICore import AICore
from Utils.AIGCLog import AIGCLog
from ollama import Client, ResponseError
app = FastAPI(title = "AI communication service")
logger = AIGCLog(name = "AIGC", log_file = "aigc.log")
@@ -27,8 +28,8 @@ logger.log(logging.INFO, f"The model in use is {args.model}")
maxAIRegerateCount = 5
lastPrompt = ""
# Initialize the ollama client
ollamaClient = Client(host='http://localhost:11434')
aicore = AICore(args.model)
async def heartbeat(websocket: WebSocket):
    pass
@@ -150,6 +151,7 @@ async def generateAIChat(promptStr: str, websocket: WebSocket| None = None):
{"role": "system", "content": promptStr}
]
try:
# response = ollamaClient.chat(
# model = args.model,
# stream = False,
@@ -235,6 +237,7 @@ if __name__ == "__main__":
    server_thread.start()
    # Test
    aicore.getPromptToken("test feature")
    asyncio.run(
        generateAIChat(promptStr = f"""
# You are a game NPC dialogue generator. Strictly follow the requirements below to generate a daily conversation between two characters
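
The hunks above count tokens (the getPromptToken test) but do not yet show the limit being enforced inside generateAIChat. A hedged sketch of what that guard might look like, reusing the names promptStr, aicore, and logger from this diff; the early return on overflow is an assumption, not part of the commit:

# Hypothetical guard near the top of generateAIChat, not part of this commit
promptTokens = aicore.getPromptToken(promptStr)
if promptTokens > aicore.modelMaxTokens:
    # Skip the chat request instead of overflowing the model's context window
    logger.log(logging.WARNING, f"prompt has {promptTokens} tokens, model limit is {aicore.modelMaxTokens}")
    return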


@@ -1,3 +1,4 @@
uvicorn[standard]
fastapi
ollama
tiktoken