Prerequisites:
The environment is already set up (Python, FastAPI, and the required Alibaba Cloud API keys are configured).
This article uses FastAPI + Python with Alibaba's Qwen (千问) models to build an AI product. The interface looks like this:

It supports text chat, image generation, image recognition, and voice input (voice input works only in a PC environment).
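All keys and passwords are loaded from a .env file via python-dotenv. A minimal sketch of that file, using the exact variable names the backend reads (the values are placeholders to replace with your own credentials):

# .env (placeholder values)
Dashscope_API_Key=sk-xxxxxxxx      # DashScope key (Qwen chat/VL, Tongyi Wanxiang)
Aliyun_Search_Key=OS-xxxxxxxx      # OpenSearch web-search key
Mail_Password=xxxxxxxx             # SMTP authorization code for the 126 mailbox
AMAP_API_KEY=xxxxxxxx              # Amap key (only checked by /api/info)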

Backend
func_calling.py
This module implements email sending and declares the function metadata that gives the model its Function Calling ability.
import smtplib, os  # smtplib implements the SMTP protocol
# The email package builds the mail headers and body
# from email.mime.application import MIMEApplication
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from dotenv import load_dotenv

load_dotenv()


def send_email(receiver, content, subject=None):
    sender = 'xianggu625@126.com'  # sender address
    # Build the message container
    msg = MIMEMultipart()
    if subject is not None:
        msg['Subject'] = subject
    else:
        msg['Subject'] = f"来自{sender}的问候邮件"
    msg['From'] = sender
    msg['To'] = receiver
    # Build the message body (HTML is supported)
    body = MIMEText(content, 'html', 'utf-8')
    msg.attach(body)
    # Connect to the mail server and send; smtplib.SMTP_SSL is used because the server requires SSL
    smtpObj = smtplib.SMTP_SSL('smtp.126.com', 465)
    smtpObj.login(user=sender, password=os.getenv("Mail_Password"))
    smtpObj.sendmail(sender, receiver, msg.as_string())
    smtpObj.quit()
    return "邮件已经成功发送到:" + receiver


# System-level prompt
system_prompt = """
你是一名AI助手,具备函数调用的能力,但是如果提供的信息已经足够回答用户的问题,则不需要再进行函数调用。
同时,请严格按照函数调用的方式进行处理,如果用户未提供函数所需参数,则必须询问,而不能自作主张。
"""

# Declare the function and its parameters; the (Chinese) descriptions should be as
# precise as possible so the model understands when and how to call it
functions = [
    {
        "type": "function",
        "function": {
            "name": "send_email",
            "description": "向指定邮箱地址发送一封邮件",
            "parameters": {
                "type": "object",
                "properties": {
                    "receiver": {
                        "type": "string",
                        "description": "邮件的收件地址",
                    },
                    "content": {
                        "type": "string",
                        "description": "邮件的正文内容,支持HTML格式",
                    },
                    "subject": {
                        "type": "string",
                        "description": "邮件的标题,如果没有标题,可以设置为空",
                    },
                },
                "required": ["receiver", "content"]
            },
        }
    }
]

# Smoke-test the email sender
if __name__ == '__main__':
    send_email("xianggu625@126.com", "祝你节日快乐,工作顺利。")
module.py
A small Python client for Alibaba Cloud OpenSearch (the intelligent-search service): it runs a semantic search over documents and web pages and returns summary results.
import requests
import os
import json
from dotenv import load_dotenv

load_dotenv()


def aliyun_search(content):
    print(f"🔍 搜索内容: {content}")
    print(f"📝 使用的密钥前几位: {os.getenv('Aliyun_Search_Key')[:15] if os.getenv('Aliyun_Search_Key') else '无密钥'}")
    url = "http://default-cu35.platform-cn-shanghai.opensearch.aliyuncs.com/v3/openapi/workspaces/default/web-search/ops-web-search-001"
    header = {"Content-Type": "application/json", "Authorization": f"Bearer {os.getenv('Aliyun_Search_Key')}"}
    data = {"query": content, "top_k": 3, "way": "full", "content_type": "summary"}
    print(f"🌐 请求URL: {url}")
    print(f"📦 请求数据: {data}")
    try:
        resp = requests.post(url, headers=header, json=data, timeout=10)
        print(f"📡 状态码: {resp.status_code}")
        if resp.status_code != 200:
            print(f"❌ 请求失败! 响应内容: {resp.text}")
            return []
        # Print the full response (debugging aid)
        response_json = resp.json()
        print("📄 完整响应结构:")
        print(json.dumps(response_json, indent=2, ensure_ascii=False, default=str))
        # Extract the result list defensively, since the response layout may vary
        if 'result' in response_json and 'search_result' in response_json['result']:
            results = response_json['result']['search_result']
        elif 'search_result' in response_json:
            results = response_json['search_result']
        elif 'data' in response_json and 'search_result' in response_json['data']:
            results = response_json['data']['search_result']
        elif 'items' in response_json:
            results = response_json['items']
        elif 'hits' in response_json and 'hits' in response_json['hits']:
            results = response_json['hits']['hits']
        else:
            print("⚠️ 警告: 未找到预期的响应结构")
            # Fall back to the raw response if it is a list, otherwise an empty list
            results = response_json if isinstance(response_json, list) else []
        print(f"✅ 成功解析 {len(results)} 个结果")
        return results
    except requests.exceptions.Timeout:
        print("⏰ 请求超时")
        return []
    except json.JSONDecodeError:
        print("❌ 响应不是有效的JSON格式")
        print(f"原始响应: {resp.text[:500]}...")
        return []
    except Exception as e:
        print(f"💥 搜索请求出错: {type(e).__name__}: {e}")
        return []
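The search client can be smoke-tested on its own (assuming the Aliyun_Search_Key in .env is valid); a minimal sketch:

if __name__ == '__main__':
    for item in aliyun_search("FastAPI 是什么"):
        print(item)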
generate.py
Image generation.
from fastapi import APIRouter, Body
from http import HTTPStatus
import os, requests
from dashscope import ImageSynthesis  # the DashScope SDK
from dotenv import load_dotenv

load_dotenv()

generate = APIRouter()


# Text-to-image uses Tongyi Wanxiang V2. It is not exposed through the OpenAI-compatible
# SDK, so the DashScope SDK is used instead. There is also no streaming here, because
# the response is a finished image rather than incremental text.
@generate.post("/generate")
def generate_image(data: dict = Body()):
    api_key = os.getenv("Dashscope_API_Key")
    rsp = ImageSynthesis.call(api_key=api_key,
                              model="wanx2.1-t2i-turbo",  # model name
                              prompt=data['content'],     # prompt text
                              n=1,                        # number of images to generate
                              size='1024*1024')           # image size
    if rsp.status_code == HTTPStatus.OK:
        # Save the images under the static/images directory
        for result in rsp.output.results:
            # result.url is the temporary online address of the generated image
            file_name = result.url.split('/')[-1].split("?")[0]
            with open(f'./static/images/{file_name}', 'wb') as f:
                f.write(requests.get(result.url).content)
        # Return the image URL so the frontend can render it
        return {"message": "successful", "image_url": f"/static/images/{file_name}"}
    # Surface generation failures instead of silently returning None
    return {"message": "failed", "detail": f"{rsp.code}: {rsp.message}"}
recognize.py
Image recognition.
from fastapi import APIRouter, Body
from fastapi.responses import StreamingResponse
import json, os
from openai import OpenAI
from dotenv import load_dotenv

load_dotenv()

recog = APIRouter()


# Image recognition is usually a single-turn conversation, so no history is kept
@recog.post("/recognize")
def recognize_image(data: dict = Body()):
    # The Base64 submitted by JS looks like: data:image/jpeg;base64,/9j/4AAQSkZJRgA...
    # It can be sent to the Qwen-VL model as-is (as a data URL); only when saving to
    # disk would the Base64 payload need to be extracted:
    b64str = data['base64'].split(',')[1]

    def stream_chat():
        client = OpenAI(api_key=os.getenv("Dashscope_API_Key"),
                        base_url="https://dashscope.aliyuncs.com/compatible-mode/v1")
        completion = client.chat.completions.create(
            model="qwen-vl-max-latest",
            messages=[
                {"role": "system", "content": "你是一名专业的AI助手,可以帮助用户解答任何问题,也能以精准简洁的语言识别并描述出图像的内容。"},
                {"role": "user", "content": [
                    {"type": "image_url", "image_url": {"url": data['base64']}},
                    {"type": "text", "text": data['content']}
                ]}
            ],
            stream=True
        )
        for chunk in completion:
            # Yield each delta as one JSON line; skip empty deltas (e.g. the role-only first chunk)
            choice = chunk.choices[0].delta.content
            if choice:
                yield json.dumps({"content": choice}) + "\n"

    # Stream the output back to the frontend
    return StreamingResponse(stream_chat(), media_type="text/event-stream")
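A minimal test client for this route, assuming a local test.jpg; it builds the same data-URL shape the page's FileReader produces and prints the streamed JSON lines:

import base64, json, requests

with open("test.jpg", "rb") as f:
    data_url = "data:image/jpeg;base64," + base64.b64encode(f.read()).decode()

resp = requests.post("http://localhost:8000/recognize",
                     json={"base64": data_url, "content": "请描述图片内容"},
                     stream=True)
for line in resp.iter_lines():
    if line:
        print(json.loads(line)["content"], end="")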
qa.py
Provides a chat API that combines streaming responses, external knowledge retrieval, and function calling, designed as the core endpoint of a ChatGPT-style web/mobile AI assistant.
from fastapi import APIRouter, Body
from fastapi.responses import StreamingResponse
import json, os
from openai import OpenAI
from dotenv import load_dotenv

load_dotenv()

from module import aliyun_search
from func_calling import send_email, functions

qa = APIRouter()

# Conversation history shared across requests: a simple in-process chat memory
messages = [{
    "role": "system",
    "content": "你是一名专业的AI助手,可以帮助用户解答任何问题。"
}]


# Define the endpoint and wrap the generator output in a streaming response
@qa.post("/stream")
def stream(question: dict = Body()):
    # Read the JSON parameters
    content = question['content']
    search = question['search']
    if search:
        search_result = aliyun_search(content)
        # print(search_result)
        message = {"role": "user", "content": f"请使用以下内容:\n{search_result}\n,并基于用户的提问:\n{content}\n来进行回答。"}
    else:
        message = {"role": "user", "content": content}

    # Function-calling step: let the model read the question and, if needed,
    # return a function-call declaration
    def check_func_call(message):
        messages.append(message)
        client = OpenAI(api_key=os.getenv("Dashscope_API_Key"),
                        base_url="https://dashscope.aliyuncs.com/compatible-mode/v1")
        completion = client.chat.completions.create(
            model="qwen-plus",
            messages=messages,
            stream=False,  # function calling is checked here without streaming
            tools=functions
        )
        return completion.choices[0].message

    # stream_chat no longer appends the user message itself; check_func_call already did
    def stream_chat():
        client = OpenAI(api_key=os.getenv("Dashscope_API_Key"),
                        base_url="https://dashscope.aliyuncs.com/compatible-mode/v1")
        completion = client.chat.completions.create(
            model="qwen-plus",
            messages=messages,
            stream=True,
            stream_options={"include_usage": False}
        )
        # reply accumulates this answer so it can be stored as chat memory
        reply = ""
        for chunk in completion:
            # Yield each delta as one JSON line; skip empty deltas (e.g. the role-only first chunk)
            choice = chunk.choices[0].delta.content
            if choice:
                reply += choice
                yield json.dumps({"content": choice}) + "\n"
        # After the loop, store the AI reply in messages as well
        messages.append({"role": "assistant", "content": reply})

    # First call check_func_call to see whether the model wants a function call.
    # If so, invoke the function and ask the model to relay the result to the user;
    # otherwise the question (already appended by check_func_call) goes straight to stream_chat.
    output = check_func_call(message)
    if output.tool_calls:
        func_name = output.tool_calls[0].function.name
        func_args = json.loads(output.tool_calls[0].function.arguments)  # arguments arrive as a JSON string
        func = globals()[func_name]
        result = func(**func_args)
        messages.append({"role": "user", "content": f"请将以下内容直接回复给用户: {result}"})
    return StreamingResponse(stream_chat(), media_type="text/event-stream")
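A note on the design: relaying the function result through a fresh user message is simple and works. The standard OpenAI-compatible tool protocol would instead append the assistant's tool_calls message plus a role "tool" message and then ask the model again; a sketch of that variant, using the same objects as above:

    messages.append(output)  # the assistant message that contains tool_calls
    messages.append({
        "role": "tool",
        "tool_call_id": output.tool_calls[0].id,
        "content": result
    })
    # stream_chat() then lets the model phrase the final answer from the tool result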
main.py
The backend entry point.
from fastapi import FastAPI, Request
from fastapi.templating import Jinja2Templates
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
import uvicorn, os
from dotenv import load_dotenv

# Load environment variables first
load_dotenv()

app = FastAPI(
    title="AI助手API (含MCP功能)",
    description="集成高德地图MCP功能的AI助手",
    version="1.0.0"
)

# CORS configuration
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Static directory and templates
app.mount("/static", StaticFiles(directory="static"), name="static")
templates = Jinja2Templates(directory="templates")

# Import the routers after app setup, to avoid circular-import problems
from qa import qa
from recognize import recog
from generate import generate

# The speech-token router is optional; degrade gracefully if it is missing
try:
    from api.token import router as token_router
    app.include_router(token_router)
    print("✅ Token路由注册成功")
except ImportError as e:
    print(f"⚠️ Token路由导入失败: {e}")

# Register the remaining routers
app.include_router(qa)
app.include_router(recog)
app.include_router(generate)


@app.get('/')
def chat(request: Request):
    return templates.TemplateResponse(request=request, name="index.html")


@app.get('/api/info')
async def api_info():
    """API info"""
    return {
        "service": "AI Assistant with MCP",
        "version": "1.0.0",
        "features": ["普通聊天", "高德地图MCP集成"],
        "endpoints": {
            "POST /stream": "流式聊天接口",
            "GET /health": "健康检查",
            "GET /api/info": "API信息"
        },
        "config_status": {
            "dashscope": bool(os.getenv("Dashscope_API_Key")),
            "amap": bool(os.getenv("AMAP_API_KEY"))
        }
    }


if __name__ == '__main__':
    uvicorn.run(app, host="0.0.0.0", port=8000, log_level="info")
Frontend
templates\index.html
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no">
<title>智能多模态AI助手</title>
<script src="/static/script.js"></script>
<script src="/static/script1.js"></script>
<script src="/static/script_moblie.js"></script>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap" rel="stylesheet">
<link rel="stylesheet" href="../static/styles_mobile.css">
</head>
<body>
<div class="app-container">
<header>
<div class="logo">
<div class="logo-icon">
<i class="fas fa-robot"></i>
</div>
<div>
<h1>多模态AI助手</h1>
<p class="tagline">支持对话、识图、绘图与语音交互</p>
</div>
</div>
<div class="mode-selector">
<button class="mode-btn active" data-mode="chat">
<i class="fas fa-comments"></i> 对话
</button>
</div>
</header>
<div class="main-content">
<div class="chat-container">
<div class="chat-header">
<h2>AI对话</h2>
<button class="clear-chat-btn" id="clearChat" onclick="clearChat()">
<i class="fas fa-trash-alt"></i>
清空对话
</button>
</div>
<div class="net-search">
<input type="checkbox" id="net-search"/>联网
</div>
<div class="messages-container" id="chatbox">
<!-- Messages are appended here dynamically -->
<div class="message ai">
<div class="avatar">
<i class="fas fa-robot"></i>
</div>
<div class="message-content">
<div class="message-text">
您好!我是多模态AI助手,支持文本对话、图像识别、图像生成和语音输入。请问有什么可以帮助您的?
</div>
</div>
</div>
<div class="message user">
<div class="avatar">
<i class="fas fa-user"></i>
</div>
<div class="message-content">
<div class="message-text">
请帮我生成一张日出的风景图片
</div>
</div>
</div>
<div class="message ai">
<div class="avatar">
<i class="fas fa-robot"></i>
</div>
<div class="message-content">
<div class="message-text">
这是根据您的要求生成的日出风景图:
</div>
<img src="https://images.unsplash.com/photo-1506905925346-21bda4d32df4?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=1350&q=80" alt="日出风景" class="message-image">
<div class="message-text" style="margin-top: 0.8rem; font-size: 0.9rem; color:
#9ca3af
;">
图片已生成,展现了日出时分的美丽景色,包含了山脉、云海和温暖的阳光。
</div>
</div>
</div>
</div>
<div class="input-area">
<div class="action-buttons">
<button class="action-btn" title="上传图片" id="recognize-image" onclick="addImage()">
<i class="fas fa-image"></i>
</button>
<button class="action-btn" id="voiceBtn" title="语音输入" onclick="connectWebSocket()">
<i class="fas fa-microphone"></i>
</button>
<button class="action-btn" id="attachBtn" title="停止录音" onclick="stopRecording()">
<i class="fas fa-stop-circle"></i>
</button>
</div>
<textarea id="question" onkeyup="doEnter(event)" class="text-input" placeholder="输入您的问题或指令..."></textarea>
<div class="send-buttons">
<button class="send-btn" id="qa-button" onclick="doAsk()">
<i class="fas fa-paper-plane"></i>
</button>
<button class="generate-image" id="generate-image" onclick="generateImage()">
<i class="fas fa-wand-magic-sparkles"></i>
</button>
</div>
</div>
<div class="status info" id="typingStatus">
<i class="fas fa-circle-notch fa-spin"></i>
<span>AI正在思考...</span>
</div>
</div>
</div>
<div class="side-panel">
<div class="panel-section" id="imageDiv" style="display: none;">
<h3><i class="fas fa-eye"></i> 图像识别</h3>
<input type="file" id="imageInput" onchange="saveAndPreview()" style="display: none;">
<div class="image-preview" id="imagePreview">
<img id="preview">
<i class="fas fa-image" style="font-size: 3rem; color: #475569;"></i>
</div>
<p style="color: #6b7280; font-size: 0.9rem; margin-bottom: 1rem;">
上传图片后,AI将分析图片内容并描述识别结果。
</p>
</div>
<div class="status success" id="successStatus">
<i class="fas fa-check-circle"></i>
<span>图像生成成功!</span>
</div>
<div class="status error" id="errorStatus">
<i class="fas fa-exclamation-circle"></i>
<span>发生错误,请重试。</span>
</div>
</div>
</div>
<footer>
<p>多模态AI助手 © 2025 | 支持文本、图像、语音交互</p>
<div class="footer-links">
<a href="#"><i class="fas fa-shield-alt"></i> 隐私政策</a>
<a href="#"><i class="fas fa-question-circle"></i> 使用帮助</a>
<a href="#"><i class="fas fa-code"></i> API文档</a>
<a href="#"><i class="fas fa-envelope"></i> 联系我们</a>
</div>
</footer>
</div>
</body>
</html>
styles_mobile.css
The mobile stylesheet.
/* Mobile-optimized styles */
:root {
--mobile-padding: 16px;
--input-height: 44px;
}
* {
box-sizing: border-box;
-webkit-tap-highlight-color: transparent;
}
body {
margin: 0;
padding: 0;
overflow-x: hidden;
font-size: 14px;
}
.app-container {
min-height: 100vh;
display: flex;
flex-direction: column;
}
header {
padding: 12px var(--mobile-padding);
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
position: sticky;
top: 0;
z-index: 100;
}
.logo {
display: flex;
align-items: center;
gap: 12px;
margin-bottom: 12px;
}
.logo-icon {
width: 36px;
height: 36px;
background: rgba(255, 255, 255, 0.2);
border-radius: 10px;
display: flex;
align-items: center;
justify-content: center;
font-size: 18px;
}
.logo h1 {
font-size: 18px;
margin: 0;
font-weight: 600;
}
.tagline {
font-size: 12px;
opacity: 0.9;
margin: 2px 0 0 0;
}
.mode-selector {
display: flex;
overflow-x: auto;
gap: 8px;
padding-bottom: 4px;
-webkit-overflow-scrolling: touch;
}
.mode-btn {
padding: 8px 16px;
background: rgba(255, 255, 255, 0.1);
border: none;
border-radius: 20px;
color: white;
font-size: 13px;
display: flex;
align-items: center;
gap: 6px;
white-space: nowrap;
}
.mode-btn.active {
background: white;
color: #667eea;
}
.main-content {
flex: 1;
display: flex;
flex-direction: column;
padding: var(--mobile-padding);
}
.chat-container {
flex: 1;
display: flex;
flex-direction: column;
background: white;
border-radius: 16px;
box-shadow: 0 2px 20px rgba(0, 0, 0, 0.05);
overflow: hidden;
margin-bottom: var(--mobile-padding);
}
.chat-header {
padding: 16px;
border-bottom: 1px solid #e5e7eb;
display: flex;
justify-content: space-between;
align-items: center;
}
.chat-header h2 {
font-size: 16px;
margin: 0;
font-weight: 600;
}
.clear-chat-btn {
padding: 6px 12px;
background: #f3f4f6;
border: none;
border-radius: 8px;
color: #6b7280;
font-size: 12px;
display: flex;
align-items: center;
gap: 4px;
}
.net-search {
padding: 12px 16px;
border-bottom: 1px solid #e5e7eb;
display: flex;
align-items: center;
gap: 8px;
}
.net-search input[type="checkbox"] {
width: 18px;
height: 18px;
}
.messages-container {
flex: 1;
padding: 16px;
overflow-y: auto;
-webkit-overflow-scrolling: touch;
max-height: 50vh;
}
.message {
display: flex;
gap: 12px;
margin-bottom: 20px;
}
.message.user {
flex-direction: row-reverse;
}
.avatar {
width: 32px;
height: 32px;
border-radius: 50%;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
display: flex;
align-items: center;
justify-content: center;
color: white;
flex-shrink: 0;
}
.message.user .avatar {
background: #10b981;
}
.message-content {
max-width: 75%;
}
.message-text {
padding: 12px;
border-radius: 12px;
background: #f3f4f6;
line-height: 1.5;
font-size: 14px;
}
.message.user .message-text {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
border-radius: 12px 12px 4px 12px;
}
.message-image {
max-width: 100%;
border-radius: 12px;
margin-top: 8px;
}
.input-container {
padding: 16px;
border-top: 1px solid #e5e7eb;
}
.upload-area {
padding: 20px;
border: 2px dashed #d1d5db;
border-radius: 12px;
text-align: center;
margin-bottom: 16px;
background: #f9fafb;
}
.upload-area i {
font-size: 24px;
color: #6b7280;
margin-bottom: 8px;
}
.upload-area p {
margin: 8px 0;
color: #374151;
font-weight: 500;
}
.upload-area span {
font-size: 12px;
color: #9ca3af;
}
.input-area {
display: flex;
align-items: flex-end;
gap: 8px;
background: #f9fafb;
border-radius: 24px;
padding: 8px;
}
.action-buttons {
display: flex;
gap: 4px;
margin-bottom: 8px;
}
.action-btn, .send-btn, .generate-image {
width: 36px;
height: 36px;
border: none;
border-radius: 50%;
display: flex;
align-items: center;
justify-content: center;
background: white;
color: #6b7280;
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.05);
}
.send-btn {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
flex-shrink: 0;
}
.generate-image {
background: #10b981;
color: white;
flex-shrink: 0;
}
.text-input {
flex: 1;
border: none;
background: transparent;
padding: 8px 12px;
font-size: 14px;
resize: none;
min-height: 36px;
max-height: 120px;
outline: none;
font-family: inherit;
width: 100% !important; /* override the inline width set by the script */
}
.text-input::placeholder {
color: #9ca3af;
}
.status {
padding: 12px;
border-radius: 8px;
margin-top: 12px;
display: flex;
align-items: center;
gap: 8px;
font-size: 13px;
display: none;
}
.status.info {
background: #eff6ff;
color: #1d4ed8;
}
.status.success {
background: #ecfdf5;
color: #047857;
}
.status.error {
background: #fef2f2;
color: #dc2626;
}
.side-panel {
background: white;
border-radius: 16px;
padding: 20px;
box-shadow: 0 2px 20px rgba(0, 0, 0, 0.05);
}
.panel-section {
margin-bottom: 24px;
}
.panel-section h3 {
font-size: 16px;
margin: 0 0 16px 0;
display: flex;
align-items: center;
gap: 8px;
color: #374151;
}
.image-preview {
width: 100%;
height: 200px;
border-radius: 12px;
background: #f9fafb;
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
margin-bottom: 16px;
overflow: hidden;
}
.image-preview img {
width: 100%;
height: 100%;
object-fit: cover;
display: none;
}
footer {
padding: 20px var(--mobile-padding);
background: #f9fafb;
text-align: center;
border-top: 1px solid #e5e7eb;
}
footer p {
margin: 0 0 16px 0;
color: #6b7280;
font-size: 12px;
}
.footer-links {
display: flex;
flex-wrap: wrap;
justify-content: center;
gap: 16px;
}
.footer-links a {
color: #667eea;
text-decoration: none;
font-size: 12px;
display: flex;
align-items: center;
gap: 4px;
}
/* Responsive adjustments */
@media (min-width: 768px) {
.main-content {
flex-direction: row;
gap: var(--mobile-padding);
}
.chat-container {
flex: 2;
margin-bottom: 0;
}
.side-panel {
flex: 1;
}
.messages-container {
max-height: 60vh;
}
}
/* Touch-device optimizations */
@media (hover: none) and (pointer: coarse) {
button, .action-btn, .send-btn, .generate-image {
min-height: 44px;
min-width: 44px;
}
.mode-btn {
padding: 10px 20px;
}
.clear-chat-btn {
padding: 8px 16px;
}
}
/* Hide desktop-only elements */
.desktop-only {
display: none;
}
/* Scrollbar styling */
::-webkit-scrollbar {
width: 4px;
}
::-webkit-scrollbar-track {
background: transparent;
}
::-webkit-scrollbar-thumb {
background: #d1d5db;
border-radius: 2px;
}
static\script.js
The frontend voice-control (speech recognition) script.
let websocket;
let audioContext;
let scriptProcessor;
let audioInput;
let audioStream;
let isRecording = false; // recording-state flag
async function getSpeechToken() {
try {
const response = await fetch('/api/speech/token');
if (!response.ok) {
throw new Error(`HTTP error! status: ${response.status}`);
}
const data = await response.json();
return {
token: data.token,
appkey: data.appkey
};
} catch (error) {
console.error('获取Token失败:', error);
// If the backend endpoint fails, propagate the error (connectWebSocket shows a fallback hint)
throw error;
}
}
// Update the connection-status display
function updateStatus(status) {
const statusDisplay = document.getElementById('question') || document.getElementById('status');
if (statusDisplay) {
if (statusDisplay.tagName === 'TEXTAREA') {
// For a textarea, only the placeholder is updated
statusDisplay.placeholder = status;
} else {
statusDisplay.textContent = status;
statusDisplay.style.color = status === '已连接' ? 'green' : 'red';
}
}
}
// Generate a UUID
function generateUUID() {
return ([1e7] + -1e3 + -4e3 + -8e3 + -1e11).replace(/[018]/g, c =>
(c ^ crypto.getRandomValues(new Uint8Array(1))[0] & 15 >> c / 4).toString(16)
).replace(/-/g, '');
}
// Append a speech-recognition result to the question textarea
function addToTextarea(text, isFinal = false) {
const textarea = document.getElementById('question');
if (!textarea) return;
if (isFinal) {
// Final result: append it to the end on a new line
if (textarea.value) {
textarea.value += '\n' + text;
} else {
textarea.value = text;
}
textarea.value += '\n'; // trailing newline to separate entries
// Show a temporary confirmation note (final result)
const tempText = document.createElement('div');
tempText.textContent = `✓ 识别完成: ${text}`;
tempText.style.color = 'green';
tempText.style.fontWeight = 'bold';
document.body.appendChild(tempText);
setTimeout(() => tempText.remove(), 3000);
} else {
// Intermediate result: show it elsewhere instead of rewriting the textarea;
// here a floating overlay is used
showIntermediateResult(text);
}
// Scroll the textarea to the bottom
textarea.scrollTop = textarea.scrollHeight;
// Fire an input event so other listeners can react
textarea.dispatchEvent(new Event('input'));
}
// Show an intermediate result in a floating overlay
function showIntermediateResult(text) {
// Create the floating overlay once and reuse it afterwards
let floatingDiv = document.getElementById('floating-result');
if (!floatingDiv) {
floatingDiv = document.createElement('div');
floatingDiv.id = 'floating-result';
floatingDiv.style.position = 'fixed';
floatingDiv.style.bottom = '100px';
floatingDiv.style.right = '20px';
floatingDiv.style.backgroundColor = 'rgba(0,0,0,0.7)';
floatingDiv.style.color = 'white';
floatingDiv.style.padding = '10px';
floatingDiv.style.borderRadius = '5px';
floatingDiv.style.zIndex = '1000';
document.body.appendChild(floatingDiv);
}
floatingDiv.textContent = `正在识别: ${text}`;
floatingDiv.style.display = 'block';
// Fade out after 3 seconds
clearTimeout(floatingDiv.timeout);
floatingDiv.timeout = setTimeout(() => {
floatingDiv.style.opacity = '0';
floatingDiv.style.transition = 'opacity 1s';
setTimeout(() => {
if (floatingDiv.parentNode) {
floatingDiv.parentNode.removeChild(floatingDiv);
}
}, 1000);
}, 3000);
}
// Logging helper
function logMessage(message) {
console.log('日志:', message); // keep the console log
// Optionally mirror the log somewhere on the page
const logArea = document.getElementById('messages') || document.getElementById('log-area');
if (logArea) {
const messageElement = document.createElement('div');
messageElement.textContent = message;
logArea.appendChild(messageElement);
logArea.scrollTop = logArea.scrollHeight;
}
}
// Open the WebSocket connection
async function connectWebSocket() {
try {
// 1. Fetch a temporary token from the backend
const auth = await getSpeechToken();
const appkey = auth.appkey;
const token = auth.token;
// 2. Same code as before, except token and appkey are now fetched dynamically
const socketUrl = `wss://nls-gateway.cn-shanghai.aliyuncs.com/ws/v1?token=${token}`;
websocket = new WebSocket(socketUrl);
websocket.onopen = function() {
updateStatus('已连接');
logMessage('连接到 WebSocket 服务器');
var startTranscriptionMessage = {
header: {
appkey: appkey, // the dynamically fetched appkey
namespace: "SpeechTranscriber",
name: "StartTranscription",
task_id: generateUUID(),
message_id: generateUUID()
},
payload: {
"format": "pcm",
"sample_rate": 16000,
"enable_intermediate_result": true,
"enable_punctuation_prediction": true,
"enable_inverse_text_normalization": true
}
};
websocket.send(JSON.stringify(startTranscriptionMessage));
};
// 3. The original event listeners are unchanged
websocket.onmessage = function(event) {
logMessage('收到消息: ' + event.data);
try {
const message = JSON.parse(event.data);
if (message.header.name === "TranscriptionStarted") {
logMessage('语音识别已启动');
}
else if (message.header.name === "TranscriptionResultChanged") {
const text = message.payload.result;
if (text) {
addToTextarea(text, false);
logMessage('中间结果: ' + text);
}
}
else if (message.header.name === "SentenceEnd") {
const text = message.payload.result;
if (text) {
addToTextarea(text, true);
logMessage('识别完成: ' + text);
}
}
else if (message.header.name === "TranscriptionCompleted") {
logMessage('语音识别完成');
}
else if (message.header.name === "TaskFailed") {
logMessage('识别失败: ' + (message.payload.message || '未知错误'));
}
} catch (e) {
logMessage('解析消息失败: ' + e.message);
}
};
websocket.onerror = function(event) {
updateStatus('错误');
logMessage('WebSocket 错误: ' + event.type);
};
websocket.onclose = function() {
updateStatus('断开连接');
logMessage('与 WebSocket 服务器断开');
};
// 4. The original recording logic is unchanged
startRecording();
} catch (error) {
console.error('连接WebSocket失败:', error);
// 5. Optional fallback hint
const textarea = document.getElementById('question');
if (textarea) {
textarea.value += '\n[语音服务暂时不可用,请稍后重试]\n';
textarea.scrollTop = textarea.scrollHeight;
}
updateStatus('连接失败');
}
}
// Close the WebSocket connection
function disconnectWebSocket() {
if (websocket) {
websocket.close();
}
updateStatus('未连接');
isRecording = false;
}
// Start recording
async function startRecording() {
try {
if (isRecording) {
logMessage('已经在录音中');
return;
}
logMessage('开始录音...');
// Acquire the audio input device
audioStream = await navigator.mediaDevices.getUserMedia({
audio: {
sampleRate: 16000,
channelCount: 1,
echoCancellation: true,
noiseSuppression: true
}
});
audioContext = new (window.AudioContext || window.webkitAudioContext)({
sampleRate: 16000
});
audioInput = audioContext.createMediaStreamSource(audioStream);
// Script processor with a 2048-sample buffer
scriptProcessor = audioContext.createScriptProcessor(2048, 1, 1);
scriptProcessor.onaudioprocess = function(event) {
if (!isRecording) return;
const inputData = event.inputBuffer.getChannelData(0);
const inputData16 = new Int16Array(inputData.length);
for (let i = 0; i < inputData.length; ++i) {
inputData16[i] = Math.max(-1, Math.min(1, inputData[i])) * 0x7FFF; // PCM 16-bit
}
if (websocket && websocket.readyState === WebSocket.OPEN) {
websocket.send(inputData16.buffer);
}
};
audioInput.connect(scriptProcessor);
scriptProcessor.connect(audioContext.destination);
isRecording = true;
logMessage('录音已开始');
// Mark the start of recording in the textarea
const textarea = document.getElementById('question');
if (textarea) {
if (textarea.value && !textarea.value.endsWith('\n')) {
textarea.value += '\n';
}
//textarea.value += '[开始录音...]\n';
textarea.scrollTop = textarea.scrollHeight;
}
} catch (e) {
logMessage('录音失败: ' + e.message);
isRecording = false;
}
}
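Two environment notes on the recording code above: createScriptProcessor is deprecated in favor of AudioWorklet but remains widely supported, and getUserMedia only works in a secure context (HTTPS or localhost), a practical constraint that contributes to the voice feature being PC-only here.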
// Stop recording
function stopRecording() {
if (!isRecording) {
logMessage('当前没有在录音');
return;
}
if (scriptProcessor) {
scriptProcessor.disconnect();
scriptProcessor = null;
}
if (audioInput) {
audioInput.disconnect();
audioInput = null;
}
if (audioStream) {
audioStream.getTracks().forEach(track => track.stop());
audioStream = null;
}
if (audioContext) {
audioContext.close();
audioContext = null;
}
isRecording = false;
logMessage('录音已停止');
// Mark the end of recording in the textarea
const textarea = document.getElementById('question');
if (textarea) {
//textarea.value += '[录音结束]\n';
textarea.scrollTop = textarea.scrollHeight;
textarea.dispatchEvent(new Event('input'));
}
disconnectWebSocket();
}
// A helper that clears the textarea
function clearTextarea() {
const textarea = document.getElementById('question');
if (textarea) {
textarea.value = '';
textarea.dispatchEvent(new Event('input'));
}
}
// Enter-key handler (the textarea has onkeyup="doEnter(event)").
// Note: script1.js also defines doEnter and loads later, so its Ctrl+Enter version is the one that actually runs.
function doEnter(event) {
    if (event.key === 'Enter' && !event.shiftKey) {
        event.preventDefault();
        // Handle the send logic
        const textarea = document.getElementById('question');
        if (textarea && textarea.value.trim()) {
            // The actual message-sending logic would go here
            console.log('发送消息:', textarea.value);
            // Clear (or keep) the content
            textarea.value = '';
        }
    }
}
static\script1.js
The non-voice frontend script (chat, image recognition, and image generation).
let switch_voice = false; // global flag; speech playback is off by default
const synth = window.speechSynthesis; // global speech-synthesis object
sessionStorage.clear(); // clear leftover data on every page refresh
// Text-to-speech helper
function speak(content) {
    // Instantiate an utterance with the text to be read aloud
    let utterance = new SpeechSynthesisUtterance(content);
    utterance.lang = 'zh-CN'; // language (the actual voice depends on the OS locale)
    utterance.volume = 1; // volume: 1 by default; values such as 0.8 or 1.2 also work
    utterance.rate = 1; // speaking rate, 1 by default; can be made faster or slower
    synth.cancel(); // clear the synthesis queue so nothing else is being read
    synth.speak(utterance); // speak
}
// Toggle between reading aloud and stopping
function readText(obj) {
    // obj is the read-aloud button generated dynamically with each AI reply
    if (switch_voice) { // playback is on, so turn it off
obj.innerText = '朗读';
switch_voice = false;
synth.cancel();
} else {
obj.innerText = '停止';
switch_voice = true;
synth.cancel();
        // Grab the parent container's content, i.e. the AI reply DIV.
        // The read button itself is part of that innerHTML, so strip it off first
let content = obj.parentNode.innerHTML.split("<button")[0];
if (content) {
speak(content);
}
}
}
// Ctrl+Enter shortcut that replaces clicking the ask button (this doEnter overrides the one in script.js, since script1.js loads later)
function doEnter(e) {
if (e.key == "Enter" && e.ctrlKey) {
doAsk();
}
}
// Scroll to the very bottom
function scrollToBottom() {
var chatbox = document.getElementById('chatbox');
chatbox.scrollTop = chatbox.scrollHeight;
}
function clearChat(){
    // Clear both the conversation area and the input box
    document.getElementById('chatbox').innerHTML = "";
    document.getElementById('question').value = "";
}
// If the user uploaded an image, this is an image-recognition request; otherwise it is plain Q&A, and a different function is called
function doAsk() {
    // Create the question DIV with class ask-box to match the CSS
let ask = document.createElement('div');
ask.setAttribute("class", "ask-box");
if (sessionStorage.getItem("image")) {
ask.innerHTML = '<img src="' + sessionStorage.getItem("image") + '" style="width:100%"><br/>' + question.value;
document.getElementById("chatbox").append(ask);
scrollToBottom();
recognizeImage();
}
else {
ask.innerHTML = document.getElementById("question").value;
        // Append the DIV to the chatbox as a child element
document.getElementById("chatbox").append(ask);
scrollToBottom();
doAnswer();
}
}
// Frontend implementation of plain text Q&A
function doAnswer() {
    // Create the AI-reply DIV with class answer-box to match the CSS
let answer = document.createElement('div');
answer.setAttribute("class", "answer-box");
document.getElementById("chatbox").append(answer);
let content = document.getElementById("question").value;
    let checkbox = document.getElementById("net-search");
    let params;
    if (checkbox.checked) {
        params = {"content": content, "search": true};
    } else {
        params = {"content": content, "search": false};
    }
    // Use fetch to read and parse the backend response stream and append it to the reply DIV
fetch("/stream", {
method: 'POST',
headers: {'Content-Type': 'application/json'},
body: JSON.stringify(params)
}).then(async result => {
const reader = result.body.getReader();
const textDecoder = new TextDecoder("utf-8");
while(true){
const {done, value} = await reader.read();
if(done) {
                // Append the read-aloud button right after the reply content
answer.innerHTML += "<button onclick='readText(this)' class='read-button' id='speak'>朗读</button>";
scrollToBottom();
return
}
let jsonList = textDecoder.decode(value).split("\n");
for (let i=0; i<jsonList.length-1; i++) {
                let jsonObj = JSON.parse(jsonList[i]);
                // Replace \n with <br/> so line breaks render in the browser
                answer.innerHTML += jsonObj['content'].replaceAll("\n", "<br/>");
}
}
});
}
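One caveat in the reader loop above: reader.read() yields network chunks, and a chunk boundary is not guaranteed to land on a newline, so a JSON line can occasionally be split across two reads and make JSON.parse throw. Keeping the last (possibly partial) element of jsonList and prepending it to the next decoded chunk makes the parsing robust.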
// Save the image to sessionStorage and show a preview
function saveAndPreview() {
    // Grab the file-upload input element
    var input = document.getElementById('imageInput');
    // Proceed only if a file has been chosen
    if (input.files && input.files[0]) {
        // Read the file with FileReader and handle its onload event
        var reader = new FileReader();
        reader.onload = function(e) {
            var img = document.getElementById('preview');
            img.src = e.target.result; // the Base64-encoded data URL
            img.style.display = 'block'; // reveal the preview element
            // Narrow the question textarea to make room for the preview
            document.getElementById('question').style.width = '450px';
            document.getElementById('imageDiv').style.display = 'block';
            // Store the image data in sessionStorage
            sessionStorage.clear();
            sessionStorage.setItem('image', e.target.result);
            // Focus the input box and pre-fill a default prompt
            var question = document.getElementById("question");
            question.focus();
            // Pre-filled so the user has less typing to do
            question.value = "请识别图片中的内容,尽量覆盖尽可能多的信息,描述尽量简洁明了。";
        }
        reader.readAsDataURL(input.files[0]);
    }
}
function addImage() {
document.getElementById('imageInput').click();
}
// Frontend/backend wiring for image recognition
function recognizeImage() {
    let answer = document.createElement('div');
    let question = document.getElementById("question");
    // Upload the image to the backend directly as Base64
    let params = {"base64": sessionStorage.getItem("image"), "content": question.value};
    answer.setAttribute("class", "answer-box");
document.getElementById("chatbox").append(answer);
fetch("/recognize", {
method: 'POST',
headers: {'Content-Type': 'application/json'},
body: JSON.stringify(params)
}).then(async result => {
const reader = result.body.getReader();
const textDecoder = new TextDecoder("utf-8");
while(true){
const {done, value} = await reader.read();
if(done) {
answer.innerHTML += "<button onclick='readText(this)' class='read-button' id='speak'>朗读</button>";
scrollToBottom();
return
}
let jsonList = textDecoder.decode(value).split("\n");
for (let i=0; i<jsonList.length-1; i++) {
                let jsonObj = JSON.parse(jsonList[i]);
answer.innerHTML += jsonObj['content'].replaceAll("\n", "<br/>");
}
}
});
    // Remove the image from sessionStorage, hide the preview, and restore the initial layout
sessionStorage.clear();
document.getElementById("imageDiv").style.display = "none";
document.getElementById("question").style.width = "560px";
}
// Frontend wiring and rendering for image generation
function generateImage() {
let ask = document.createElement('div');
ask.setAttribute("class", "ask-box");
ask.innerHTML = document.getElementById("question").value;
document.getElementById("chatbox").append(ask);
scrollToBottom();
let answer = document.createElement('div');
answer.setAttribute("class", "answer-box");
fetch("/generate", {
method: 'POST',
headers: {'Content-Type': 'application/json'},
body: JSON.stringify({"content": document.getElementById("question").value})
}).then(result => {
return result.json();
}).then(data => {
let url = data['image_url'];
        // Render the image inside the reply box
answer.innerHTML = "<img src='"+url+"' style='width: 100%'/>";
document.getElementById("chatbox").append(answer);
scrollToBottom();
});
}
Below are some images it generated.
Prompt: 武松打虎 (Wu Song fights the tiger)

Prompt: 帮我生成一幅美丽的风景画 (generate a beautiful landscape painting for me)

Gu Xiang's closing words: In the AI era, mastering the known unknowns is no longer difficult; the key is discovering the unknown unknowns, which are often hidden in the very process of exploring the known unknowns.