
My AI Product

Author: 顾翔 · Published 2026-01-29

Prerequisites:

Configure the following environment variables (a quick verification sketch follows the list):

  • ALIYUN_ACCESS_KEY_ID
  • ALIYUN_ACCESS_KEY_SECRET
  • ALIYUN_APP_KEY
  • ALIYUN_ASR_APP_KEY (same value as ALIYUN_APP_KEY)
  • Aliyun_Search_Key: used for web search
  • AMAP_API_KEY: used with Amap (高德) search to fetch weather forecasts
  • Dashscope_API_Key: used for all Qwen model calls (see the code below)
  • Mail_Password: the SMTP password used by func_calling.py
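
The following is a minimal sanity-check sketch; this helper script is not part of the project, but the key names mirror those used by the code below:

import os
from dotenv import load_dotenv

load_dotenv()   # reads the .env file in the working directory
REQUIRED = ["ALIYUN_ACCESS_KEY_ID", "ALIYUN_ACCESS_KEY_SECRET", "ALIYUN_APP_KEY",
            "ALIYUN_ASR_APP_KEY", "Aliyun_Search_Key", "AMAP_API_KEY",
            "Dashscope_API_Key", "Mail_Password"]
missing = [k for k in REQUIRED if not os.getenv(k)]
print("Missing keys:", missing if missing else "none")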

This article uses FastAPI + Python with Alibaba's Qwen (通义千问) models to build an AI product. The interface is shown below.

It supports text chat, image generation, image recognition, and voice input (PC only).

Backend

func_calling.py

This code implements an email-sending function and declares the matching schema for Function Calling.

import smtplib, os    # smtplib handles the SMTP protocol
# The email module builds the message headers and body
# from email.mime.application import MIMEApplication
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from dotenv import load_dotenv
load_dotenv()
def send_email(receiver, content, subject=None):
    sender = 'xianggu625@126.com'  # sending mailbox
    # Build the message container
    msg = MIMEMultipart()
    if subject is not None:
        msg['Subject'] = subject
    else:
        msg['Subject'] = f"来自{sender}的问候邮件"
    msg['From'] = sender
    msg['To'] = receiver
    # Build the message body
    body = MIMEText(content, 'html', 'utf-8')
    msg.attach(body)
    # Connect to the mail server and send; smtplib.SMTP_SSL is used because the server is SSL-based
    smtpObj = smtplib.SMTP_SSL('smtp.126.com', 465)
    smtpObj.login(user=sender, password=os.getenv("Mail_Password"))
    smtpObj.sendmail(sender, receiver, msg.as_string())  # serialize the message properly
    smtpObj.quit()
    return "邮件已经成功发送到:" + receiver
# System-level prompt
system_prompt = """
	你是一名AI助手,具备函数调用的能力,但是如果提供的信息已经足够回答用户的问题,则不需要再进行函数调用。
	同时,请严格按照函数调用的方式进行处理,如果用户未提供函数所需参数,则必须询问,而不能自作主张。
"""
# Declare the function and its parameters; keep the (Chinese) descriptions precise so the model can understand them
functions = [
{
    "type": "function",
    "function": {
        "name": "send_email",
        "description": "向指定邮箱地址发送一封邮件",
        "parameters": {
            "type": "object",
            "properties": {
                "receiver": {
                    "type": "string",
                    "description": "邮件的收件地址",
                },
                "content": {
                    "type": "string",
                    "description": "邮件的正文内容,支持HTML格式",
                },
                "subject": {
                    "type": "string",
                    "description": "邮件的标题,如果没有标题,可以设置为空",
                },
            },
            "required": ["receiver", "content"]
        },
    }
}
]
# Quick test of the email function
if __name__ == '__main__':
    send_email("xianggu625@126.com", "祝你节日快乐,工作顺利。")
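
Before wiring this into the web API, the tool declaration can be exercised on its own. Below is a minimal sketch, assuming the key is stored in the Dashscope_API_Key environment variable and qwen-plus is reached through the OpenAI-compatible endpoint (exactly as qa.py does later); the test question itself is made up:

import json, os
from openai import OpenAI
from dotenv import load_dotenv
from func_calling import send_email, functions, system_prompt

load_dotenv()
client = OpenAI(api_key=os.getenv("Dashscope_API_Key"),
                base_url="https://dashscope.aliyuncs.com/compatible-mode/v1")
messages = [{"role": "system", "content": system_prompt},
            {"role": "user", "content": "给xianggu625@126.com发一封邮件,内容是:会议改到周五上午十点。"}]
msg = client.chat.completions.create(model="qwen-plus", messages=messages,
                                     tools=functions).choices[0].message
if msg.tool_calls:
    call = msg.tool_calls[0]
    args = json.loads(call.function.arguments)  # arguments arrive as a JSON string
    print(send_email(**args))                   # dispatch to the real function
else:
    print(msg.content)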

module.py

This code implements a Python client for Alibaba Cloud OpenSearch's intelligent search capability: it runs semantic searches over documents and web pages and returns summary results.

import requests
import os
import json
from dotenv import load_dotenv
load_dotenv()
def aliyun_search(content):
    print(f"🔍 搜索内容: {content}")
    print(f"📝 使用的密钥前几位: {os.getenv('Aliyun_Search_Key')[:15] if os.getenv('Aliyun_Search_Key') else '无密钥'}")
    url = "http://default-cu35.platform-cn-shanghai.opensearch.aliyuncs.com/v3/openapi/workspaces/default/web-search/ops-web-search-001"
    header = {"Content-Type": "application/json", "Authorization": f"Bearer {os.getenv('Aliyun_Search_Key')}"}
    data = {"query": content, "top_k": 3, "way": "full", "content_type": "summary"}
    print(f"🌐 请求URL: {url}")
    print(f"📦 请求数据: {data}")
    try:
        resp = requests.post(url, headers=header, json=data, timeout=10)
        print(f"📡 状态码: {resp.status_code}")
        if resp.status_code != 200:
            print(f"❌ 请求失败! 响应内容: {resp.text}")
            return []
        # Print the full response (for debugging)
        response_json = resp.json()
        print("📄 完整响应结构:")
        print(json.dumps(response_json, indent=2, ensure_ascii=False, default=str))
        # Extract results defensively across the possible response shapes
        if 'result' in response_json and 'search_result' in response_json['result']:
            results = response_json['result']['search_result']
        elif 'search_result' in response_json:
            results = response_json['search_result']
        elif 'data' in response_json and 'search_result' in response_json['data']:
            results = response_json['data']['search_result']
        elif 'items' in response_json:
            results = response_json['items']
        elif 'hits' in response_json and 'hits' in response_json['hits']:
            results = response_json['hits']['hits']
        else:
            print("⚠️  警告: 未找到预期的响应结构")
            # Fall back to the whole response, or an empty list
            results = response_json if isinstance(response_json, list) else []
        print(f"✅ 成功解析 {len(results)} 个结果")
        return results
    except requests.exceptions.Timeout:
        print("⏰ 请求超时")
        return []
    except json.JSONDecodeError:
        print("❌ 响应不是有效的JSON格式")
        print(f"原始响应: {resp.text[:500]}...")
        return []
    except Exception as e:
        print(f"💥 搜索请求出错: {type(e).__name__}: {e}")
        return []
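
A quick way to exercise the client, assuming Aliyun_Search_Key is set; the query is just an example, and the exact fields of each result depend on the response shape logged above:

from module import aliyun_search

for item in aliyun_search("FastAPI 是什么"):
    print(item)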

generate.py

Image generation.

from fastapi import APIRouter, Body
from http import HTTPStatus
import os, requests
from dashscope import ImageSynthesis  # DashScope SDK
from dotenv import load_dotenv
load_dotenv()
generate = APIRouter()
# This endpoint uses Tongyi Wanxiang V2. Text-to-image is not covered by the OpenAI-compatible
# SDK, so the DashScope SDK is used instead.
# There is also no streaming here, since the response is a single generated image.
@generate.post("/generate")
def generate_image(data: dict=Body()):
    api_key = os.getenv("Dashscope_API_Key")
    rsp = ImageSynthesis.call(api_key=api_key,
              model="wanx2.1-t2i-turbo",   # model name
              prompt=data['content'],      # prompt
              n=1,                         # number of images
              size='1024*1024')            # image size
    if rsp.status_code == HTTPStatus.OK:
        # Save the images under static/images
        for result in rsp.output.results:
            # result.url is the online address of the generated image
            file_name = result.url.split('/')[-1].split("?")[0]
            with open(f'./static/images/{file_name}', 'wb') as f:
                f.write(requests.get(result.url).content)
        # Return the image URL to the front end so it can be rendered
        return {"message": "successful", "image_url": f"/static/images/{file_name}"}
    return {"message": "failed", "image_url": ""}
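
With the server from main.py below running locally, the endpoint can be tried like this (the prompt is just an example):

import requests

resp = requests.post("http://localhost:8000/generate",
                     json={"content": "一幅日出时分的山水画"})
print(resp.json())   # e.g. {"message": "successful", "image_url": "/static/images/....png"}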

recognize.py

Image recognition.

from fastapi import APIRouter, Body
from fastapi.responses import StreamingResponse
import json, os, base64
from openai import OpenAI
from dotenv import load_dotenv
load_dotenv()
recog = APIRouter()
# Image recognition is usually a one-shot conversation, so no history is kept
@recog.post("/recognize")
def recognize_image(data: dict=Body()):
    # The Base64 submitted by the JS looks like: data:image/jpeg;base64,/9j/4AAQSkZJRg...
    # The full data URL can be sent to the Qwen-VL model as-is;
    # only when saving to disk would you extract the Base64 payload itself
    b64str = data['base64'].split(',')[1]
    def stream_chat():
        client = OpenAI(api_key=os.getenv("Dashscope_API_Key"),
                        base_url="https://dashscope.aliyuncs.com/compatible-mode/v1")
        completion = client.chat.completions.create(
            model="qwen-vl-max-latest",
            messages=[
                {"role": "system","content": "你是一名专业的AI助手,可以帮助用户解答任何问题,也能以精准简洁的语言识别并描述出图像的内容。"},
                {"role": "user", "content": [{
                    # The OpenAI-compatible API expects image_url as an object with a "url" field
                    "type": "image_url", "image_url": {"url": data['base64']}
                },
                {"type": "text", "text": data['content']}]}
            ],
            stream=True
        )
        for chunk in completion:
            # Yield each streamed delta; the final chunk may carry no content
            choice = chunk.choices[0].delta.content
            if choice:
                yield json.dumps({"content": choice}) + "\n"
    # Stream the answer back to the front end
    return StreamingResponse(stream_chat(), media_type="text/event-stream")
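
A minimal client-side sketch of this endpoint (the image file name is hypothetical): it sends a local image as a data URL and prints the streamed deltas as they arrive.

import base64, json, requests

with open("cat.jpg", "rb") as f:
    data_url = "data:image/jpeg;base64," + base64.b64encode(f.read()).decode()
resp = requests.post("http://localhost:8000/recognize",
                     json={"base64": data_url, "content": "请描述这张图片"},
                     stream=True)
for line in resp.iter_lines():
    if line:
        print(json.loads(line)["content"], end="", flush=True)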

qa.py

qa.py exposes a chat API that combines streaming responses, external knowledge retrieval, and function calling, designed for building ChatGPT-style web and mobile AI assistants.

from fastapi import APIRouter, Body
from fastapi.responses import StreamingResponse
import json, os
from openai import OpenAI
from dotenv import load_dotenv
load_dotenv()
from module import aliyun_search
from func_calling import send_email, functions
qa = APIRouter()
# Note: this history is module-level, so it is shared by all clients of the process
messages = [{
    "role": "system",
    "content": "你是一名专业的AI助手,可以帮助用户解答任何问题。"
}]
# Define the endpoint and wrap the generator's output in a streaming response
@qa.post("/stream")
def stream(question: dict=Body()):
    # Read the JSON parameters
    content = question['content']
    search = question['search']
    if search:
        search_result = aliyun_search(content)
        message = {"role": "user", "content": f"请使用以下内容:\n{search_result}\n,并基于用户的提问:\n{content}\n来进行回答。"}
    else:
        message = {"role": "user", "content": content}
    # First ask the model (non-streaming) whether the question requires a function call
    def check_func_call(message):
        messages.append(message)
        client = OpenAI(api_key=os.getenv("Dashscope_API_Key"),
                        base_url="https://dashscope.aliyuncs.com/compatible-mode/v1")
        completion = client.chat.completions.create(
            model="qwen-plus",
            messages=messages,
            stream=False,     # function calling cannot be resolved together with streaming
            tools=functions
        )
        return completion.choices[0].message
    # stream_chat no longer appends the user message; check_func_call already did
    def stream_chat():
        client = OpenAI(api_key=os.getenv("Dashscope_API_Key"),
                        base_url="https://dashscope.aliyuncs.com/compatible-mode/v1")
        completion = client.chat.completions.create(
            model="qwen-plus",
            messages=messages,
            stream=True,
            stream_options={"include_usage": False}
        )
        # reply accumulates the full answer so it can be stored for chat memory
        reply = ""
        for chunk in completion:
            # Yield each streamed delta; the final chunk may carry no content
            choice = chunk.choices[0].delta.content
            if choice:
                reply += choice
                yield json.dumps({"content": choice}) + "\n"
        # After the loop, append the AI reply to messages as well
        messages.append({"role": "assistant", "content": reply})
    # First call check_func_call to see whether the model requests a tool call.
    # If so, execute the function and have the model relay its result to the user;
    # otherwise the user's question (already appended) goes straight to stream_chat.
    output = check_func_call(message)
    if output.tool_calls:
        func_name = output.tool_calls[0].function.name
        func_args = json.loads(output.tool_calls[0].function.arguments)  # safer than eval()
        func = globals()[func_name]
        result = func(**func_args)
        messages.append({"role": "user", "content": f"请将以下内容直接回复给用户: {result}"})
    return StreamingResponse(stream_chat(), media_type="text/event-stream")
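
The endpoint emits newline-delimited JSON, which a client can consume line by line. A minimal sketch (the question is an example; search=True routes the request through aliyun_search first):

import json, requests

resp = requests.post("http://localhost:8000/stream",
                     json={"content": "今天上海的天气怎么样?", "search": True},
                     stream=True)
for line in resp.iter_lines():
    if line:
        print(json.loads(line)["content"], end="", flush=True)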

main.py

The backend entry point.

from fastapi import FastAPI, Request
from fastapi.templating import Jinja2Templates
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
import uvicorn, os
from dotenv import load_dotenv
# Load environment variables first
load_dotenv()
app = FastAPI(
    title="AI助手API (含MCP功能)",
    description="集成高德地图MCP功能的AI助手",
    version="1.0.0"
)
# CORS configuration
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Mount the static directory
app.mount("/static", StaticFiles(directory="static"), name="static")
templates = Jinja2Templates(directory="templates")
# Import and include the routers
from qa import qa
from recognize import recog
from generate import generate
# Import lazily to avoid circular-import problems
try:
    from api.token import router as token_router
    app.include_router(token_router)
    print("✅ Token路由注册成功")
except ImportError as e:
    print(f"⚠️  Token路由导入失败: {e}")
# Include the remaining routers
app.include_router(qa)
app.include_router(recog)
app.include_router(generate)
@app.get('/')
def chat(request: Request):
    return templates.TemplateResponse(request=request, name="index.html")
@app.get('/api/info')
async def api_info():
    """API information"""
    return {
        "service": "AI Assistant with MCP",
        "version": "1.0.0",
        "features": ["普通聊天", "高德地图MCP集成"],
        "endpoints": {
            "POST /stream": "流式聊天接口",
            "GET /health": "健康检查",
            "GET /api/info": "API信息"
        },
        "config_status": {
            "dashscope": bool(os.getenv("Dashscope_API_Key")),
            "amap": bool(os.getenv("AMAP_API_KEY"))
        }
    }
if __name__ == '__main__':
    uvicorn.run(app, host="0.0.0.0", port=8000, log_level="info")
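
main.py registers a token router from api/token.py, which the article does not include; the front end's connectWebSocket() (script.js below) expects GET /api/speech/token to return a temporary token plus the app key. One possible implementation, sketched from Alibaba Cloud's documented CreateToken interface for the NLS service; the file path and router name follow main.py's import, and everything else is an assumption:

# api/token.py (hypothetical sketch, not shown in the article)
import json, os
from fastapi import APIRouter
from aliyunsdkcore.client import AcsClient
from aliyunsdkcore.request import CommonRequest

router = APIRouter()

@router.get("/api/speech/token")
def get_speech_token():
    # Mint a temporary token for the NLS speech gateway
    client = AcsClient(os.getenv("ALIYUN_ACCESS_KEY_ID"),
                       os.getenv("ALIYUN_ACCESS_KEY_SECRET"),
                       "cn-shanghai")
    req = CommonRequest()
    req.set_method('POST')
    req.set_domain('nls-meta.cn-shanghai.aliyuncs.com')
    req.set_version('2019-02-28')
    req.set_action_name('CreateToken')
    body = json.loads(client.do_action_with_exception(req))
    return {"token": body['Token']['Id'],
            "appkey": os.getenv("ALIYUN_ASR_APP_KEY")}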

Front end

templates\index.html

<!DOCTYPE html>
<html lang="zh-CN">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no">
    <title>智能多模态AI助手</title>
    <script src="/static/script.js"></script>
    <script src="/static/script1.js"></script>
	<script src="/static/script_moblie.js"></script>
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
    <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap" rel="stylesheet">
	<link rel="stylesheet" href="../static/styles_mobile.css">
</head>
<body>
    <div class="app-container">
        <header>
            <div class="logo">
                <div class="logo-icon">
                    <i class="fas fa-robot"></i>
                </div>
                <div>
                    <h1>多模态AI助手</h1>
                    <p class="tagline">支持对话、识图、绘图与语音交互</p>
                </div>
            </div>
            <div class="mode-selector">
                <button class="mode-btn active" data-mode="chat">
                    <i class="fas fa-comments"></i> 对话
                </button>
            </div>
        </header>
        <div class="main-content">
            <div class="chat-container">
                <div class="chat-header">
                    <h2>AI对话</h2>
                    <button class="clear-chat-btn" id="clearChat" onclick="clearChat()">
                        <i class="fas fa-trash-alt"></i>
                        清空对话
                    </button>
                </div>
                <div class="net-search">
                    <input type="checkbox" id="net-search"/>联网
                </div>
                <div class="messages-container" id="chatbox">
                    <!-- Messages are appended here dynamically -->
                    <div class="message ai">
                        <div class="avatar">
                            <i class="fas fa-robot"></i>
                        </div>
                        <div class="message-content">
                            <div class="message-text">
                                您好!我是多模态AI助手,支持文本对话、图像识别、图像生成和语音输入。请问有什么可以帮助您的?
                            </div>
                        </div>
                    </div>
                    <div class="message user">
                        <div class="avatar">
                            <i class="fas fa-user"></i>
                        </div>
                        <div class="message-content">
                            <div class="message-text">
                                请帮我生成一张日出的风景图片
                            </div>
                        </div>
                    </div>
                    <div class="message ai">
                        <div class="avatar">
                            <i class="fas fa-robot"></i>
                        </div>
                        <div class="message-content">
                            <div class="message-text">
                                这是根据您的要求生成的日出风景图:
                            </div>
                            <img src="https://images.unsplash.com/photo-1506905925346-21bda4d32df4?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=1350&q=80" alt="日出风景" class="message-image">
                            <div class="message-text" style="margin-top: 0.8rem; font-size: 0.9rem; color: #9ca3af;">
                                图片已生成,展现了日出时分的美丽景色,包含了山脉、云海和温暖的阳光。
                            </div>
                        </div>
                    </div>
                </div>
                    <div class="input-area">
                        <div class="action-buttons">
                            <button class="action-btn" title="上传图片" id="recognize-image" onclick="addImage()">
                                <i class="fas fa-image"></i>
                            </button>
                            <button class="action-btn" id="voiceBtn" title="语音输入" onclick="connectWebSocket()">
                                <i class="fas fa-microphone"></i>
                            </button>
                            <button class="action-btn" id="attachBtn" title="停止录音" onclick="stopRecording()">
                                <i class="fas fa-stop-circle"></i>
                            </button>
                        </div>
                        <textarea id="question" onkeyup="doEnter(event)" class="text-input" placeholder="输入您的问题或指令..."></textarea>
                        <div class="send-buttons">
                            <button class="send-btn" id="qa-button" onclick="doAsk()">
                                <i class="fas fa-paper-plane"></i>
                            </button>
                            <button class="generate-image" id="generate-image" onclick="generateImage()">
                                <i class="fas fa-wand-magic-sparkles"></i>
                            </button>
                        </div>
                    </div>
                    <div class="status info" id="typingStatus">
                        <i class="fas fa-circle-notch fa-spin"></i>
                        <span>AI正在思考...</span>
                    </div>
                </div>
            </div>
            <div class="side-panel">
                <div class="panel-section" id="imageDiv" style="display: none;">
                    <h3><i class="fas fa-eye"></i> 图像识别</h3>
                    <input type="file" id="imageInput" onchange="saveAndPreview()" style="display: none;">
                    <div class="image-preview" id="imagePreview">
                        <img id="preview">
                        <i class="fas fa-image" style="font-size: 3rem; color: #475569;"></i>
                    </div>
                    <p style="color: #6b7280; font-size: 0.9rem; margin-bottom: 1rem;">
                        上传图片后,AI将分析图片内容并描述识别结果。
                    </p>
                </div>
                <div class="status success" id="successStatus">
                    <i class="fas fa-check-circle"></i>
                    <span>图像生成成功!</span>
                </div>
                <div class="status error" id="errorStatus">
                    <i class="fas fa-exclamation-circle"></i>
                    <span>发生错误,请重试。</span>
                </div>
            </div>
        </div>
        <footer>
            <p>多模态AI助手 &copy; 2025 | 支持文本、图像、语音交互</p>
            <div class="footer-links">
                <a href="#"><i class="fas fa-shield-alt"></i> 隐私政策</a>
                <a href="#"><i class="fas fa-question-circle"></i> 使用帮助</a>
                <a href="#"><i class="fas fa-code"></i> API文档</a>
                <a href="#"><i class="fas fa-envelope"></i> 联系我们</a>
            </div>
        </footer>
    </div>  
</body>
</html>

styles_mobile.css

The mobile CSS file.

       /* Mobile optimization styles */
        :root {
            --mobile-padding: 16px;
            --input-height: 44px;
        }
        * {
            box-sizing: border-box;
            -webkit-tap-highlight-color: transparent;
        }
        body {
            margin: 0;
            padding: 0;
            overflow-x: hidden;
            font-size: 14px;
        }
        .app-container {
            min-height: 100vh;
            display: flex;
            flex-direction: column;
        }
        header {
            padding: 12px var(--mobile-padding);
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white;
            position: sticky;
            top: 0;
            z-index: 100;
        }
        .logo {
            display: flex;
            align-items: center;
            gap: 12px;
            margin-bottom: 12px;
        }
        .logo-icon {
            width: 36px;
            height: 36px;
            background: rgba(255, 255, 255, 0.2);
            border-radius: 10px;
            display: flex;
            align-items: center;
            justify-content: center;
            font-size: 18px;
        }
        .logo h1 {
            font-size: 18px;
            margin: 0;
            font-weight: 600;
        }
        .tagline {
            font-size: 12px;
            opacity: 0.9;
            margin: 2px 0 0 0;
        }
        .mode-selector {
            display: flex;
            overflow-x: auto;
            gap: 8px;
            padding-bottom: 4px;
            -webkit-overflow-scrolling: touch;
        }
        .mode-btn {
            padding: 8px 16px;
            background: rgba(255, 255, 255, 0.1);
            border: none;
            border-radius: 20px;
            color: white;
            font-size: 13px;
            display: flex;
            align-items: center;
            gap: 6px;
            white-space: nowrap;
        }
        .mode-btn.active {
            background: white;
            color: #667eea;
        }
        .main-content {
            flex: 1;
            display: flex;
            flex-direction: column;
            padding: var(--mobile-padding);
        }
        .chat-container {
            flex: 1;
            display: flex;
            flex-direction: column;
            background: white;
            border-radius: 16px;
            box-shadow: 0 2px 20px rgba(0, 0, 0, 0.05);
            overflow: hidden;
            margin-bottom: var(--mobile-padding);
        }
        .chat-header {
            padding: 16px;
            border-bottom: 1px solid #e5e7eb;
            display: flex;
            justify-content: space-between;
            align-items: center;
        }
        .chat-header h2 {
            font-size: 16px;
            margin: 0;
            font-weight: 600;
        }
        .clear-chat-btn {
            padding: 6px 12px;
            background: #f3f4f6;
            border: none;
            border-radius: 8px;
            color: #6b7280;
            font-size: 12px;
            display: flex;
            align-items: center;
            gap: 4px;
        }
        .net-search {
            padding: 12px 16px;
            border-bottom: 1px solid #e5e7eb;
            display: flex;
            align-items: center;
            gap: 8px;
        }
        .net-search input[type="checkbox"] {
            width: 18px;
            height: 18px;
        }
        .messages-container {
            flex: 1;
            padding: 16px;
            overflow-y: auto;
            -webkit-overflow-scrolling: touch;
            max-height: 50vh;
        }
        .message {
            display: flex;
            gap: 12px;
            margin-bottom: 20px;
        }
        .message.user {
            flex-direction: row-reverse;
        }
        .avatar {
            width: 32px;
            height: 32px;
            border-radius: 50%;
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            display: flex;
            align-items: center;
            justify-content: center;
            color: white;
            flex-shrink: 0;
        }
        .message.user .avatar {
            background: #10b981;
        }
        .message-content {
            max-width: 75%;
        }
        .message-text {
            padding: 12px;
            border-radius: 12px;
            background: #f3f4f6;
            line-height: 1.5;
            font-size: 14px;
        }
        .message.user .message-text {
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white;
            border-radius: 12px 12px 4px 12px;
        }
        .message-image {
            max-width: 100%;
            border-radius: 12px;
            margin-top: 8px;
        }
        .input-container {
            padding: 16px;
            border-top: 1px solid #e5e7eb;
        }
        .upload-area {
            padding: 20px;
            border: 2px dashed #d1d5db;
            border-radius: 12px;
            text-align: center;
            margin-bottom: 16px;
            background: #f9fafb;
        }
        .upload-area i {
            font-size: 24px;
            color: #6b7280;
            margin-bottom: 8px;
        }
        .upload-area p {
            margin: 8px 0;
            color: #374151;
            font-weight: 500;
        }
        .upload-area span {
            font-size: 12px;
            color: #9ca3af;
        }
        .input-area {
            display: flex;
            align-items: flex-end;
            gap: 8px;
            background: #f9fafb;
            border-radius: 24px;
            padding: 8px;
        }
        .action-buttons {
            display: flex;
            gap: 4px;
            margin-bottom: 8px;
        }
        .action-btn, .send-btn, .generate-image {
            width: 36px;
            height: 36px;
            border: none;
            border-radius: 50%;
            display: flex;
            align-items: center;
            justify-content: center;
            background: white;
            color: #6b7280;
            box-shadow: 0 2px 8px rgba(0, 0, 0, 0.05);
        }
        .send-btn {
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white;
            flex-shrink: 0;
        }
        .generate-image {
            background: #10b981;
            color: white;
            flex-shrink: 0;
        }
        .text-input {
            flex: 1;
            border: none;
            background: transparent;
            padding: 8px 12px;
            font-size: 14px;
            resize: none;
            min-height: 36px;
            max-height: 120px;
            outline: none;
            font-family: inherit;
            width: 100% !important; /* override inline styles */
        }
        .text-input::placeholder {
            color: #9ca3af;
        }
        .status {
            padding: 12px;
            border-radius: 8px;
            margin-top: 12px;
            display: flex;
            align-items: center;
            gap: 8px;
            font-size: 13px;
            display: none;
        }
        .status.info {
            background: #eff6ff;
            color: #1d4ed8;
        }
        .status.success {
            background: #ecfdf5;
            color: #047857;
        }
        .status.error {
            background: #fef2f2;
            color: #dc2626;
        }
        .side-panel {
            background: white;
            border-radius: 16px;
            padding: 20px;
            box-shadow: 0 2px 20px rgba(0, 0, 0, 0.05);
        }
        .panel-section {
            margin-bottom: 24px;
        }
        .panel-section h3 {
            font-size: 16px;
            margin: 0 0 16px 0;
            display: flex;
            align-items: center;
            gap: 8px;
            color: #374151;
        }
        .image-preview {
            width: 100%;
            height: 200px;
            border-radius: 12px;
            background: #f9fafb;
            display: flex;
            flex-direction: column;
            align-items: center;
            justify-content: center;
            margin-bottom: 16px;
            overflow: hidden;
        }
        .image-preview img {
            width: 100%;
            height: 100%;
            object-fit: cover;
            display: none;
        }
        footer {
            padding: 20px var(--mobile-padding);
            background: #f9fafb;
            text-align: center;
            border-top: 1px solid #e5e7eb;
        }
        footer p {
            margin: 0 0 16px 0;
            color: #6b7280;
            font-size: 12px;
        }
        .footer-links {
            display: flex;
            flex-wrap: wrap;
            justify-content: center;
            gap: 16px;
        }
        .footer-links a {
            color: #667eea;
            text-decoration: none;
            font-size: 12px;
            display: flex;
            align-items: center;
            gap: 4px;
        }
        /* Responsive adjustments */
        @media (min-width: 768px) {
            .main-content {
                flex-direction: row;
                gap: var(--mobile-padding);
            }
            .chat-container {
                flex: 2;
                margin-bottom: 0;
            }
            .side-panel {
                flex: 1;
            }
            .messages-container {
                max-height: 60vh;
            }
        }
        /* Touch-device tweaks */
        @media (hover: none) and (pointer: coarse) {
            button, .action-btn, .send-btn, .generate-image {
                min-height: 44px;
                min-width: 44px;
            }
            .mode-btn {
                padding: 10px 20px;
            }
            .clear-chat-btn {
                padding: 8px 16px;
            }
        }
        /* Hide desktop-only elements */
        .desktop-only {
            display: none;
        }
        /* Scrollbar styling */
        ::-webkit-scrollbar {
            width: 4px;
        }
        ::-webkit-scrollbar-track {
            background: transparent;
        }
        ::-webkit-scrollbar-thumb {
            background: #d1d5db;
            border-radius: 2px;
        }

static\script.js

The front-end voice-control JS file.

let websocket;
let audioContext;
let scriptProcessor;
let audioInput;
let audioStream;
let isRecording = false; // recording-state flag
async function getSpeechToken() {
    try {
        const response = await fetch('/api/speech/token');
        if (!response.ok) {
            throw new Error(`HTTP error! status: ${response.status}`);
        }
        const data = await response.json();
        return {
            token: data.token,
            appkey: data.appkey
        };
    } catch (error) {
        console.error('获取Token失败:', error);
        // If the backend endpoint fails, rethrow; the caller shows a fallback hint
        throw error;
    }
}
// Update the connection-status display
function updateStatus(status) {
    const statusDisplay = document.getElementById('question') || document.getElementById('status');
    if (statusDisplay) {
        if (statusDisplay.tagName === 'TEXTAREA') {
            // For a textarea, only update the placeholder
            statusDisplay.placeholder = status;
        } else {
            statusDisplay.textContent = status;
            statusDisplay.style.color = status === '已连接' ? 'green' : 'red';
        }
    }
}
// Generate a UUID
function generateUUID() {
    return ([1e7] + -1e3 + -4e3 + -8e3 + -1e11).replace(/[018]/g, c =>
        (c ^ crypto.getRandomValues(new Uint8Array(1))[0] & 15 >> c / 4).toString(16)
    ).replace(/-/g, '');
}
// Append a recognition result to the textarea
function addToTextarea(text, isFinal = false) {
    const textarea = document.getElementById('question');
    if (!textarea) return;
    if (isFinal) {
        // For a final result, append to the end on a new line
        if (textarea.value) {
            textarea.value += '\n' + text;
        } else {
            textarea.value = text;
        }
        textarea.value += '\n'; // trailing newline to separate results
        // Show a temporary confirmation for the final result
        const tempText = document.createElement('div');
        tempText.textContent = `✓ 识别完成: ${text}`;
        tempText.style.color = 'green';
        tempText.style.fontWeight = 'bold';
        document.body.appendChild(tempText);
        setTimeout(() => tempText.remove(), 3000);
    } else {
        // Intermediate results could be shown elsewhere or replace the last line;
        // here we render them in a floating overlay
        showIntermediateResult(text);
    }
    // Scroll the textarea to the bottom
    textarea.scrollTop = textarea.scrollHeight;
    // Fire an input event so other listeners can react
    textarea.dispatchEvent(new Event('input'));
}
// Show an intermediate result in a floating overlay
function showIntermediateResult(text) {
    // Reuse the floating element if it already exists, otherwise create it
    let floatingDiv = document.getElementById('floating-result');
    if (!floatingDiv) {
        floatingDiv = document.createElement('div');
        floatingDiv.id = 'floating-result';
        floatingDiv.style.position = 'fixed';
        floatingDiv.style.bottom = '100px';
        floatingDiv.style.right = '20px';
        floatingDiv.style.backgroundColor = 'rgba(0,0,0,0.7)';
        floatingDiv.style.color = 'white';
        floatingDiv.style.padding = '10px';
        floatingDiv.style.borderRadius = '5px';
        floatingDiv.style.zIndex = '1000';
        document.body.appendChild(floatingDiv);
    }
    floatingDiv.textContent = `正在识别: ${text}`;
    floatingDiv.style.display = 'block';
    // Fade out after 3 seconds
    clearTimeout(floatingDiv.timeout);
    floatingDiv.timeout = setTimeout(() => {
        floatingDiv.style.opacity = '0';
        floatingDiv.style.transition = 'opacity 1s';
        setTimeout(() => {
            if (floatingDiv.parentNode) {
                floatingDiv.parentNode.removeChild(floatingDiv);
            }
        }, 1000);
    }, 3000);
}
// Log helper
function logMessage(message) {
    console.log('日志:', message); // keep the console log
    // Optionally mirror the log elsewhere on the page
    const logArea = document.getElementById('messages') || document.getElementById('log-area');
    if (logArea) {
        const messageElement = document.createElement('div');
        messageElement.textContent = message;
        logArea.appendChild(messageElement);
        logArea.scrollTop = logArea.scrollHeight;
    }
}
// Open the WebSocket connection
async function connectWebSocket() {
    try {
        // 1. Fetch a temporary token from the backend
        const auth = await getSpeechToken();
        const appkey = auth.appkey;
        const token = auth.token;
        // 2. Same as before, except token and appkey are now fetched dynamically
        const socketUrl = `wss://nls-gateway.cn-shanghai.aliyuncs.com/ws/v1?token=${token}`;
        websocket = new WebSocket(socketUrl);
        websocket.onopen = function() {
            updateStatus('已连接');
            logMessage('连接到 WebSocket 服务器');
            var startTranscriptionMessage = {
                header: {
                    appkey: appkey,  // the dynamically fetched appkey
                    namespace: "SpeechTranscriber",
                    name: "StartTranscription",
                    task_id: generateUUID(),
                    message_id: generateUUID()
                },
                payload: {
                    "format": "pcm",
                    "sample_rate": 16000,
                    "enable_intermediate_result": true,
                    "enable_punctuation_prediction": true,
                    "enable_inverse_text_normalization": true
                }
            };
            websocket.send(JSON.stringify(startTranscriptionMessage));
        };
        // 3. The original event listeners are unchanged
        websocket.onmessage = function(event) {
            logMessage('收到消息: ' + event.data);
            try {
                const message = JSON.parse(event.data);
                if (message.header.name === "TranscriptionStarted") {
                    logMessage('语音识别已启动');
                } 
                else if (message.header.name === "TranscriptionResultChanged") {
                    const text = message.payload.result;
                    if (text) {
                        addToTextarea(text, false);
                        logMessage('中间结果: ' + text);
                    }
                }
                else if (message.header.name === "SentenceEnd") {
                    const text = message.payload.result;
                    if (text) {
                        addToTextarea(text, true);
                        logMessage('识别完成: ' + text);
                    }
                }
                else if (message.header.name === "TranscriptionCompleted") {
                    logMessage('语音识别完成');
                }
                else if (message.header.name === "TaskFailed") {
                    logMessage('识别失败: ' + (message.payload.message || '未知错误'));
                }
            } catch (e) {
                logMessage('解析消息失败: ' + e.message);
            }
        };
        websocket.onerror = function(event) {
            updateStatus('错误');
            logMessage('WebSocket 错误: ' + event.type);
        };
        websocket.onclose = function() {
            updateStatus('断开连接');
            logMessage('与 WebSocket 服务器断开');
        };
        // 4. The original recording logic is unchanged
        startRecording();
    } catch (error) {
        console.error('连接WebSocket失败:', error);
        // 5. Optional fallback hint
        const textarea = document.getElementById('question');
        if (textarea) {
            textarea.value += '\n[语音服务暂时不可用,请稍后重试]\n';
            textarea.scrollTop = textarea.scrollHeight;
        }
        updateStatus('连接失败');
    }
}
// Close the WebSocket connection
function disconnectWebSocket() {
    if (websocket) {
        websocket.close();
    }
    updateStatus('未连接');
    isRecording = false;
}
// Start recording
async function startRecording() {
    try {
        if (isRecording) {
            logMessage('已经在录音中');
            return;
        }
        logMessage('开始录音...');
        // Acquire the audio input device
        audioStream = await navigator.mediaDevices.getUserMedia({ 
            audio: {
                sampleRate: 16000,
                channelCount: 1,
                echoCancellation: true,
                noiseSuppression: true
            } 
        });
        audioContext = new (window.AudioContext || window.webkitAudioContext)({
            sampleRate: 16000
        });
        audioInput = audioContext.createMediaStreamSource(audioStream);
        // Script processor with a 2048-sample buffer
        scriptProcessor = audioContext.createScriptProcessor(2048, 1, 1);
        scriptProcessor.onaudioprocess = function(event) {
            if (!isRecording) return;
            const inputData = event.inputBuffer.getChannelData(0);
            const inputData16 = new Int16Array(inputData.length);
            for (let i = 0; i < inputData.length; ++i) {
                inputData16[i] = Math.max(-1, Math.min(1, inputData[i])) * 0x7FFF; // PCM 16-bit
            }
            if (websocket && websocket.readyState === WebSocket.OPEN) {
                websocket.send(inputData16.buffer);
            }
        };
        audioInput.connect(scriptProcessor);
        scriptProcessor.connect(audioContext.destination);
        isRecording = true;
        logMessage('录音已开始');
        // Mark the start of recording in the textarea
        const textarea = document.getElementById('question');
        if (textarea) {
            if (textarea.value && !textarea.value.endsWith('\n')) {
                textarea.value += '\n';
            }
            //textarea.value += '[开始录音...]\n';
            textarea.scrollTop = textarea.scrollHeight;
        }
    } catch (e) {
        logMessage('录音失败: ' + e.message);
        isRecording = false;
    }
}
// Stop recording
function stopRecording() {
    if (!isRecording) {
        logMessage('当前没有在录音');
        return;
    }
    if (scriptProcessor) {
        scriptProcessor.disconnect();
        scriptProcessor = null;
    }
    if (audioInput) {
        audioInput.disconnect();
        audioInput = null;
    }
    if (audioStream) {
        audioStream.getTracks().forEach(track => track.stop());
        audioStream = null;
    }
    if (audioContext) {
        audioContext.close();
        audioContext = null;
    }
    isRecording = false;
    logMessage('录音已停止');
    // Mark the end of recording in the textarea
    const textarea = document.getElementById('question');
    if (textarea) {
        //textarea.value += '[录音结束]\n';
        textarea.scrollTop = textarea.scrollHeight;
        textarea.dispatchEvent(new Event('input'));
    }
	disconnectWebSocket()
}
// Helper to clear the textarea
function clearTextarea() {
    const textarea = document.getElementById('question');
    if (textarea) {
        textarea.value = '';
        textarea.dispatchEvent(new Event('input'));
    }
}
// Enter-key handler (the textarea uses onkeyup="doEnter(event)"); note that script1.js
// also defines doEnter and, loading later, overrides this version
function doEnter(event) {
    if (event.key === 'Enter' && !event.shiftKey) {
        event.preventDefault();
        // Handle the send action
        const textarea = document.getElementById('question');
        if (textarea && textarea.value.trim()) {
            // Message-sending logic could be hooked in here
            console.log('发送消息:', textarea.value);
            // Clear (or keep) the content
            textarea.value = '';
        }
    }
}

static\script1.js

The JS file for everything other than voice input.

let switch_voice = false;    // global flag; speech playback is off by default
const synth = window.speechSynthesis;  // global speech-synthesis object
sessionStorage.clear();   // clear any previous content on every page refresh
// Text-to-speech helper
function speak(content) {
    // Create a speech-synthesis utterance with the text to read aloud
    let utterance = new SpeechSynthesisUtterance(content);
    utterance.lang = 'zh-CN';   // language (the actual voice depends on the OS locale)
    utterance.volume = 1;       // volume: 1 by default; 0.8, 1.2, etc. also work
    utterance.rate = 1;         // speech rate: 1 by default; faster or slower also works
    synth.cancel();             // clear the synthesis queue, stopping any current speech
    synth.speak(utterance);     // read aloud
}
// Toggle between reading aloud and stopping
function readText(obj) {
    // obj is the read-aloud button generated dynamically with each AI reply
    let chatbox = document.getElementById('chatbox');
    if (switch_voice) {     // playback is on, so turn it off
        obj.innerText = '朗读';
        switch_voice = false;
        synth.cancel();
    } else {
        obj.innerText = '停止';
        switch_voice = true;
        synth.cancel();
        // Grab the parent container's content, i.e. the AI reply DIV;
        // the read button itself lives in that innerHTML, so strip it off
        let content = obj.parentNode.innerHTML.split("<button")[0];
        if (content) {
            speak(content);
        }
    }
}
// Respond to Ctrl+Enter as an alternative to clicking the ask button
function doEnter(e) {
    if (e.key == "Enter" && e.ctrlKey) {
        doAsk();
    }
}
// Scroll to the very bottom
function scrollToBottom() {
    var chatbox = document.getElementById('chatbox');
    chatbox.scrollTop = chatbox.scrollHeight;
}
function clearChat() {
    // Note: this only clears the input box, not the chat history display
    question.value = "";
}
// If the user uploaded an image this is image recognition, otherwise plain Q&A; each path calls a different function
function doAsk() {
    // Create a question DIV with class ask-box to match the CSS
    let ask = document.createElement('div');
    ask.setAttribute("class", "ask-box");
    if (sessionStorage.getItem("image")) {
        ask.innerHTML = '<img src="' + sessionStorage.getItem("image") + '" style="width:100%"><br/>' + question.value;
        document.getElementById("chatbox").append(ask);
        scrollToBottom();
        recognizeImage();
    }
    else {
        ask.innerHTML = document.getElementById("question").value;
        // Append the DIV to the chatbox container as a child
        document.getElementById("chatbox").append(ask);
        scrollToBottom();
        doAnswer();
    }
}
// Front-end implementation of plain text Q&A
function doAnswer() {
    // Create the AI reply DIV with class answer-box to match the CSS
    let answer = document.createElement('div');
    answer.setAttribute("class", "answer-box");
    document.getElementById("chatbox").append(answer);
    let content = document.getElementById("question").value;
    let checkbox  = document.getElementById("net-search");
    if (checkbox.checked) {
        params = {"content": content, "search": true}
    }
    else {
        params = {"content": content, "search": false}
    }
    // fetch the backend response stream, parse it, and append it to the reply DIV
    fetch("/stream", {
            method: 'POST',
            headers: {'Content-Type': 'application/json'},
            body: JSON.stringify(params)
    }).then(async result => {
        const reader = result.body.getReader();
        const textDecoder = new TextDecoder("utf-8");
        while(true){
            const {done, value} = await reader.read();
            if(done) {
                // Append the read-aloud button right after the reply content
                answer.innerHTML += "<button onclick='readText(this)' class='read-button' id='speak'>朗读</button>";
                scrollToBottom();
                return
            }
            let jsonList = textDecoder.decode(value).split("\n");
            for (let i=0; i<jsonList.length-1; i++) {
                jsonObj = JSON.parse(jsonList[i]);
                // Replace \n with <br> so line breaks render in the browser
                answer.innerHTML += jsonObj['content'].replaceAll("\n", "<br/>");
            }
        }
    });
}
// Save the image to sessionStorage and show a preview
function saveAndPreview() {
    // Get the file-upload element
    var input = document.getElementById('imageInput');
    // If a file was selected, proceed
    if (input.files && input.files[0]) {
        // Read the file with FileReader and handle its onload event
        var reader = new FileReader();
        reader.onload = function(e) {
            var img = document.getElementById('preview');
            img.src = e.target.result;    // the Base64 data URL
            img.style.display = 'block';  // show the preview element
            // Narrow the question textarea so the preview image fits
            document.getElementById('question').style.width = '450px';
            document.getElementById('imageDiv').style.display = 'block';
            // Store the image data in sessionStorage
            sessionStorage.clear();
            sessionStorage.setItem('image', e.target.result);
            // Focus the input box and provide a default prompt
            var question = document.getElementById("question");
            question.focus();
            // Pre-fill a prompt to save the user some typing
            question.value = "请识别图片中的内容,尽量覆盖尽可能多的信息,描述尽量简洁明了。";
        }
        reader.readAsDataURL(input.files[0]);
    }
}
function addImage() {
    document.getElementById('imageInput').click();
}
// Front/back-end hookup for image recognition
function recognizeImage() {
    let answer = document.createElement('div');
    let question = document.getElementById("question");
    let ask = document.createElement('div');
    ask.setAttribute("class", "ask-box");
    // Upload the image to the backend directly as Base64
    let params = {"base64": sessionStorage.getItem("image"), "content": question.value}
    answer.setAttribute("class", "answer-box");
    document.getElementById("chatbox").append(answer);
    fetch("/recognize", {
        method: 'POST',
        headers: {'Content-Type': 'application/json'},
        body: JSON.stringify(params)
    }).then(async result => {
        const reader = result.body.getReader();
        const textDecoder = new TextDecoder("utf-8");
        while(true){
            const {done, value} = await reader.read();
            if(done) {
                answer.innerHTML += "<button onclick='readText(this)' class='read-button' id='speak'>朗读</button>";
                scrollToBottom();
                return
            }
            let jsonList = textDecoder.decode(value).split("\n");
            for (let i=0; i<jsonList.length-1; i++) {
                jsonObj = JSON.parse(jsonList[i]);
                answer.innerHTML += jsonObj['content'].replaceAll("\n", "<br/>");
            }
        }
    });
    // Remove the image from sessionStorage, hide the preview, and restore the initial state
    sessionStorage.clear();
    document.getElementById("imageDiv").style.display = "none";
    document.getElementById("question").style.width = "560px";
}
// Front-end hookup and rendering for image generation
function generateImage() {
    let ask = document.createElement('div');
    ask.setAttribute("class", "ask-box");
    ask.innerHTML = document.getElementById("question").value;
    document.getElementById("chatbox").append(ask);
    scrollToBottom();
    let answer = document.createElement('div');
    answer.setAttribute("class", "answer-box");
    fetch("/generate", {
        method: 'POST',
        headers: {'Content-Type': 'application/json'},
        body: JSON.stringify({"content": document.getElementById("question").value})
    }).then(result => {
        return result.json();
    }).then(data => {
        let url = data['image_url'];
        // Render the image into the reply box
        answer.innerHTML = "<img src='"+url+"' style='width: 100%'/>";
        document.getElementById("chatbox").append(answer);
        scrollToBottom();
    });
}

Below are two images the product generated.

Prompt: 武松打虎 (Wu Song fights the tiger)

Prompt: 帮我生成一幅美丽的风景画 (generate a beautiful landscape painting)

Gu Xiang's closing thought: in the AI era, mastering the known unknowns is no longer difficult; the key is discovering the unknown unknowns, and they often hide in the very process of exploring the known unknowns.
