From 9bb830c277f830414387117d02c9cb74e4a01420 Mon Sep 17 00:00:00 2001 From: zimk Date: Tue, 17 Mar 2026 00:00:16 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E5=BC=95=E5=85=A5=20pdf.js=EF=BC=8C?= =?UTF-8?q?=E6=94=AF=E6=8C=81=20PDF=20=E6=96=87=E6=9C=AC=E6=8F=90=E5=8F=96?= =?UTF-8?q?=E5=86=85=E8=81=94?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- public/app.js | 25 ++++++++++++++++++++++++- public/index.html | 1 + 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/public/app.js b/public/app.js index b2af219..ac4a785 100644 --- a/public/app.js +++ b/public/app.js @@ -364,6 +364,23 @@ async function addPendingFiles(files) { } catch (err) { console.warn('pptx 解析失败', err); } + } else if (kind === 'pdf') { + try { + const arrayBuffer = await readFileAsArrayBuffer(file); + const pdfjsLib = window['pdfjs-dist/build/pdf']; + pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdn.jsdelivr.net/npm/pdfjs-dist@3.11.174/build/pdf.worker.min.js'; + const pdf = await pdfjsLib.getDocument({ data: arrayBuffer }).promise; + const pageTexts = []; + for (let i = 1; i <= pdf.numPages; i++) { + const page = await pdf.getPage(i); + const textContent = await page.getTextContent(); + const pageText = textContent.items.map((item) => item.str).join(' '); + if (pageText.trim()) pageTexts.push(`[第 ${i} 页]\n${pageText}`); + } + parsedText = pageTexts.join('\n\n') || '(PDF 文本提取为空,可能是扫描件)'; + } catch (err) { + console.warn('pdf.js 解析失败', err); + } } const dataUrl = await readFileAsDataURL(file); @@ -539,8 +556,14 @@ function buildUserMessage(message, attachments) { type: 'text', text: `文件名:${file.name}\n内容:\n\`\`\`\n${decoded}\n\`\`\``, }); + } else if ((file.kind === 'docx' || file.kind === 'xlsx' || file.kind === 'pptx' || file.kind === 'pdf') && file.parsedText != null) { + // Office 文档 / PDF:使用前端解析出的文本内联 + content.push({ + type: 'text', + text: `文件名:${file.name}\n内容:\n\`\`\`\n${file.parsedText}\n\`\`\``, + }); } else { - // PDF / 其他二进制:记录下来,后面统一追加描述 + // 其他二进制:记录下来,后面统一追加描述 nonInlineable.push(file); } }); diff --git a/public/index.html b/public/index.html index e1066c3..cf21053 100644 --- a/public/index.html +++ b/public/index.html @@ -91,6 +91,7 @@ +