从零实现一个最小可用的 RAG(检索增强生成)服务:含文档索引、向量检索、上下文组装与生成回答,并标注引用来源。以 Node.js 为例,其他语言可类比实现。
# 初始化项目
mkdir rag-demo && cd rag-demo
npm init -y
npm i express openai cors dotenv
# 如果使用 PGVector 或 Milvus 等请安装对应客户端
将原始文档放置在 ./data 目录,做清洗与分块。分块建议以标题+段落为单位,每块长度控制在 300–800 字。
// scripts/chunk.js(示意)
import fs from 'node:fs';
/**
 * Split raw text into chunks on blank-line (paragraph) boundaries.
 * Optionally caps each chunk at `maxLen` characters, slicing oversized
 * paragraphs so chunks stay within an embedding-friendly size.
 * @param {string} text - raw document text
 * @param {number} [maxLen=Infinity] - max characters per chunk; the default keeps the original paragraph-only behavior
 * @returns {string[]} non-empty, trimmed chunks
 */
export function chunkText(text, maxLen = Infinity){
  const paras = text.split(/\n\n+/).map(x=>x.trim()).filter(Boolean);
  if (!Number.isFinite(maxLen)) return paras;
  const out = [];
  for (const p of paras){
    // Slice long paragraphs into fixed-size windows; short ones pass through whole.
    for (let i = 0; i < p.length; i += maxLen) out.push(p.slice(i, i + maxLen));
  }
  return out;
}
/**
 * Read every file under `dir` and return it as a raw document record:
 * `{ id, text, meta: { source } }`, all keyed by the filename.
 * NOTE(review): assumes `dir` contains only regular UTF-8 text files — verify.
 * @param {string} [dir='./data'] - directory holding the source documents
 * @returns {{id: string, text: string, meta: {source: string}}[]}
 */
export function loadDocs(dir='./data'){
  const fileNames = fs.readdirSync(dir);
  return fileNames.map((name) => {
    const text = fs.readFileSync(`${dir}/${name}`, 'utf8');
    return { id: name, text, meta: { source: name } };
  });
}
// scripts/index.js — teaching-grade in-memory index builder:
// embeds every chunk of every document and persists the vectors to JSON.
import OpenAI from 'openai';
import { chunkText, loadDocs } from './chunk.js';
import fs from 'node:fs';
import 'dotenv/config';
// API_KEY / API_BASE / EMBED_MODEL come from the environment (.env is loaded via dotenv).
const client = new OpenAI({ apiKey: process.env.API_KEY, baseURL: process.env.API_BASE });
const docs = loadDocs();
const entries = [];
for (const d of docs){
const chunks = chunkText(d.text);
// One embeddings request per document; the API accepts an array of inputs.
const embeds = await client.embeddings.create({ model: process.env.EMBED_MODEL, input: chunks });
embeds.data.forEach((v,i)=>{
// Embeddings come back in input order, so index i maps each vector to its chunk text.
entries.push({ id: `${d.id}-${i}`, vector: v.embedding, text: chunks[i], meta: d.meta });
});
}
// Persist the entire index as JSON; server.js loads this file at startup.
fs.writeFileSync('./vectorStore.json', JSON.stringify(entries));
console.log('索引完成,写入 vectorStore.json');
// lib/search.js — simplified cosine similarity and top-K retrieval.

/**
 * Cosine similarity of two equal-length numeric vectors.
 * The small epsilon in the denominator guards against zero-norm vectors.
 * @param {number[]} a
 * @param {number[]} b
 * @returns {number} similarity in [-1, 1] (approximately, due to the epsilon)
 */
export function cosineSimilarity(a,b){
  let dot = 0;
  let sumSqA = 0;
  let sumSqB = 0;
  for (let i = 0; i < a.length; i++){
    dot += a[i] * b[i];
    sumSqA += a[i] * a[i];
    sumSqB += b[i] * b[i];
  }
  return dot / (Math.sqrt(sumSqA) * Math.sqrt(sumSqB) + 1e-8);
}

/**
 * Return the k entries most similar to the query vector, highest score first.
 * Each result is a shallow copy of the entry with an added `score` field.
 * @param {{vector: number[]}[]} entries - indexed chunks with embedding vectors
 * @param {number[]} qvec - query embedding
 * @param {number} [k=6] - number of results to keep
 */
export function topK(entries, qvec, k=6){
  const scored = entries.map((entry) => ({ ...entry, score: cosineSimilarity(entry.vector, qvec) }));
  scored.sort((p, q) => q.score - p.score);
  return scored.slice(0, k);
}
// server.js(最小可用后端)
import express from 'express';
import cors from 'cors';
import fs from 'node:fs';
import OpenAI from 'openai';
import 'dotenv/config';
import { topK } from './lib/search.js';
// App setup: permissive CORS (demo only) + JSON body parsing.
const app = express(); app.use(cors()); app.use(express.json());
const client = new OpenAI({ apiKey: process.env.API_KEY, baseURL: process.env.API_BASE });
// In-memory vector store built by scripts/index.js.
const store = JSON.parse(fs.readFileSync('./vectorStore.json', 'utf8'));
app.get('/health', (_,res)=>res.json({ ok: true }));
// POST /query { query } → { answer, citations }
app.post('/query', async (req,res)=>{
  const { query } = req.body;
  if(!query) return res.status(400).json({ error: 'missing query' });
  try {
    // Embed the query, retrieve the 6 closest chunks, then answer with citations.
    const qembed = await client.embeddings.create({ model: process.env.EMBED_MODEL, input: query });
    const hits = topK(store, qembed.data[0].embedding, 6);
    const context = hits.map(h=>`【片段】${h.text}\n来源:${h.meta.source}`).join('\n\n');
    const completion = await client.chat.completions.create({
      model: process.env.MODEL_NAME,
      messages: [
        { role: 'system', content: '仅依据提供片段回答,并在末尾列出引用来源。' },
        { role: 'user', content: `资料:\n\n${context}\n\n问题:${query}` }
      ]
    });
    res.json({ answer: completion.choices[0].message.content, citations: hits.map(h=>h.meta.source) });
  } catch (err) {
    // BUG FIX: Express 4 does not catch errors thrown in async handlers; without this
    // try/catch a failed embedding/completion call left the request hanging forever.
    console.error(err);
    res.status(500).json({ error: 'internal error' });
  }
});
app.listen(8787, ()=>console.log('RAG 服务已启动 http://localhost:8787'));
# 运行:先在 .env 文件中配置 API_KEY、API_BASE、EMBED_MODEL、MODEL_NAME(代码已通过 dotenv 自动加载);
# 也可以在命令前内联传入,例如:API_KEY=... API_BASE=... EMBED_MODEL=... MODEL_NAME=... node scripts/index.js
node scripts/index.js
node server.js
// Example client call: POST a question to the /query endpoint.
const resp = await fetch('http://localhost:8787/query', {
method: 'POST', headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ query: '公司隐私策略的核心要点?' })
});
const data = await resp.json();
// data.answer is the generated text; data.citations lists the source filenames used.
console.log(data.answer, data.citations);
以下示例展示三类评测:LLM-as-Judge、信息覆盖率(Coverage)与引用准确率(Citation Accuracy)。
// eval/judge.js — LLM-as-Judge: grade an answer for faithfulness and usefulness.
import OpenAI from 'openai';
import 'dotenv/config';
const client = new OpenAI({ apiKey: process.env.API_KEY, baseURL: process.env.API_BASE });
/**
 * Ask the judge model to grade `answer` against the retrieved `context`.
 * @param {{question: string, answer: string, context: string}} args
 * @returns {Promise<string>} free-form verdict (0-5 scores + one-line rationale)
 */
export async function judge({ question, answer, context }){
  const prompt = `你是评审,依据提供片段严格评估答案的真实性与有用性:\n- 真实性(faithfulness):答案是否仅依据片段,不引入外部臆断?\n- 有用性(usefulness):答案是否清晰、完整、结构化?\n请分别给出 0-5 分,并给出一句话解释。`;
  const msg = [
    { role: 'system', content: '你是严谨的中文评审助手。' },
    // BUG FIX: the grading rubric (`prompt`) was built but never sent to the model,
    // so the judge had no scoring instructions. Prepend it to the user message.
    { role: 'user', content: `${prompt}\n\n片段:\n${context}\n\n问题:${question}\n\n答案:${answer}` }
  ];
  const res = await client.chat.completions.create({ model: process.env.MODEL_NAME, messages: msg });
  return res.choices[0].message.content;
}
// eval/coverage.js — information coverage: how many retrieved chunks the answer
// "touches", measured by embedding cosine similarity against a tunable threshold.
import OpenAI from 'openai';
import 'dotenv/config';
const client = new OpenAI({ apiKey: process.env.API_KEY, baseURL: process.env.API_BASE });
/**
 * Embed the answer once, then compare it with each retrieved chunk's vector.
 * @param {{answer: string, topChunks: {vector: number[]}[], embedModel: string}} args
 * @returns {Promise<{matched: number, total: number, rate: number}>}
 */
export async function coverage({ answer, topChunks, embedModel }){
  const a = await client.embeddings.create({ model: embedModel, input: answer });
  const avec = a.data[0].embedding;
  // PERF FIX: the answer-vector norm is loop-invariant — it was recomputed for
  // every chunk; hoist it out of the map.
  const nb = Math.sqrt(avec.reduce((s,x)=>s + x*x, 0));
  const sims = topChunks.map(t => {
    const v = t.vector;
    const dot = v.reduce((s,x,i)=>s + x*avec[i], 0);
    const na = Math.sqrt(v.reduce((s,x)=>s + x*x, 0));
    return dot / (na*nb + 1e-8); // epsilon guards against zero-norm vectors
  });
  const matched = sims.filter(s => s >= 0.3).length; // threshold is tunable
  return { matched, total: topChunks.length, rate: matched / Math.max(topChunks.length, 1) };
}
// eval/citation.js — citation accuracy: the fraction of cited sources that
// actually appear among the retrieved hits.
/**
 * @param {{citations: string[], hits: {meta?: {source?: string}}[]}} args
 * @returns {{correct: number, total: number, rate: number}}
 */
export function citationAccuracy({ citations, hits }){
  const validSources = new Set();
  for (const hit of hits){
    validSources.add(String(hit.meta?.source).trim());
  }
  const total = citations.length;
  let correct = 0;
  for (const raw of citations){
    if (validSources.has(String(raw).trim())) correct += 1;
  }
  // Guard against division by zero when nothing was cited.
  return { correct, total, rate: total ? correct / total : 0 };
}