实战教程:从零搭建 RAG 服务

目标

从零实现一个最小可用的 RAG(检索增强生成)服务:含文档索引、向量检索、上下文组装与生成回答,并标注引用来源。以 Node.js 为例,其他语言可类比实现。

准备与依赖

# 初始化项目
mkdir rag-demo && cd rag-demo
npm init -y
npm i express openai cors dotenv
# 如果使用 PGVector 或 Milvus 等请安装对应客户端

数据准备与分块

将原始文档放置在 ./data 目录,做清洗与分块。分块建议以标题+段落为单位,长度控制在 300-800 字左右。

// scripts/chunk.js(示意)
import fs from 'node:fs';
// Split raw text into paragraph-level chunks.
// Paragraphs are delimited by one or more blank lines; fragments that are
// empty after trimming surrounding whitespace are dropped.
export function chunkText(text){
  const pieces = [];
  for (const raw of text.split(/\n\n+/)) {
    const piece = raw.trim();
    if (piece) pieces.push(piece);
  }
  return pieces;
}
// Read every file in `dir` as UTF-8 and wrap it as a document record.
// The filename serves as both the record id and its `meta.source` provenance.
export function loadDocs(dir='./data'){
  const records = [];
  for (const name of fs.readdirSync(dir)) {
    const body = fs.readFileSync(`${dir}/${name}`, 'utf8');
    records.push({ id: name, text: body, meta: { source: name } });
  }
  return records;
}

嵌入与索引

// scripts/index.js(教学用内存索引)
import OpenAI from 'openai';
import { chunkText, loadDocs } from './chunk.js';
import fs from 'node:fs';
import 'dotenv/config';
// OpenAI-compatible client; endpoint and API key come from the environment
// (loaded above via dotenv).
const client = new OpenAI({ apiKey: process.env.API_KEY, baseURL: process.env.API_BASE });

// Embed every chunk of every document and collect flat index entries.
const docs = loadDocs();
const entries = [];
for (const d of docs){
  const chunks = chunkText(d.text);
  // One batched embeddings call per document; the API returns one vector per
  // input string, in the same order as `chunks`, so index i lines up below.
  const embeds = await client.embeddings.create({ model: process.env.EMBED_MODEL, input: chunks });
  embeds.data.forEach((v,i)=>{
    // Entry id is "<docId>-<chunkIndex>" so retrieval hits can be traced back.
    entries.push({ id: `${d.id}-${i}`, vector: v.embedding, text: chunks[i], meta: d.meta });
  });
}
// Persist the in-memory index as a JSON file (teaching setup only; the tip
// below recommends a real vector database for production).
fs.writeFileSync('./vectorStore.json', JSON.stringify(entries));
console.log('索引完成,写入 vectorStore.json');
提示:生产环境建议使用向量数据库并记录版本与血缘信息。

相似度检索

// lib/search.js(简化余弦相似度)
// Cosine similarity of two equal-length numeric vectors.
// A small epsilon in the denominator keeps the division finite when either
// vector has zero norm.
export function cosineSimilarity(a,b){
  let dot = 0;
  let sumSqA = 0;
  let sumSqB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    sumSqA += a[i] * a[i];
    sumSqB += b[i] * b[i];
  }
  return dot / (Math.sqrt(sumSqA) * Math.sqrt(sumSqB) + 1e-8);
}
// Rank every index entry by cosine similarity against the query vector and
// return the k best matches, each annotated with its `score`.
export function topK(entries, qvec, k=6){
  const scored = entries.map((entry) => ({ ...entry, score: cosineSimilarity(entry.vector, qvec) }));
  scored.sort((lhs, rhs) => rhs.score - lhs.score);
  return scored.slice(0, k);
}

服务端路由

// server.js(最小可用后端)
import express from 'express';
import cors from 'cors';
import fs from 'node:fs';
import OpenAI from 'openai';
import 'dotenv/config';
import { topK } from './lib/search.js';
// Express app with JSON bodies and permissive CORS (demo configuration).
const app = express();
app.use(cors());
app.use(express.json());
// OpenAI-compatible client configured from the environment.
const client = new OpenAI({ apiKey: process.env.API_KEY, baseURL: process.env.API_BASE });
// Load the prebuilt index produced by scripts/index.js.
const store = JSON.parse(fs.readFileSync('./vectorStore.json', 'utf8'));

// Liveness probe.
app.get('/health', (_,res)=>res.json({ ok: true }));

// POST /query — embed the question, retrieve the most similar chunks,
// and generate a grounded answer with its citation list.
app.post('/query', async (req,res)=>{
  const { query } = req.body;
  if(!query) return res.status(400).json({ error: 'missing query' });
  try {
    // Embed the user query with the same model used at index time.
    const qembed = await client.embeddings.create({ model: process.env.EMBED_MODEL, input: query });
    // Retrieve the 6 most similar chunks from the in-memory store.
    const hits = topK(store, qembed.data[0].embedding, 6);
    // Assemble the retrieved chunks (with provenance) into the prompt context.
    const context = hits.map(h=>`【片段】${h.text}\n来源:${h.meta.source}`).join('\n\n');
    const completion = await client.chat.completions.create({
      model: process.env.MODEL_NAME,
      messages: [
        { role: 'system', content: '仅依据提供片段回答,并在末尾列出引用来源。' },
        { role: 'user', content: `资料:\n\n${context}\n\n问题:${query}` }
      ]
    });
    res.json({ answer: completion.choices[0].message.content, citations: hits.map(h=>h.meta.source) });
  } catch (err) {
    // Express 4 does not forward rejections from async handlers; without this
    // catch, a failed upstream call leaves the request hanging and surfaces
    // only as an unhandled promise rejection.
    console.error('query failed:', err);
    res.status(500).json({ error: 'internal error' });
  }
});

app.listen(8787, ()=>console.log('RAG 服务已启动 http://localhost:8787'));
# 运行
export API_KEY=... API_BASE=... EMBED_MODEL=... MODEL_NAME=...
node scripts/index.js
node server.js

前端调用示例

// fetch 查询
// Query the RAG service and print the answer plus its citation list.
const resp = await fetch('http://localhost:8787/query', {
  method: 'POST', headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({ query: '公司隐私策略的核心要点?' })
});
// fetch() does not reject on HTTP error statuses — surface them explicitly
// instead of parsing an error body as if it were a successful result.
if (!resp.ok) throw new Error(`query failed: HTTP ${resp.status}`);
const data = await resp.json();
console.log(data.answer, data.citations);

扩展与生产建议

评测与度量(LLM-as-Judge / 覆盖率 / 引用准确率)

以下示例展示三类评测:LLM-as-Judge、信息覆盖率(Coverage)与引用准确率(Citation Accuracy)。

// eval/judge.js(LLM-as-Judge 示意)
import OpenAI from 'openai';
import 'dotenv/config';
const client = new OpenAI({ apiKey: process.env.API_KEY, baseURL: process.env.API_BASE });
// LLM-as-Judge: score an answer's faithfulness and usefulness (0-5 each)
// against the retrieved context. Returns the judge model's raw verdict text.
export async function judge({ question, answer, context }){
  const prompt = `你是评审,依据提供片段严格评估答案的真实性与有用性:\n- 真实性(faithfulness):答案是否仅依据片段,不引入外部臆断?\n- 有用性(usefulness):答案是否清晰、完整、结构化?\n请分别给出 0-5 分,并给出一句话解释。`;
  const msg = [
    { role: 'system', content: '你是严谨的中文评审助手。' },
    // BUG FIX: the grading rubric (`prompt`) was previously built but never
    // sent, so the judge model never saw the criteria or the 0-5 scale.
    { role: 'user', content: `${prompt}\n\n片段:\n${context}\n\n问题:${question}\n\n答案:${answer}` }
  ];
  const res = await client.chat.completions.create({ model: process.env.MODEL_NAME, messages: msg });
  return res.choices[0].message.content;
}
// eval/coverage.js(信息覆盖率,基于嵌入相似度)
import OpenAI from 'openai';
import 'dotenv/config';
const client = new OpenAI({ apiKey: process.env.API_KEY, baseURL: process.env.API_BASE });
// Coverage: the fraction of retrieved chunks whose stored embedding is
// cosine-similar (>= threshold) to the answer's embedding.
// Returns { matched, total, rate }; rate is 0 when `topChunks` is empty.
export async function coverage({ answer, topChunks, embedModel }){
  const a = await client.embeddings.create({ model: embedModel, input: answer });
  const avec = a.data[0].embedding;
  // The answer-vector norm is loop-invariant — compute it once instead of
  // once per chunk.
  const answerNorm = Math.sqrt(avec.reduce((s,x)=>s + x*x, 0));
  const sims = topChunks.map(t => {
    const v = t.vector;
    const dot = v.reduce((s,x,i)=>s + x*avec[i], 0);
    const chunkNorm = Math.sqrt(v.reduce((s,x)=>s + x*x, 0));
    return dot / (chunkNorm*answerNorm + 1e-8);
  });
  const matched = sims.filter(s => s >= 0.3).length; // threshold is tunable
  return { matched, total: topChunks.length, rate: matched / Math.max(topChunks.length, 1) };
}
// eval/citation.js(引用准确率)
// Citation accuracy: the fraction of cited sources that actually appear
// among the retrieved hits. Returns { correct, total, rate }.
// Robustness fix: both fields (and the argument itself) default to empty so
// a log record missing `citations` or `hits` scores zero instead of throwing.
export function citationAccuracy({ citations = [], hits = [] } = {}){
  const set = new Set(hits.map(h => String(h.meta?.source).trim()));
  const cited = citations.map(c => String(c).trim());
  const correct = cited.filter(c => set.has(c)).length;
  return { correct, total: cited.length, rate: cited.length ? correct / cited.length : 0 };
}
提示:评测脚本应与生产日志打通,支持抽样与批量评测;可加入偏好度评估与结构化评分表。