first commit

This commit is contained in:
2026-04-17 13:23:50 +08:00
commit d985b56ac6
46 changed files with 8171 additions and 0 deletions

216
src/mysql.js Normal file
View File

@@ -0,0 +1,216 @@
const mysql = require("mysql2/promise");
// Shared mysql2 connection pool; stays null until getMysqlPool() creates it on first use.
let pool = null;
/**
 * Reports whether a MySQL host has been configured through the environment.
 *
 * @returns {boolean} true when MYSQL_HOST is set to a non-blank value.
 */
function isMysqlConfigured() {
  const host = process.env.MYSQL_HOST;
  if (!host) {
    return false;
  }
  return String(host).trim().length > 0;
}
/**
 * Returns the shared connection pool, creating it from the MYSQL_* environment
 * variables on first use.
 *
 * @returns {object|null} the pool, or null when MYSQL_HOST is not configured.
 */
function getMysqlPool() {
  if (!isMysqlConfigured()) {
    return null;
  }
  if (pool) {
    return pool;
  }
  const env = process.env;
  const options = {
    host: env.MYSQL_HOST.trim(),
    port: Number(env.MYSQL_PORT || 3306),
    user: env.MYSQL_USER || "root",
    password: env.MYSQL_PASSWORD || "",
    database: env.MYSQL_DATABASE || "Resume",
    waitForConnections: true,
    connectionLimit: Number(env.MYSQL_POOL_SIZE || 10),
    queueLimit: 0,
    enableKeepAlive: true,
    connectTimeout: Number(env.MYSQL_CONNECT_TIMEOUT_MS || 15000),
  };
  pool = mysql.createPool(options);
  return pool;
}
/**
 * Checks connectivity by borrowing a pooled connection and running two
 * trivial queries.
 *
 * @returns {Promise<object>} `{ ok: false, skipped: true, message }` when
 *   MySQL is not configured, otherwise `{ ok: true, skipped: false, database }`.
 *   Connection and query errors propagate to the caller.
 */
async function testMysqlConnection() {
  if (!isMysqlConfigured()) {
    return { ok: false, skipped: true, message: "未配置 MYSQL_HOST" };
  }
  const connection = await getMysqlPool().getConnection();
  try {
    await connection.query("SELECT 1 AS ok");
    const [dbRows] = await connection.query("SELECT DATABASE() AS db");
    const database = dbRows[0]?.db || null;
    return { ok: true, skipped: false, database };
  } finally {
    // Always hand the connection back to the pool.
    connection.release();
  }
}
/**
 * Non-throwing wrapper around testMysqlConnection() for health reporting.
 *
 * @returns {Promise<object>} `{ ok, skipped }` plus `database` on success or
 *   `error` (message text) when the connection test throws.
 */
async function getMysqlHealth() {
  try {
    const { skipped, database } = await testMysqlConnection();
    return skipped
      ? { ok: false, skipped: true }
      : { ok: true, skipped: false, database };
  } catch (err) {
    const error = err?.message || String(err);
    return { ok: false, skipped: false, error };
  }
}
// DDL for the resume_submissions table: upload metadata (filename, MIME type,
// source type, SHA-256 of the file), the extracted resume fields, and JSON
// columns for the interview report, PDF extraction metadata and warnings.
// CREATE TABLE IF NOT EXISTS makes the statement a no-op when the table exists.
const CREATE_RESUME_SUBMISSIONS_SQL = `
CREATE TABLE IF NOT EXISTS resume_submissions (
id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
original_filename VARCHAR(512) NOT NULL,
mime_type VARCHAR(255) NOT NULL,
source_type VARCHAR(32) NOT NULL,
file_sha256 CHAR(64) NOT NULL,
name VARCHAR(255) NULL COMMENT '姓名',
city VARCHAR(255) NULL COMMENT '城市',
age VARCHAR(64) NULL COMMENT '年龄',
expected_salary VARCHAR(255) NULL COMMENT '期望薪资',
education_experience TEXT NULL COMMENT '教育经历',
education VARCHAR(512) NULL COMMENT '学历',
ability TEXT NULL COMMENT '能力',
work_experience TEXT NULL COMMENT '工作经历',
project_experience TEXT NULL COMMENT '项目经历',
tech_stack TEXT NULL COMMENT '技术栈',
interview_report JSON NULL COMMENT 'AI面试题与评估',
pdf_meta JSON NULL COMMENT 'PDF解析元信息',
parser VARCHAR(64) NULL,
parser_note TEXT NULL,
warnings_json JSON NULL,
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (id),
KEY idx_file_sha256 (file_sha256),
KEY idx_name (name(64)),
KEY idx_city (city(64)),
KEY idx_created_at (created_at)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci
`;
/**
 * Makes sure the resume_submissions table exists with the current schema.
 *
 * NOTE: a table that still carries the legacy `extracted_text` column is
 * dropped (together with its rows) and recreated.
 *
 * @returns {Promise<object>} `{ ok: false, skipped: true }` when MySQL is not
 *   configured, otherwise `{ ok: true }`. Query errors propagate.
 */
async function ensureResumeTables() {
  const db = getMysqlPool();
  if (!db) {
    return { ok: false, skipped: true };
  }
  const connection = await db.getConnection();
  try {
    const legacyProbeSql = `SELECT COUNT(*) AS n FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_SCHEMA = DATABASE()
AND TABLE_NAME = 'resume_submissions'
AND COLUMN_NAME = 'extracted_text'`;
    const [probeRows] = await connection.query(legacyProbeSql);
    const hasLegacyColumn = probeRows[0]?.n > 0;
    if (hasLegacyColumn) {
      await connection.query("DROP TABLE IF EXISTS resume_submissions");
      console.log("[mysql] 已删除旧版 resume_submissions含 extracted_text将按字段重建");
    }
    await connection.query(CREATE_RESUME_SUBMISSIONS_SQL);
    return { ok: true };
  } finally {
    connection.release();
  }
}
/**
 * Converts a resume_submissions row into the Chinese-keyed "parsed resume"
 * object used in API responses.
 *
 * @param {object|null|undefined} row - database row.
 * @returns {object|null} null for a missing row; otherwise every field is
 *   present, with falsy column values replaced by "".
 */
function dbRowToParsed(row) {
  if (!row) {
    return null;
  }
  // [response field, database column], in response key order.
  const fieldColumns = [
    ["姓名", "name"],
    ["城市", "city"],
    ["年龄", "age"],
    ["期望薪资", "expected_salary"],
    ["教育经历", "education_experience"],
    ["学历", "education"],
    ["能力", "ability"],
    ["工作经历", "work_experience"],
    ["项目经历", "project_experience"],
    ["技术栈", "tech_stack"],
  ];
  return Object.fromEntries(
    fieldColumns.map(([field, column]) => [field, row[column] || ""])
  );
}
/**
 * Looks up the newest stored submission for a file hash that already has an
 * interview report.
 *
 * @param {string} fileSha256 - hex SHA-256 of the uploaded file.
 * @returns {Promise<object|null>} the row, or null when MySQL is not
 *   configured, the hash is empty, or nothing matches.
 */
async function findDuplicateResumeByFileHash(fileSha256) {
  const db = getMysqlPool();
  if (!db || !fileSha256) {
    return null;
  }
  const lookupSql = `SELECT * FROM resume_submissions
WHERE file_sha256 = ?
AND interview_report IS NOT NULL
ORDER BY id DESC
LIMIT 1`;
  const [matches] = await db.execute(lookupSql, [fileSha256]);
  const newest = matches[0];
  return newest || null;
}
/**
 * Serializes a value destined for a JSON column.
 * Strings are passed through unchanged (assumed to already be JSON text).
 */
function serializeJsonParam(value) {
  if (value == null) {
    return null;
  }
  return typeof value === "string" ? value : JSON.stringify(value);
}

/**
 * Inserts one row into resume_submissions.
 *
 * The three JSON columns (interview_report, pdf_meta, warnings_json) are
 * serialized explicitly so the stored value does not depend on how the driver
 * binds plain objects/arrays in prepared statements.
 *
 * @param {object} row - camelCase submission data; `originalFilename`,
 *   `mimeType`, `sourceType` and `fileSha256` are required, every other field
 *   may be omitted and is stored as NULL.
 * @returns {Promise<number|null>} the new row id, or null when MySQL is not
 *   configured. Driver errors propagate to the caller.
 */
async function insertResumeSubmission(row) {
  const pool = getMysqlPool();
  if (!pool) {
    return null;
  }
  const sql = `
    INSERT INTO resume_submissions (
      original_filename,
      mime_type,
      source_type,
      file_sha256,
      name,
      city,
      age,
      expected_salary,
      education_experience,
      education,
      ability,
      work_experience,
      project_experience,
      tech_stack,
      interview_report,
      pdf_meta,
      parser,
      parser_note,
      warnings_json
    ) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)
  `;
  const params = [
    row.originalFilename,
    row.mimeType,
    row.sourceType,
    row.fileSha256,
    row.name ?? null,
    row.city ?? null,
    row.age ?? null,
    row.expectedSalary ?? null,
    row.educationExperience ?? null,
    row.education ?? null,
    row.ability ?? null,
    row.workExperience ?? null,
    row.projectExperience ?? null,
    row.techStack ?? null,
    serializeJsonParam(row.interviewReport),
    serializeJsonParam(row.pdfMeta),
    row.parser ?? null,
    row.parserNote ?? null,
    serializeJsonParam(row.warningsJson),
  ];
  const [result] = await pool.execute(sql, params);
  return result.insertId;
}
// Public API of the MySQL helper module: pool access, connectivity/health
// checks, schema bootstrap, and the resume_submissions read/write helpers.
module.exports = {
getMysqlPool,
testMysqlConnection,
getMysqlHealth,
isMysqlConfigured,
ensureResumeTables,
insertResumeSubmission,
findDuplicateResumeByFileHash,
dbRowToParsed,
};

874
src/server.js Normal file
View File

@@ -0,0 +1,874 @@
const path = require("path");
const crypto = require("crypto");
const { pathToFileURL } = require("url");
const express = require("express");
const cors = require("cors");
const multer = require("multer");
const mammoth = require("mammoth");
const { PDFParse } = require("pdf-parse");
const { createWorker } = require("tesseract.js");
const OpenAI = require("openai");
const {
getMysqlHealth,
testMysqlConnection,
ensureResumeTables,
insertResumeSubmission,
getMysqlPool,
findDuplicateResumeByFileHash,
dbRowToParsed,
} = require("./mysql");
const {
saveSession,
getSession,
deleteSession,
} = require("./upload-session-cache");
require("dotenv").config();
// Express application with CORS enabled and JSON request bodies capped at 1 MB.
const app = express();
app.use(cors());
app.use(express.json({ limit: "1mb" }));
// Note: static assets must be mounted after the API routes; otherwise /api/* may be handled incorrectly in some environments.
// Resume field labels (Chinese). They serve both as the keys of every parsed
// resume object and as the labels the rule-based parser searches for in text.
const resumeFields = [
"姓名",
"城市",
"年龄",
"期望薪资",
"教育经历",
"学历",
"能力",
"工作经历",
"项目经历",
"技术栈",
];
// OpenAI SDK client pointed at the DeepSeek endpoint; null when
// DEEPSEEK_API_KEY is not set at the time this line is evaluated.
const deepseekClient = process.env.DEEPSEEK_API_KEY
? new OpenAI({
baseURL: "https://api.deepseek.com",
apiKey: process.env.DEEPSEEK_API_KEY,
})
: null;
/**
 * Rule-based resume parser: extracts the value that follows each known field
 * label ("姓名: ...") up to the next label or the end of the text.
 *
 * Both the ASCII colon and the full-width colon (U+FF1A) are accepted after a
 * label, since Chinese resumes commonly use the latter.
 *
 * @param {*} text - raw resume text; non-strings are coerced, null/undefined
 *   are treated as "".
 * @returns {object} one key per entry of `resumeFields`; fields that are not
 *   found are "". Whitespace runs inside a value are collapsed to one space.
 */
function parseResumeText(text) {
  const escapeRegExp = (value) => value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  // Written with an escape so the full-width colon survives any re-encoding.
  const separator = "[:\\uFF1A]";
  const normalizedText = String(text || "")
    .replace(/\r\n/g, "\n")
    .replace(/\u3000/g, " ")
    .trim();
  const parsed = Object.fromEntries(resumeFields.map((field) => [field, ""]));
  const escapedFields = resumeFields.map(escapeRegExp);
  resumeFields.forEach((field, index) => {
    // A value ends where any *other* label followed by a separator begins.
    const nextFieldPattern = escapedFields
      .filter((_, otherIndex) => otherIndex !== index)
      .join("|");
    const regex = new RegExp(
      `${escapedFields[index]}\\s*${separator}\\s*([\\s\\S]*?)(?=\\s*(?:${nextFieldPattern})\\s*${separator}|$)`,
      "i"
    );
    const match = normalizedText.match(regex);
    if (match) {
      parsed[field] = match[1].replace(/\s+/g, " ").trim();
    }
  });
  return parsed;
}
/**
 * Coerces an arbitrary (typically AI-produced) object into the resume shape.
 *
 * @param {*} data - candidate object keyed by the `resumeFields` labels.
 * @returns {object} exactly one trimmed string per resume field; anything that
 *   is not a plain (non-array) object yields all-empty fields, and unknown
 *   keys are dropped.
 */
function normalizeParsedResume(data) {
  const isRecord = Boolean(data) && typeof data === "object" && !Array.isArray(data);
  const toText = (value) => {
    if (typeof value === "string") {
      return value.trim();
    }
    return value == null ? "" : String(value).trim();
  };
  return Object.fromEntries(
    resumeFields.map((field) => [field, isRecord ? toText(data[field]) : ""])
  );
}
/**
 * Normalizes a list-like value into an array of non-empty trimmed strings.
 *
 * Strings are split on ASCII and full-width commas, the ideographic comma
 * ("、"), and ASCII and full-width semicolons.
 *
 * @param {*} value - an array (items are stringified) or a delimited string.
 * @returns {string[]} cleaned items; [] for any other input type.
 */
function normalizeStringArray(value) {
  if (Array.isArray(value)) {
    return value
      .map((item) => (item == null ? "" : String(item).trim()))
      .filter(Boolean);
  }
  if (typeof value === "string") {
    // Escapes keep the full-width separators intact regardless of file encoding.
    return value
      .split(/[,\uFF0C\u3001;\uFF1B]/)
      .map((item) => item.trim())
      .filter(Boolean);
  }
  return [];
}
/**
 * Keeps only well-formed interview questions.
 *
 * @param {*} value - expected to be an array of `{ question, standardAnswer }`.
 * @returns {Array<{question: string, standardAnswer: string}>} trimmed entries
 *   whose question and answer are both non-empty strings; [] for non-arrays.
 */
function normalizeInterviewQuestions(value) {
  if (!Array.isArray(value)) {
    return [];
  }
  const textOrEmpty = (candidate) =>
    typeof candidate === "string" ? candidate.trim() : "";
  const cleaned = [];
  for (const item of value) {
    const question = textOrEmpty(item?.question);
    const standardAnswer = textOrEmpty(item?.standardAnswer);
    if (question && standardAnswer) {
      cleaned.push({ question, standardAnswer });
    }
  }
  return cleaned;
}
/**
 * Builds the fallback interview question list used when no usable AI output
 * is available.
 *
 * @returns {Array<{question: string, standardAnswer: string}>} ten generic
 *   questions, all sharing the same generic standard answer.
 */
function getDefaultInterviewQuestions() {
  const standardAnswer =
    "答案应包含明确场景、技术方案、实施步骤、结果数据和复盘总结,体现候选人的技术深度与工程思维。";
  const questions = [
    "请简要介绍你最熟悉的一个项目,并说明你负责的核心模块。",
    "你在该项目中解决过最复杂的技术问题是什么?如何定位与解决?",
    "如果线上出现性能瓶颈,你会如何做排查和优化?",
    "你如何保证代码质量和可维护性?",
    "请解释你常用技术栈中的一个核心原理。",
    "在多人协作开发中,你如何进行任务拆分与沟通?",
    "你如何设计一个可扩展的后端接口?",
    "你做过哪些稳定性保障措施(监控、告警、容错等)?",
    "遇到需求变更时,你如何评估影响并快速调整?",
    "请描述一次你主导或关键参与的优化成果(可量化最好)。",
  ];
  const result = [];
  for (const question of questions) {
    result.push({ question, standardAnswer });
  }
  return result;
}
/**
 * Coerces an arbitrary (typically AI-produced) value into the interview
 * report shape.
 *
 * @param {*} data - candidate report object.
 * @returns {object} `{ interviewQuestions, abilitySummary, rating, techStack,
 *   strengths, weaknesses }`. Non-object input yields the default questions
 *   with every other field empty; fewer than ten valid questions are padded
 *   with defaults taken from the same positions, up to ten.
 */
function normalizeInterviewReport(data) {
  const isRecord = Boolean(data) && typeof data === "object" && !Array.isArray(data);
  if (!isRecord) {
    return {
      interviewQuestions: getDefaultInterviewQuestions(),
      abilitySummary: "",
      rating: { level: "", score: "", reason: "" },
      techStack: [],
      strengths: [],
      weaknesses: [],
    };
  }
  const textOrEmpty = (candidate) =>
    typeof candidate === "string" ? candidate.trim() : "";

  let interviewQuestions = normalizeInterviewQuestions(data.interviewQuestions);
  if (interviewQuestions.length < 10) {
    const padding = getDefaultInterviewQuestions().slice(interviewQuestions.length, 10);
    interviewQuestions = interviewQuestions.concat(padding);
  }

  const rating = data.rating;
  const rawScore = rating?.score;
  return {
    interviewQuestions,
    abilitySummary: textOrEmpty(data.abilitySummary),
    rating: {
      level: textOrEmpty(rating?.level),
      score: rawScore == null ? "" : String(rawScore).trim(),
      reason: textOrEmpty(rating?.reason),
    },
    techStack: normalizeStringArray(data.techStack),
    strengths: normalizeStringArray(data.strengths),
    weaknesses: normalizeStringArray(data.weaknesses),
  };
}
/**
 * Pulls a JSON value out of a model reply.
 *
 * A ```json fenced block is preferred; otherwise the text between the first
 * "{" and the last "}" is parsed.
 *
 * @param {string} content - raw model output.
 * @returns {*} the parsed JSON value.
 * @throws {Error} when the content is empty or contains no `{...}` span.
 * @throws {SyntaxError} when the selected text is not valid JSON.
 */
function extractJsonObject(content) {
  if (!content) {
    throw new Error("AI 未返回内容");
  }
  const fenced = content.match(/```json\s*([\s\S]*?)\s*```/i);
  if (fenced) {
    const [, fencedBody] = fenced;
    return JSON.parse(fencedBody);
  }
  const open = content.indexOf("{");
  const close = content.lastIndexOf("}");
  const hasObjectSpan = open !== -1 && close !== -1 && close > open;
  if (!hasObjectSpan) {
    throw new Error("AI 返回内容不是合法 JSON");
  }
  return JSON.parse(content.slice(open, close + 1));
}
/**
 * Extracts structured resume fields and an interview report from raw text.
 *
 * Uses the DeepSeek chat model when a client is configured. Without a client,
 * or when the request or response parsing fails, it falls back to the
 * rule-based parser plus the default interview report. It does not reject for
 * those failures; the reason is reported in `parserNote`.
 *
 * @param {string} text - raw resume text.
 * @returns {Promise<{parsed: object, interviewReport: object, parser: string,
 *   parserNote: string}>} `parser` is "ai" or "rule".
 */
async function parseResumeWithAI(text) {
  const ruleBasedResult = (parserNote) => ({
    parsed: parseResumeText(text),
    interviewReport: normalizeInterviewReport(null),
    parser: "rule",
    parserNote,
  });

  if (!deepseekClient) {
    return ruleBasedResult("未配置 DEEPSEEK_API_KEY已使用规则解析面试题使用默认模板");
  }

  const promptLines = [
    "请从下面的简历原文中完成结构化提取和面试评估。",
    "你只能返回一个合法 JSON不要输出任何解释文字。",
    "JSON 顶层必须包含两个键parsed 和 interviewReport。",
    "parsed 字段键名必须严格为:姓名、城市、年龄、期望薪资、教育经历、学历、能力、工作经历、技术栈、项目经历。",
    "parsed 中字段缺失时填空字符串。",
    "interviewReport 的结构必须为:",
    "{",
    ' "interviewQuestions": [{"question":"问题1","standardAnswer":"标准答案1"}],',
    ' "abilitySummary": "能力总结",',
    ' "rating": {"level":"S/A/B/C","score":"0-100","reason":"评级原因"},',
    ' "techStack": ["技术1","技术2"],',
    ' "strengths": ["优点1","优点2"],',
    ' "weaknesses": ["缺点1","缺点2"]',
    "}",
    "interviewQuestions 至少生成 10 条,并且每条都要贴合该候选人简历内容。",
    "简历原文:",
    text,
  ];
  const systemMessage = {
    role: "system",
    content:
      "你是资深技术面试官与简历评估助手。你只能返回合法 JSON禁止输出 markdown 代码块和解释文本。",
  };
  const userMessage = { role: "user", content: promptLines.join("\n") };

  try {
    const completion = await deepseekClient.chat.completions.create({
      model: "deepseek-chat",
      temperature: 0.1,
      messages: [systemMessage, userMessage],
    });
    const reply = completion.choices?.[0]?.message?.content || "";
    const json = extractJsonObject(reply);
    return {
      parsed: normalizeParsedResume(json?.parsed),
      interviewReport: normalizeInterviewReport(json?.interviewReport),
      parser: "ai",
      parserNote: "已使用 DeepSeek 进行结构化解析与面试评估",
    };
  } catch (error) {
    return ruleBasedResult(
      `AI 解析失败,已回退规则解析,面试题使用默认模板:${error.message}`
    );
  }
}
// Cached in-flight/finished load of the pdfjs-dist ESM build.
let pdfjsModulePromise;

/**
 * Lazily loads the pdfjs-dist legacy ESM build (this file is CommonJS, hence
 * the dynamic import) and points its worker at the bundled worker script.
 *
 * The load is attempted once and shared between callers. A failed load is NOT
 * cached: the next call retries instead of re-throwing the stale rejection.
 *
 * @returns {Promise<object>} the pdfjs module namespace.
 */
async function getPdfJsModule() {
  if (!pdfjsModulePromise) {
    pdfjsModulePromise = (async () => {
      const pdfMainPath = require.resolve("pdfjs-dist/legacy/build/pdf.mjs");
      const workerPath = require.resolve("pdfjs-dist/legacy/build/pdf.worker.mjs");
      const pdfjs = await import(pathToFileURL(pdfMainPath).href);
      pdfjs.GlobalWorkerOptions.workerSrc = pathToFileURL(workerPath).href;
      return pdfjs;
    })();
  }
  const attempt = pdfjsModulePromise;
  try {
    return await attempt;
  } catch (error) {
    // Only clear the cache if no newer attempt has replaced this one.
    if (pdfjsModulePromise === attempt) {
      pdfjsModulePromise = undefined;
    }
    throw error;
  }
}
/**
 * Races a promise against a timeout.
 *
 * The timer is cleared as soon as the race settles, so a promise that
 * finishes early does not leave a pending timer behind. The wrapped operation
 * itself is not cancelled when the timeout wins.
 *
 * @param {Promise<*>} promise - operation to wait for.
 * @param {number} ms - timeout in milliseconds.
 * @param {string} label - operation name used in the timeout error message.
 * @returns {Promise<*>} settles like `promise`, or rejects with an Error once
 *   `ms` elapses first.
 */
function withTimeout(promise, ms, label) {
  let timer;
  const timeout = new Promise((_, reject) => {
    timer = setTimeout(() => {
      // \uFF09 is the full-width closing parenthesis matching the opening one.
      reject(new Error(`${label} 超时(${ms}ms\uFF09`));
    }, ms);
  });
  return Promise.race([promise, timeout]).finally(() => {
    clearTimeout(timer);
  });
}
// Shared Tesseract worker (held as a promise) and the language it was created for.
let tesseractWorkerPromise;
let tesseractWorkerLang = "";
/**
 * Returns the shared Tesseract worker, creating it on first use.
 *
 * The language comes from PDF_OCR_LANG (default "chi_sim"). If it differs
 * from the language of the cached worker, the old worker is terminated and a
 * new one is created. Creation is bounded by PDF_OCR_INIT_TIMEOUT_MS
 * (default 120000 ms).
 *
 * @returns {Promise<object>} the worker returned by `createWorker`.
 * @throws rethrows the initialization error (including the timeout error)
 *   after clearing the cache, so the next call starts a fresh attempt.
 */
async function getTesseractWorker() {
const lang = process.env.PDF_OCR_LANG || "chi_sim";
// Language changed since the cached worker was created: terminate it
// (best effort, errors ignored) and force a rebuild below.
if (tesseractWorkerPromise && tesseractWorkerLang !== lang) {
const old = await tesseractWorkerPromise.catch(() => null);
if (old && typeof old.terminate === "function") {
await old.terminate().catch(() => {});
}
tesseractWorkerPromise = null;
}
if (!tesseractWorkerPromise) {
tesseractWorkerLang = lang;
const initMs = Number(process.env.PDF_OCR_INIT_TIMEOUT_MS || 120000);
// NOTE(review): if the timeout wins, createWorker() keeps running in the
// background and its worker is never terminated — confirm this is acceptable.
tesseractWorkerPromise = withTimeout(
createWorker(lang),
initMs,
"Tesseract 初始化(首次需下载模型,请保持网络畅通)"
);
}
try {
return await tesseractWorkerPromise;
} catch (error) {
// Drop the failed attempt so it is not served from the cache again.
tesseractWorkerPromise = null;
tesseractWorkerLang = "";
throw error;
}
}
// Tail of the OCR job chain; each new job is appended behind it.
let ocrQueue = Promise.resolve();

/**
 * Runs OCR jobs strictly one after another.
 *
 * @param {Function} task - job to run; may return a value or a promise.
 * @returns {Promise<*>} settles with the outcome of `task`. A failing job
 *   rejects only its own promise and does not block later jobs.
 */
function enqueueOcr(task) {
  const runTask = () => task();
  // Start the job whether the previous one succeeded or failed.
  const pending = ocrQueue.then(runTask, runTask);
  // The stored tail swallows rejections so the chain itself never rejects.
  ocrQueue = pending.catch(() => {});
  return pending;
}
/**
 * Counts the characters of a text that carry content, i.e. everything except
 * whitespace, zero-width characters (U+200B–U+200D) and the BOM (U+FEFF).
 *
 * @param {*} text - extracted text; falsy values count as "".
 * @returns {number} number of remaining UTF-16 code units.
 */
function pdfMeaningfulCharCount(text) {
  const source = String(text || "");
  const meaningful = source.match(/[^\s\u200b-\u200d\ufeff]/g);
  return meaningful ? meaningful.length : 0;
}
/**
 * Decides whether a PDF's text layer is sparse enough to warrant OCR.
 *
 * OCR runs only when PDF_OCR_ENABLED is "1", PDF_OCR_DISABLED is not "1", and
 * the text has fewer meaningful characters than PDF_OCR_MIN_CHARS (default 80).
 *
 * @param {string} text - text extracted from the PDF text layer.
 * @returns {boolean} true when OCR should be attempted.
 */
function shouldRunPdfOcr(text) {
  const { PDF_OCR_DISABLED, PDF_OCR_ENABLED, PDF_OCR_MIN_CHARS } = process.env;
  const ocrAllowed = PDF_OCR_DISABLED !== "1" && PDF_OCR_ENABLED === "1";
  if (!ocrAllowed) {
    return false;
  }
  const threshold = Number(PDF_OCR_MIN_CHARS || 80);
  return pdfMeaningfulCharCount(text) < threshold;
}
/**
 * Renders the first pages of a PDF to PNG and runs Tesseract OCR on each.
 *
 * Page count is limited by PDF_OCR_MAX_PAGES (default 5), render scale comes
 * from PDF_OCR_SCALE (default 2), and each page's OCR is bounded by
 * PDF_OCR_PAGE_TIMEOUT_MS (default 120000 ms).
 *
 * The loaded document is destroyed on every exit path, including a failure to
 * obtain the OCR worker, and each page is cleaned up even when rendering or
 * recognition throws.
 *
 * @param {Buffer} pdfBuffer - raw PDF bytes.
 * @returns {Promise<{text: string, pagesProcessed: number}>} non-empty page
 *   texts joined by blank lines, and the number of pages attempted.
 */
async function extractPdfTextViaOcr(pdfBuffer) {
  const { createCanvas } = require("@napi-rs/canvas");
  const pdfjs = await getPdfJsModule();
  const loadingTask = pdfjs.getDocument({
    data: new Uint8Array(pdfBuffer),
    useSystemFonts: true,
  });
  let doc;
  try {
    doc = await loadingTask.promise;
  } catch (error) {
    await loadingTask.destroy().catch(() => {});
    throw error;
  }
  const maxPages = Math.min(
    doc.numPages,
    Number(process.env.PDF_OCR_MAX_PAGES || 5)
  );
  const scale = Number(process.env.PDF_OCR_SCALE || 2);
  const ocrTimeout = Number(process.env.PDF_OCR_PAGE_TIMEOUT_MS || 120000);
  const chunks = [];
  try {
    // Acquired inside the try so a worker init failure still destroys the document.
    const worker = await getTesseractWorker();
    for (let pageNum = 1; pageNum <= maxPages; pageNum += 1) {
      const page = await doc.getPage(pageNum);
      try {
        const viewport = page.getViewport({ scale });
        const width = Math.max(1, Math.ceil(viewport.width));
        const height = Math.max(1, Math.ceil(viewport.height));
        const canvas = createCanvas(width, height);
        const ctx = canvas.getContext("2d");
        // White background: the canvas starts transparent.
        ctx.fillStyle = "#ffffff";
        ctx.fillRect(0, 0, width, height);
        await page.render({ canvasContext: ctx, viewport }).promise;
        const pngBuffer = canvas.toBuffer("image/png");
        const { data: ocrData } = await withTimeout(
          worker.recognize(pngBuffer),
          ocrTimeout,
          `${pageNum} 页 OCR`
        );
        const pageText = String(ocrData.text || "").trim();
        if (pageText) {
          chunks.push(pageText);
        }
      } finally {
        page.cleanup();
      }
    }
  } finally {
    await loadingTask.destroy().catch(() => {});
  }
  return {
    text: chunks.join("\n\n"),
    pagesProcessed: maxPages,
  };
}
/**
 * Extracts plain text from an uploaded .docx or .pdf file, chosen by the
 * extension of `file.originalname`.
 *
 * @param {{originalname: string, buffer: Buffer}} file - in-memory upload.
 * @returns {Promise<object>} `{ text, warnings, sourceType }`, plus
 *   `pdfExtraction` diagnostics for PDFs.
 * @throws {Error} for any other extension.
 */
async function extractTextFromUpload(file) {
  const ext = path.extname(file.originalname || "").toLowerCase();
  switch (ext) {
    case ".docx":
      return extractDocxUpload(file);
    case ".pdf":
      return extractPdfUpload(file);
    default:
      throw new Error(`不支持的文件类型:${ext || "未知"}。当前仅支持 .docx 和 .pdf`);
  }
}

/** Reads the raw text of a .docx upload via mammoth. */
async function extractDocxUpload(file) {
  const { value, messages } = await mammoth.extractRawText({ buffer: file.buffer });
  return {
    text: value || "",
    warnings: messages || [],
    sourceType: "docx",
  };
}

/**
 * Reads the text layer of a PDF upload and, when shouldRunPdfOcr() allows it,
 * tries OCR and keeps whichever text has more meaningful characters.
 */
async function extractPdfUpload(file) {
  const parser = new PDFParse({ data: file.buffer });
  let textLayer;
  try {
    textLayer = await parser.getText();
  } finally {
    await parser.destroy();
  }
  const primaryText = textLayer.text || "";
  const primaryChars = pdfMeaningfulCharCount(primaryText);
  const minChars = Number(process.env.PDF_OCR_MIN_CHARS || 80);
  const ocrEnabled = process.env.PDF_OCR_ENABLED === "1";
  const pdfExtraction = {
    textLayerChars: primaryChars,
    ocrLang: process.env.PDF_OCR_LANG || "chi_sim",
    ocrEnabled,
    ocrAttempted: false,
    ocrPages: 0,
    ocrUsed: false,
    ocrError: "",
    ocrSkippedReason: "",
  };
  const warnings = [...(textLayer.messages || [])];
  let finalText = primaryText;

  // Sparse text layer but OCR not switched on: record why OCR was skipped.
  if (primaryChars < minChars && !ocrEnabled) {
    pdfExtraction.ocrSkippedReason =
      "文本层较短,疑似扫描件;未设置 PDF_OCR_ENABLED=1已跳过 OCR可在 .env 开启并保证网络可下载 Tesseract 模型)";
  }

  if (shouldRunPdfOcr(primaryText)) {
    pdfExtraction.ocrAttempted = true;
    try {
      const ocrResult = await enqueueOcr(() => extractPdfTextViaOcr(file.buffer));
      pdfExtraction.ocrPages = ocrResult.pagesProcessed;
      const ocrIsRicher =
        pdfMeaningfulCharCount(ocrResult.text) > pdfMeaningfulCharCount(finalText);
      if (ocrIsRicher) {
        finalText = ocrResult.text;
        pdfExtraction.ocrUsed = true;
      }
    } catch (error) {
      // OCR is best effort: keep the text layer and surface the failure.
      pdfExtraction.ocrError = error.message;
      warnings.push(`PDF OCR 失败:${error.message}`);
    }
  }

  return {
    text: finalText,
    warnings,
    sourceType: "pdf",
    pdfExtraction,
  };
}
// Multer upload middleware. Files are kept in memory so the buffer can be
// handed straight to the parsers; size is capped at 20 MB and only .docx/.pdf
// extensions (checked on the original filename) are accepted.
const upload = multer({
storage: multer.memoryStorage(),
limits: { fileSize: 20 * 1024 * 1024 }, // 20MB
fileFilter: (req, file, cb) => {
const ext = path.extname(file.originalname || "").toLowerCase();
const allowed = [".docx", ".pdf"];
if (!allowed.includes(ext)) {
// Rejecting with an Error routes the request to the error-handling middleware.
return cb(
new Error(`不支持的文件类型:${ext || "未知"}。当前仅支持 .docx 和 .pdf`),
false
);
}
cb(null, true);
},
});
// Liveness endpoint: always answers ok: true and embeds the MySQL health
// report (getMysqlHealth() reports failures in its result instead of throwing).
app.get("/health", async (req, res) => {
  const mysqlStatus = await getMysqlHealth();
  res.json({ ok: true, mysql: mysqlStatus });
});
// Serves the single-page front end from ../public/index.html.
app.get("/", (req, res) => {
res.sendFile(path.join(__dirname, "..", "public", "index.html"));
});
/**
 * Normalizes a JSON column value that may arrive already parsed (object) or
 * as JSON text (string).
 *
 * @param {*} value - raw column value.
 * @param {*} fallback - returned for null/undefined, unparsable strings and
 *   any other primitive.
 * @returns {*} the object as-is, the parsed string, or `fallback`.
 */
function normalizeJsonColumn(value, fallback) {
  if (value == null) {
    return fallback;
  }
  switch (typeof value) {
    case "object":
      return value;
    case "string":
      try {
        return JSON.parse(value);
      } catch {
        return fallback;
      }
    default:
      return fallback;
  }
}
/**
 * In-memory store of finished AI results keyed by file SHA-256. Used when
 * MySQL is not configured (or the insert failed) so that polling and
 * consult-ai can still read the result.
 */
const memoryAiByHash = new Map();
/**
 * Background job for one upload session: runs the AI parse and stores the
 * result in MySQL, or in the in-memory map when MySQL is not configured or
 * the insert fails. The session is deleted only after the result was stored.
 *
 * Errors are logged rather than thrown.
 *
 * @param {string} uploadId - id of the session created by step 1.
 * @returns {Promise<void>} resolves immediately when the session is missing.
 */
async function runResumeAiPersistInBackground(uploadId) {
  const session = getSession(uploadId);
  if (!session) {
    return;
  }
  let persisted = false;
  try {
    const aiResult = await parseResumeWithAI(session.extractedText);
    const fields = aiResult.parsed || {};
    const { interviewReport, parser, parserNote } = aiResult;
    const dbRecord = {
      originalFilename: session.originalFilename,
      mimeType: session.mimeType,
      sourceType: session.sourceType,
      fileSha256: session.fileSha256,
      name: fields.姓名 || null,
      city: fields.城市 || null,
      age: fields.年龄 || null,
      expectedSalary: fields.期望薪资 || null,
      educationExperience: fields.教育经历 || null,
      education: fields.学历 || null,
      ability: fields.能力 || null,
      workExperience: fields.工作经历 || null,
      projectExperience: fields.项目经历 || null,
      techStack: fields.技术栈 || null,
      interviewReport,
      pdfMeta: session.pdfExtraction || null,
      parser,
      parserNote,
      warningsJson: session.warnings,
    };
    const storeInMemory = () => {
      memoryAiByHash.set(session.fileSha256, {
        parsed: aiResult.parsed,
        interviewReport,
        parser,
        parserNote,
        warnings: session.warnings,
      });
    };

    if (!getMysqlPool()) {
      storeInMemory();
    } else {
      try {
        await insertResumeSubmission(dbRecord);
      } catch (err) {
        // Database write failed: keep the result reachable through memory.
        console.error("[resume-ai-bg] 写入数据库失败,已写入内存供轮询:", err?.message || err);
        storeInMemory();
      }
    }
    persisted = true;
  } catch (err) {
    console.error("[resume-ai-bg] 生成失败:", err?.message || err);
  } finally {
    if (persisted) {
      deleteSession(uploadId);
    }
  }
}
// Step 1: upload + text extraction + duplicate check. The AI model is not
// called inside the request; for new files it is scheduled in the background.
app.post("/api/resume/step1", upload.single("file"), async (req, res, next) => {
try {
if (!req.file) {
return res.status(400).json({ error: "缺少上传文件:请使用 form-data 字段 file" });
}
const extracted = await extractTextFromUpload(req.file);
// The hash of the raw bytes identifies the file for de-duplication and polling.
const fileSha256 = crypto
.createHash("sha256")
.update(req.file.buffer)
.digest("hex");
// Quick rule-based preview; returned to the client for non-duplicate uploads.
const parsedPreview = parseResumeText(extracted.text);
if (getMysqlPool()) {
const duplicateRow = await findDuplicateResumeByFileHash(fileSha256);
if (duplicateRow) {
// Same file already evaluated: answer from the stored row, no session, no AI call.
const parsed = dbRowToParsed(duplicateRow);
const interviewReport = normalizeJsonColumn(
duplicateRow.interview_report,
null
);
const warnings = normalizeJsonColumn(duplicateRow.warnings_json, []);
return res.json({
step: 1,
fileSha256,
uploadId: null,
isDuplicate: true,
duplicateOfId: duplicateRow.id,
aiReady: true,
fromCache: true,
text: extracted.text,
pdfExtraction: extracted.pdfExtraction,
parsedPreview: parsed,
interviewReport,
parser: duplicateRow.parser,
parserNote: duplicateRow.parser_note,
warnings: Array.isArray(warnings) ? warnings : [],
hint: "该文件与库中记录重复,已直接返回库内 AI 结果,无需再次调用模型。",
});
}
}
const uploadId = crypto.randomUUID();
// The session must be saved before the background job is scheduled,
// because the job reads it back by uploadId.
saveSession(uploadId, {
extractedText: extracted.text,
fileSha256,
originalFilename: req.file.originalname,
mimeType: req.file.mimetype,
sourceType: extracted.sourceType,
pdfExtraction: extracted.pdfExtraction,
warnings: extracted.warnings,
});
// Fire-and-forget: the response below is sent without waiting for the AI result.
setImmediate(() => {
runResumeAiPersistInBackground(uploadId).catch((err) => {
console.error("[resume-ai-bg] 未捕获:", err?.message || err);
});
});
return res.json({
step: 1,
fileSha256,
uploadId,
isDuplicate: false,
duplicateOfId: null,
aiReady: false,
fromCache: false,
text: extracted.text,
pdfExtraction: extracted.pdfExtraction,
parsedPreview,
interviewReport: null,
parser: null,
parserNote: null,
warnings: extracted.warnings,
hint: "AI 正在后台生成并写入数据库,就绪后「咨询 AI」将可点击也可通过页面轮询获知。",
});
} catch (err) {
next(err);
}
});
/**
 * Polling endpoint: reports whether an AI evaluation for the given file hash
 * is already available in the database or in the in-memory store.
 */
app.get("/api/resume/ai-ready", async (req, res, next) => {
  try {
    const { fileSha256 } = req.query;
    const hasHash = typeof fileSha256 === "string" && fileSha256.length > 0;
    if (!hasHash) {
      return res.status(400).json({ error: "缺少 fileSha256" });
    }
    const storedRow = getMysqlPool()
      ? await findDuplicateResumeByFileHash(fileSha256)
      : null;
    if (storedRow) {
      return res.json({
        ready: true,
        duplicateOfId: storedRow.id,
        parserNote: storedRow.parser_note || null,
      });
    }
    const inMemory = memoryAiByHash.has(fileSha256);
    return res.json(inMemory ? { ready: true, fromMemory: true } : { ready: false });
  } catch (err) {
    next(err);
  }
});
// Step 2: read the already-stored AI result from the database or from the
// in-memory store. The model is never called synchronously here.
//
// The in-memory store is consulted even when MySQL is configured: the
// background job writes there when the database insert fails, and the
// ai-ready endpoint reports such results as ready.
app.post("/api/resume/consult-ai", async (req, res, next) => {
  try {
    const { fileSha256 } = req.body || {};
    if (!fileSha256 || typeof fileSha256 !== "string") {
      return res.status(400).json({ error: "缺少 fileSha256" });
    }
    const dbConfigured = Boolean(getMysqlPool());
    if (dbConfigured) {
      const dup = await findDuplicateResumeByFileHash(fileSha256);
      if (dup) {
        const parsed = dbRowToParsed(dup);
        const interviewReport = normalizeJsonColumn(dup.interview_report, null);
        const warnings = normalizeJsonColumn(dup.warnings_json, []);
        return res.json({
          fromCache: true,
          isDuplicate: true,
          duplicateOfId: dup.id,
          parsed,
          interviewReport,
          parser: dup.parser,
          parserNote: `${dup.parser_note || ""}(库内读取)`.trim(),
          warnings: Array.isArray(warnings) ? warnings : [],
          db: { saved: false, skipped: true, reason: "read_db" },
        });
      }
    }
    const mem = memoryAiByHash.get(fileSha256);
    if (mem) {
      return res.json({
        fromCache: true,
        isDuplicate: false,
        duplicateOfId: null,
        parsed: mem.parsed,
        interviewReport: mem.interviewReport,
        parser: mem.parser,
        parserNote: `${mem.parserNote || ""}(内存缓存)`.trim(),
        warnings: Array.isArray(mem.warnings) ? mem.warnings : [],
        db: { saved: false, skipped: true, reason: "memory" },
      });
    }
    // Nothing stored yet: the client is expected to keep polling and retry.
    return res.status(409).json({
      error: dbConfigured
        ? "库中尚未找到该文件的 AI 评估,请等待后台写入完成后再试(页面会自动轮询)。"
        : "尚未找到 AI 评估结果,请等待后台生成完成后再试(未配置数据库时使用内存缓存)。",
    });
  } catch (err) {
    next(err);
  }
});
// One-shot endpoint: accepts multipart/form-data with the upload in the
// "file" field, extracts the text, runs the AI parse synchronously and, when
// MySQL is configured, stores the result. A failed database write is reported
// in the `db` part of the response instead of failing the request.
app.post("/api/parse-word", upload.single("file"), async (req, res, next) => {
  try {
    const { file } = req;
    if (!file) {
      return res.status(400).json({ error: "缺少上传文件:请使用 form-data 字段 file" });
    }
    const extracted = await extractTextFromUpload(file);
    const aiResult = await parseResumeWithAI(extracted.text);
    const fileSha256 = crypto.createHash("sha256").update(file.buffer).digest("hex");
    const { parsed, interviewReport, parser, parserNote } = aiResult;
    const responseBody = {
      filename: file.originalname,
      mimeType: file.mimetype,
      sourceType: extracted.sourceType,
      text: extracted.text,
      pdfExtraction: extracted.pdfExtraction,
      parsed,
      interviewReport,
      parser,
      parserNote,
      warnings: extracted.warnings,
    };

    let db = { saved: false, skipped: true };
    if (getMysqlPool()) {
      try {
        const fields = parsed || {};
        const insertId = await insertResumeSubmission({
          originalFilename: file.originalname,
          mimeType: file.mimetype,
          sourceType: extracted.sourceType,
          fileSha256,
          name: fields.姓名 || null,
          city: fields.城市 || null,
          age: fields.年龄 || null,
          expectedSalary: fields.期望薪资 || null,
          educationExperience: fields.教育经历 || null,
          education: fields.学历 || null,
          ability: fields.能力 || null,
          workExperience: fields.工作经历 || null,
          projectExperience: fields.项目经历 || null,
          techStack: fields.技术栈 || null,
          interviewReport,
          pdfMeta: extracted.pdfExtraction || null,
          parser,
          parserNote,
          warningsJson: extracted.warnings,
        });
        db = { saved: true, id: insertId };
      } catch (err) {
        console.error("[mysql] 写入 resume_submissions 失败:", err?.message || err);
        db = {
          saved: false,
          skipped: false,
          error: err?.message || String(err),
        };
      }
    }
    return res.json({ ...responseBody, db });
  } catch (err) {
    next(err);
  }
});
// Static assets from ../public, mounted after all API routes on purpose.
app.use(express.static(path.join(__dirname, "..", "public")));
/**
 * Chooses the HTTP status for an error that reached the error middleware.
 *
 * - unsupported upload type (raised by the upload filter/extractor) -> 415
 * - multer size limit -> 413, any other multer error -> 400
 * - errors that carry their own 4xx/5xx `status`/`statusCode`
 *   (e.g. from the JSON body parser) keep it
 * - everything else -> 500
 */
function resolveErrorStatus(err) {
  if (err && err.message && /不支持的文件类型/.test(err.message)) {
    return 415;
  }
  if (err instanceof multer.MulterError) {
    return err.code === "LIMIT_FILE_SIZE" ? 413 : 400;
  }
  const declared = err?.status ?? err?.statusCode;
  if (Number.isInteger(declared) && declared >= 400 && declared < 600) {
    return declared;
  }
  return 500;
}

// Central error handler: answers every failure with `{ error: message }`.
app.use((err, req, res, next) => {
  // Once a response has started, only Express' default handler can finish it.
  if (res.headersSent) {
    return next(err);
  }
  return res.status(resolveErrorStatus(err)).json({
    error: err?.message || "服务端错误",
  });
});
const PORT = Number(process.env.PORT || 3000);

/**
 * Startup check: logs whether MySQL is reachable and, if so, makes sure the
 * resume table exists. Failures are logged only; the server keeps running.
 */
async function reportMysqlStartupStatus() {
  let mysqlResult;
  try {
    mysqlResult = await testMysqlConnection();
  } catch (err) {
    console.error("[mysql] 连接失败:", err?.message || err);
    return;
  }
  if (mysqlResult.skipped) {
    console.log("[mysql] 未配置 MYSQL_HOST跳过连接");
    return;
  }
  if (!mysqlResult.ok) {
    return;
  }
  console.log(`[mysql] 已连接,当前库: ${mysqlResult.database || "未知"}`);
  try {
    await ensureResumeTables();
    console.log("[mysql] 数据表 resume_submissions 已就绪");
  } catch (tableErr) {
    console.error("[mysql] 建表失败:", tableErr?.message || tableErr);
  }
}

app.listen(PORT, async () => {
  console.log(`Word parse API running on http://localhost:${PORT}`);
  await reportMysqlStartupStatus();
});

View File

@@ -0,0 +1,42 @@
// Session lifetime in milliseconds; UPLOAD_SESSION_TTL_MS overrides the
// one-hour default. The value is evaluated once, when this module is loaded.
const TTL_MS = Number(process.env.UPLOAD_SESSION_TTL_MS || 60 * 60 * 1000);
// Upload sessions keyed by uploadId; held in process memory only.
const store = new Map();
/**
 * Resolves the session TTL at call time.
 *
 * Reading the environment here (rather than only at module load) honours a
 * value that is set after this module was first required, e.g. by a dotenv
 * call that runs later in the entry file. Non-numeric or non-positive values
 * are ignored: a NaN delay would make the expiry timer fire almost at once.
 */
function resolveSessionTtlMs() {
  const fromEnv = Number(process.env.UPLOAD_SESSION_TTL_MS);
  if (Number.isFinite(fromEnv) && fromEnv > 0) {
    return fromEnv;
  }
  if (Number.isFinite(TTL_MS) && TTL_MS > 0) {
    return TTL_MS;
  }
  return 60 * 60 * 1000;
}

/**
 * Stores an upload session and schedules its expiry.
 *
 * @param {string} uploadId - session key.
 * @param {object} payload - session data; copied into the record together
 *   with an `expiresAt` timestamp (ms since epoch).
 */
function saveSession(uploadId, payload) {
  const ttlMs = resolveSessionTtlMs();
  const record = {
    ...payload,
    expiresAt: Date.now() + ttlMs,
  };
  store.set(uploadId, record);
  const timer = setTimeout(() => {
    // Only evict the record this timer belongs to, not a newer one saved
    // under the same id.
    if (store.get(uploadId) === record) {
      store.delete(uploadId);
    }
  }, ttlMs);
  // A pending expiry must not keep the process alive on shutdown.
  timer.unref?.();
}
/**
 * Looks up a live upload session.
 *
 * @param {string} uploadId - session key.
 * @returns {object|null} the stored record, or null when the id is empty,
 *   unknown, or the record has expired (expired records are removed).
 */
function getSession(uploadId) {
  const record = uploadId ? store.get(uploadId) : undefined;
  if (!record) {
    return null;
  }
  const expired = Date.now() > record.expiresAt;
  if (expired) {
    store.delete(uploadId);
    return null;
  }
  return record;
}
/**
 * Removes an upload session; does nothing when the id is not stored.
 *
 * @param {string} uploadId - session key.
 */
function deleteSession(uploadId) {
store.delete(uploadId);
}
// Public API of the in-memory upload session cache.
module.exports = {
saveSession,
getSession,
deleteSession,
};