// Reconstructed from a collapsed patch against src/index.js (b713ce2..06cbc06).
// Hunk 1 (@@ -35,6 +35,171 @@) inserts the MIME helpers below right after the
// end of getHeader() and before the ONE_DAY_MS constant.

/**
 * Splits a raw message (or a single MIME part) at the first blank line.
 *
 * @param {string} raw - Header block, blank line, then body.
 * @returns {{ headerText: string, bodyText: string }} When no blank line is
 *   found the whole input is returned as the body with empty headers.
 */
function splitEmail(raw) {
  const match = raw.match(/\r?\n\r?\n/);
  if (!match) return { headerText: "", bodyText: raw };
  const idx = match.index;
  const sepLen = match[0].length;
  return {
    headerText: raw.slice(0, idx),
    bodyText: raw.slice(idx + sepLen),
  };
}

/**
 * Parses a header block into a map of lower-cased names to values.
 *
 * Folded continuation lines (starting with space or tab) are appended to the
 * previous header with a single space; repeated headers are joined with ", ".
 *
 * @param {string} headerText - Header lines separated by LF or CRLF.
 * @returns {Object<string, string>} Null-prototype object keyed by header name.
 */
function parseHeaderText(headerText) {
  const lines = headerText.split(/\r?\n/);
  // Null prototype: header names come from untrusted mail, and names such as
  // "constructor" or "toString" must not collide with inherited properties.
  const headers = Object.create(null);
  let current = "";

  for (const line of lines) {
    if (/^[ \t]/.test(line) && current) {
      headers[current] += ` ${line.trim()}`;
      continue;
    }

    const idx = line.indexOf(":");
    if (idx <= 0) continue; // not a "Name: value" line
    const key = line.slice(0, idx).trim().toLowerCase();
    const value = line.slice(idx + 1).trim();
    current = key;
    headers[key] = headers[key] ? `${headers[key]}, ${value}` : value;
  }

  return headers;
}

/**
 * Extracts a parameter (e.g. `charset`, `boundary`) from a structured header
 * value such as `text/plain; charset="utf-8"`.
 *
 * @param {string} headerValue - Full header value; may be empty.
 * @param {string} paramName - Parameter name, matched case-insensitively.
 * @returns {string} The parameter value without quotes, or "" if absent.
 */
function getHeaderParam(headerValue, paramName) {
  if (!headerValue) return "";
  const escaped = paramName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  // Anchor the name to the start, a ";" or whitespace so that "charset" does
  // not match inside another parameter such as "x-charset".
  const prefix = "(?:^|[;\\s])";
  const quoted = new RegExp(`${prefix}${escaped}="([^"]+)"`, "i");
  const unquoted = new RegExp(`${prefix}${escaped}=([^;\\s]+)`, "i");
  const m1 = headerValue.match(quoted);
  if (m1) return m1[1];
  const m2 = headerValue.match(unquoted);
  return m2 ? m2[1] : "";
}

/**
 * Decodes bytes to a string using the given charset label.
 *
 * @param {Uint8Array} bytes - Encoded bytes.
 * @param {string} [charset="utf-8"] - Encoding label taken from the message.
 * @returns {string} Decoded text; falls back to UTF-8 when the label is not
 *   accepted by TextDecoder.
 */
function bytesToText(bytes, charset = "utf-8") {
  try {
    return new TextDecoder(charset).decode(bytes);
  } catch {
    // Unknown/unsupported charset label: best-effort decode as UTF-8.
    return new TextDecoder("utf-8").decode(bytes);
  }
}

/**
 * Decodes a base64 string (whitespace and line breaks allowed) into bytes.
 *
 * @param {string} input - Base64 text.
 * @returns {Uint8Array} Decoded bytes; empty for blank input.
 * @throws If `atob` rejects the input as malformed.
 */
function base64ToBytes(input) {
  const clean = input.replace(/\s+/g, "");
  if (!clean) return new Uint8Array();
  const bin = atob(clean);
  const out = new Uint8Array(bin.length);
  for (let i = 0; i < bin.length; i++) out[i] = bin.charCodeAt(i);
  return out;
}

/**
 * Decodes quoted-printable text into bytes.
 *
 * @param {string} input - Quoted-printable encoded text.
 * @returns {Uint8Array} One byte per resulting character (low 8 bits).
 */
function quotedPrintableToBytes(input) {
  const normalized = input
    .replace(/=\r?\n/g, "") // soft line breaks
    .replace(/=([0-9A-Fa-f]{2})/g, (_m, hex) => String.fromCharCode(parseInt(hex, 16)));

  const out = new Uint8Array(normalized.length);
  for (let i = 0; i < normalized.length; i++) out[i] = normalized.charCodeAt(i) & 0xff;
  return out;
}

/**
 * Decodes a part body according to its Content-Transfer-Encoding and charset.
 *
 * @param {string} bodyText - Body of the part as found in the raw message.
 * @param {string} transferEncoding - Content-Transfer-Encoding value, or "".
 * @param {string} charset - Charset label for the decoded bytes.
 * @returns {string} Decoded text.
 */
function decodePartBody(bodyText, transferEncoding, charset) {
  const enc = (transferEncoding || "").toLowerCase();

  if (enc.includes("base64")) {
    try {
      return bytesToText(base64ToBytes(bodyText), charset);
    } catch {
      // Malformed base64 must not abort extraction of the whole message;
      // fall through and treat the body as unencoded text.
    }
  }

  if (enc.includes("quoted-printable")) {
    return bytesToText(quotedPrintableToBytes(bodyText), charset);
  }

  const bytes = new Uint8Array(bodyText.length);
  for (let i = 0; i < bodyText.length; i++) {
    const code = bodyText.charCodeAt(i);
    // A code unit above 0xFF cannot be a raw byte, so the text has already
    // been decoded upstream; masking it to 8 bits would only corrupt it.
    if (code > 0xff) return bodyText;
    bytes[i] = code;
  }
  return bytesToText(bytes, charset);
}

/**
 * Converts an HTML fragment into rough plain text.
 *
 * Drops script/style elements, turns <br> and </p> into newlines, strips the
 * remaining tags, decodes a handful of common entities and collapses runs of
 * three or more newlines into a blank line.
 *
 * @param {string} html - HTML markup.
 * @returns {string} Trimmed plain text.
 */
function htmlToText(html) {
  return html
    .replace(/<script[\s\S]*?<\/script>/gi, "")
    .replace(/<style[\s\S]*?<\/style>/gi, "")
    .replace(/<br\s*\/?>/gi, "\n")
    .replace(/<\/p>/gi, "\n")
    .replace(/<[^>]+>/g, "")
    .replace(/&nbsp;/gi, " ")
    .replace(/&lt;/gi, "<")
    .replace(/&gt;/gi, ">")
    .replace(/&quot;/gi, '"')
    .replace(/&#39;/g, "'")
    // "&amp;" goes last so that "&amp;lt;" yields "&lt;" and not "<".
    .replace(/&amp;/gi, "&")
    .replace(/\r/g, "")
    .replace(/\n{3,}/g, "\n\n")
    .trim();
}

/**
 * Splits a multipart body into its parts using the given boundary.
 *
 * Text before the first delimiter and after the closing delimiter is ignored.
 * A missing closing delimiter is tolerated: whatever was collected last is
 * returned as the final part.
 *
 * @param {string} bodyText - Body of the multipart entity.
 * @param {string} boundary - Boundary parameter value (without leading "--").
 * @returns {string[]} Trimmed part texts (headers + body), LF-joined.
 */
function splitMultipartParts(bodyText, boundary) {
  const delimiter = `--${boundary}`;
  const closing = `--${boundary}--`;
  const lines = bodyText.split(/\r?\n/);
  const parts = [];
  let collecting = false;
  let current = [];

  for (const line of lines) {
    // Boundary lines may carry trailing whitespace (transport padding).
    const marker = line.trimEnd();
    if (marker === delimiter || marker === closing) {
      if (collecting && current.length) {
        parts.push(current.join("\n").trim());
        current = [];
      }
      collecting = marker !== closing;
      continue;
    }
    if (collecting) current.push(line);
  }

  if (current.length) parts.push(current.join("\n").trim());
  return parts;
}

/**
 * Extracts human-readable text from a raw MIME message.
 *
 * Multipart messages are walked recursively; text/plain parts are preferred,
 * text/html parts are converted with htmlToText() when no plain text exists.
 *
 * @param {string} raw - Complete raw message (headers and body).
 * @returns {string} Readable body text, or `raw` itself when nothing could be
 *   extracted.
 */
function extractReadableBody(raw) {
  // Guard against pathologically nested multipart structures.
  const maxDepth = 16;

  const walk = (rawText, depth) => {
    const { headerText, bodyText } = splitEmail(rawText);
    const headers = parseHeaderText(headerText);
    const rawContentType = headers["content-type"] || "";
    const contentType = (rawContentType || "text/plain").toLowerCase();
    const transferEncoding = headers["content-transfer-encoding"] || "";
    const charset = getHeaderParam(rawContentType, "charset") || "utf-8";

    if (contentType.includes("multipart/")) {
      const boundary = getHeaderParam(rawContentType, "boundary");
      // Always return the { kind, text } shape; callers read `.text`.
      if (!boundary || depth >= maxDepth) {
        return { kind: "text/plain", text: bodyText.trim() };
      }

      const parts = splitMultipartParts(bodyText, boundary);
      const plainTexts = [];
      const htmlTexts = [];

      for (const part of parts) {
        const parsed = walk(part, depth + 1);
        if (!parsed.text) continue;
        if (parsed.kind === "text/plain") plainTexts.push(parsed.text);
        else if (parsed.kind === "text/html") htmlTexts.push(parsed.text);
      }

      if (plainTexts.length) return { kind: "text/plain", text: plainTexts.join("\n\n").trim() };
      if (htmlTexts.length) return { kind: "text/plain", text: htmlToText(htmlTexts.join("\n\n")).trim() };
      return { kind: "text/plain", text: bodyText.trim() };
    }

    const decoded = decodePartBody(bodyText, transferEncoding, charset).trim();
    if (contentType.includes("text/html")) return { kind: "text/html", text: decoded };
    return { kind: "text/plain", text: decoded };
  };

  const result = walk(raw, 0);
  if (!result || !result.text) return raw;
  return result.kind === "text/html" ? htmlToText(result.text) : result.text;
}

const ONE_DAY_MS = 24 * 60 * 60 * 1000;

// Hunk 2 (@@ -88,7 +253,8 @@, inside the `export default` handler, which is
// only partially visible in the patch) replaces
//   const content = await streamToString(message.raw);
// with
//   const raw = await streamToString(message.raw);
//   const content = extractReadableBody(raw) || raw;