// Merge fileA and fileB by shelling out to pdftk through a scratch directory.
// NOTE(review): assumes `exec` is a promise-returning wrapper (e.g. a promisified
// child_process.exec) that rejects when pdftk fails — confirm against its definition.
const tmp = await fs.mkdtemp('/tmp/pdf-');
try {
  await fs.writeFile(`${tmp}/a.pdf`, fileA);
  await fs.writeFile(`${tmp}/b.pdf`, fileB);
  await exec(`pdftk ${tmp}/a.pdf ${tmp}/b.pdf cat output ${tmp}/out.pdf`);
  // Read inside the try block so the bytes are in memory before cleanup runs.
  return await fs.readFile(`${tmp}/out.pdf`);
} finally {
  // Always remove the scratch directory, including when a write, pdftk or the
  // read throws; `force` keeps cleanup from masking the original error if the
  // directory is already gone.
  await fs.rm(tmp, { recursive: true, force: true });
}
// Merge fileA and fileB by shelling out to pdftk through a scratch directory.
// NOTE(review): assumes `exec` is a promise-returning wrapper (e.g. a promisified
// child_process.exec) that rejects when pdftk fails — confirm against its definition.
const tmp = await fs.mkdtemp('/tmp/pdf-');
try {
  await fs.writeFile(`${tmp}/a.pdf`, fileA);
  await fs.writeFile(`${tmp}/b.pdf`, fileB);
  await exec(`pdftk ${tmp}/a.pdf ${tmp}/b.pdf cat output ${tmp}/out.pdf`);
  // Read inside the try block so the bytes are in memory before cleanup runs.
  return await fs.readFile(`${tmp}/out.pdf`);
} finally {
  // Always remove the scratch directory, including when a write, pdftk or the
  // read throws; `force` keeps cleanup from masking the original error if the
  // directory is already gone.
  await fs.rm(tmp, { recursive: true, force: true });
}
// Merge fileA and fileB by shelling out to pdftk through a scratch directory.
// NOTE(review): assumes `exec` is a promise-returning wrapper (e.g. a promisified
// child_process.exec) that rejects when pdftk fails — confirm against its definition.
const tmp = await fs.mkdtemp('/tmp/pdf-');
try {
  await fs.writeFile(`${tmp}/a.pdf`, fileA);
  await fs.writeFile(`${tmp}/b.pdf`, fileB);
  await exec(`pdftk ${tmp}/a.pdf ${tmp}/b.pdf cat output ${tmp}/out.pdf`);
  // Read inside the try block so the bytes are in memory before cleanup runs.
  return await fs.readFile(`${tmp}/out.pdf`);
} finally {
  // Always remove the scratch directory, including when a write, pdftk or the
  // read throws; `force` keeps cleanup from masking the original error if the
  // directory is already gone.
  await fs.rm(tmp, { recursive: true, force: true });
}
import { PDFDocument } from 'pdf-lib';

/**
 * Concatenates the given PDFs, in order, into one new document.
 *
 * Every page of every input is copied into the output. The title, author and
 * subject of the first input are carried over to the output, because the
 * output starts as a blank document and would otherwise lose them.
 *
 * @param inputs Raw bytes of each PDF to merge, in output order.
 * @returns The serialized bytes of the merged PDF.
 */
export async function mergePdfs(inputs: Uint8Array[]): Promise<Uint8Array> {
  const out = await PDFDocument.create();
  let isFirst = true;
  for (const input of inputs) {
    const src = await PDFDocument.load(input);
    if (isFirst) {
      isFirst = false;
      // Only copy fields the first document actually sets.
      const title = src.getTitle();
      if (title !== undefined) out.setTitle(title);
      const author = src.getAuthor();
      if (author !== undefined) out.setAuthor(author);
      const subject = src.getSubject();
      if (subject !== undefined) out.setSubject(subject);
    }
    const pages = await out.copyPages(src, src.getPageIndices());
    for (const page of pages) out.addPage(page);
  }
  return out.save();
}
import { PDFDocument } from 'pdf-lib';

/**
 * Concatenates the given PDFs, in order, into one new document.
 *
 * Every page of every input is copied into the output. The title, author and
 * subject of the first input are carried over to the output, because the
 * output starts as a blank document and would otherwise lose them.
 *
 * @param inputs Raw bytes of each PDF to merge, in output order.
 * @returns The serialized bytes of the merged PDF.
 */
export async function mergePdfs(inputs: Uint8Array[]): Promise<Uint8Array> {
  const out = await PDFDocument.create();
  let isFirst = true;
  for (const input of inputs) {
    const src = await PDFDocument.load(input);
    if (isFirst) {
      isFirst = false;
      // Only copy fields the first document actually sets.
      const title = src.getTitle();
      if (title !== undefined) out.setTitle(title);
      const author = src.getAuthor();
      if (author !== undefined) out.setAuthor(author);
      const subject = src.getSubject();
      if (subject !== undefined) out.setSubject(subject);
    }
    const pages = await out.copyPages(src, src.getPageIndices());
    for (const page of pages) out.addPage(page);
  }
  return out.save();
}
import { PDFDocument } from 'pdf-lib';

/**
 * Concatenates the given PDFs, in order, into one new document.
 *
 * Every page of every input is copied into the output. The title, author and
 * subject of the first input are carried over to the output, because the
 * output starts as a blank document and would otherwise lose them.
 *
 * @param inputs Raw bytes of each PDF to merge, in output order.
 * @returns The serialized bytes of the merged PDF.
 */
export async function mergePdfs(inputs: Uint8Array[]): Promise<Uint8Array> {
  const out = await PDFDocument.create();
  let isFirst = true;
  for (const input of inputs) {
    const src = await PDFDocument.load(input);
    if (isFirst) {
      isFirst = false;
      // Only copy fields the first document actually sets.
      const title = src.getTitle();
      if (title !== undefined) out.setTitle(title);
      const author = src.getAuthor();
      if (author !== undefined) out.setAuthor(author);
      const subject = src.getSubject();
      if (subject !== undefined) out.setSubject(subject);
    }
    const pages = await out.copyPages(src, src.getPageIndices());
    for (const page of pages) out.addPage(page);
  }
  return out.save();
}
/**
 * Expands a page-range spec such as `"1-3,5"` into an ordered list of
 * 1-based page numbers.
 *
 * Segments are comma separated; each one is a single page (`"5"`) or an
 * inclusive range (`"2-4"`). A descending range (`"5-3"`) yields pages in
 * descending order. Duplicates are kept, whitespace around segments and
 * endpoints is ignored, and empty segments are skipped.
 *
 * @param spec The range specification to parse.
 * @param opts.maxPage Highest page number that may be referenced.
 * @returns The selected page numbers, in the order the spec lists them.
 * @throws {RangeParseError} If the spec is empty, malformed, or references a
 *   page outside `1..opts.maxPage`.
 */
export function parseRanges(spec: string, opts: { maxPage: number }): number[] {
  if (typeof spec !== 'string' || spec.trim() === '') {
    throw new RangeParseError('range spec is empty');
  }
  const parts = spec
    .split(',')
    .map((p) => p.trim())
    .filter((p) => p.length > 0);
  if (parts.length === 0) throw new RangeParseError('range spec has no parts');

  // Accept plain decimal digits only. Number() on its own would also accept
  // hex ("0x2"), exponents ("1e0"), decimals ("5.0") and turn "" into 0.
  const toPage = (token: string | undefined): number => {
    const text = (token ?? '').trim();
    return /^\d+$/.test(text) ? Number(text) : Number.NaN;
  };

  const result: number[] = [];
  for (const part of parts) {
    if (part.includes('-')) {
      const halves = part.split('-');
      if (halves.length !== 2) throw new RangeParseError(`invalid range segment: ${part}`);
      const a = toPage(halves[0]);
      const b = toPage(halves[1]);
      if (!Number.isInteger(a) || !Number.isInteger(b)) {
        throw new RangeParseError(`range endpoints must be integers`);
      }
      if (a < 1 || b < 1 || a > opts.maxPage || b > opts.maxPage) {
        throw new RangeParseError(`range ${part} out of bounds`);
      }
      if (a <= b) {
        for (let i = a; i <= b; i++) result.push(i);
      } else {
        // reverse range: "5-3" → [5,4,3]
        for (let i = a; i >= b; i--) result.push(i);
      }
    } else {
      const n = toPage(part);
      if (!Number.isInteger(n) || n < 1 || n > opts.maxPage) {
        throw new RangeParseError(`invalid page: ${part}`);
      }
      result.push(n);
    }
  }
  return result;
}
/**
 * Expands a page-range spec such as `"1-3,5"` into an ordered list of
 * 1-based page numbers.
 *
 * Segments are comma separated; each one is a single page (`"5"`) or an
 * inclusive range (`"2-4"`). A descending range (`"5-3"`) yields pages in
 * descending order. Duplicates are kept, whitespace around segments and
 * endpoints is ignored, and empty segments are skipped.
 *
 * @param spec The range specification to parse.
 * @param opts.maxPage Highest page number that may be referenced.
 * @returns The selected page numbers, in the order the spec lists them.
 * @throws {RangeParseError} If the spec is empty, malformed, or references a
 *   page outside `1..opts.maxPage`.
 */
export function parseRanges(spec: string, opts: { maxPage: number }): number[] {
  if (typeof spec !== 'string' || spec.trim() === '') {
    throw new RangeParseError('range spec is empty');
  }
  const parts = spec
    .split(',')
    .map((p) => p.trim())
    .filter((p) => p.length > 0);
  if (parts.length === 0) throw new RangeParseError('range spec has no parts');

  // Accept plain decimal digits only. Number() on its own would also accept
  // hex ("0x2"), exponents ("1e0"), decimals ("5.0") and turn "" into 0.
  const toPage = (token: string | undefined): number => {
    const text = (token ?? '').trim();
    return /^\d+$/.test(text) ? Number(text) : Number.NaN;
  };

  const result: number[] = [];
  for (const part of parts) {
    if (part.includes('-')) {
      const halves = part.split('-');
      if (halves.length !== 2) throw new RangeParseError(`invalid range segment: ${part}`);
      const a = toPage(halves[0]);
      const b = toPage(halves[1]);
      if (!Number.isInteger(a) || !Number.isInteger(b)) {
        throw new RangeParseError(`range endpoints must be integers`);
      }
      if (a < 1 || b < 1 || a > opts.maxPage || b > opts.maxPage) {
        throw new RangeParseError(`range ${part} out of bounds`);
      }
      if (a <= b) {
        for (let i = a; i <= b; i++) result.push(i);
      } else {
        // reverse range: "5-3" → [5,4,3]
        for (let i = a; i >= b; i--) result.push(i);
      }
    } else {
      const n = toPage(part);
      if (!Number.isInteger(n) || n < 1 || n > opts.maxPage) {
        throw new RangeParseError(`invalid page: ${part}`);
      }
      result.push(n);
    }
  }
  return result;
}
/**
 * Expands a page-range spec such as `"1-3,5"` into an ordered list of
 * 1-based page numbers.
 *
 * Segments are comma separated; each one is a single page (`"5"`) or an
 * inclusive range (`"2-4"`). A descending range (`"5-3"`) yields pages in
 * descending order. Duplicates are kept, whitespace around segments and
 * endpoints is ignored, and empty segments are skipped.
 *
 * @param spec The range specification to parse.
 * @param opts.maxPage Highest page number that may be referenced.
 * @returns The selected page numbers, in the order the spec lists them.
 * @throws {RangeParseError} If the spec is empty, malformed, or references a
 *   page outside `1..opts.maxPage`.
 */
export function parseRanges(spec: string, opts: { maxPage: number }): number[] {
  if (typeof spec !== 'string' || spec.trim() === '') {
    throw new RangeParseError('range spec is empty');
  }
  const parts = spec
    .split(',')
    .map((p) => p.trim())
    .filter((p) => p.length > 0);
  if (parts.length === 0) throw new RangeParseError('range spec has no parts');

  // Accept plain decimal digits only. Number() on its own would also accept
  // hex ("0x2"), exponents ("1e0"), decimals ("5.0") and turn "" into 0.
  const toPage = (token: string | undefined): number => {
    const text = (token ?? '').trim();
    return /^\d+$/.test(text) ? Number(text) : Number.NaN;
  };

  const result: number[] = [];
  for (const part of parts) {
    if (part.includes('-')) {
      const halves = part.split('-');
      if (halves.length !== 2) throw new RangeParseError(`invalid range segment: ${part}`);
      const a = toPage(halves[0]);
      const b = toPage(halves[1]);
      if (!Number.isInteger(a) || !Number.isInteger(b)) {
        throw new RangeParseError(`range endpoints must be integers`);
      }
      if (a < 1 || b < 1 || a > opts.maxPage || b > opts.maxPage) {
        throw new RangeParseError(`range ${part} out of bounds`);
      }
      if (a <= b) {
        for (let i = a; i <= b; i++) result.push(i);
      } else {
        // reverse range: "5-3" → [5,4,3]
        for (let i = a; i >= b; i--) result.push(i);
      }
    } else {
      const n = toPage(part);
      if (!Number.isInteger(n) || n < 1 || n > opts.maxPage) {
        throw new RangeParseError(`invalid page: ${part}`);
      }
      result.push(n);
    }
  }
  return result;
}
/**
 * Translates a thrown value into the JSON error response for the client.
 *
 * Recognised error classes get their own `error` code and status; any other
 * `Error` is reported as `pdf_processing_failed` (422) with its message, and a
 * non-`Error` value becomes `internal_error` (500).
 *
 * @param c Request context used to build the JSON response.
 * @param err The value that was thrown.
 * @returns The JSON response describing the failure.
 */
export function errorResponse(c: Context, err: unknown): Response {
  // The specific classes are tested before the generic Error fallback below.
  if (err instanceof MissingFileError) {
    return c.json({ error: 'missing_file' }, 422);
  }
  if (err instanceof PayloadTooLargeError) {
    return c.json({ error: 'payload_too_large', limit_mb: err.limitMb }, 413);
  }
  if (err instanceof UnsupportedMediaTypeError) {
    return c.json({ error: 'unsupported_media_type' }, 415);
  }
  if (err instanceof RangeParseError) {
    return c.json({ error: 'invalid_range', message: err.message }, 422);
  }
  // Anything that is not even an Error carries no message worth exposing.
  if (!(err instanceof Error)) {
    return c.json({ error: 'internal_error' }, 500);
  }
  return c.json({ error: 'pdf_processing_failed', message: err.message }, 422);
}
/**
 * Translates a thrown value into the JSON error response for the client.
 *
 * Recognised error classes get their own `error` code and status; any other
 * `Error` is reported as `pdf_processing_failed` (422) with its message, and a
 * non-`Error` value becomes `internal_error` (500).
 *
 * @param c Request context used to build the JSON response.
 * @param err The value that was thrown.
 * @returns The JSON response describing the failure.
 */
export function errorResponse(c: Context, err: unknown): Response {
  // The specific classes are tested before the generic Error fallback below.
  if (err instanceof MissingFileError) {
    return c.json({ error: 'missing_file' }, 422);
  }
  if (err instanceof PayloadTooLargeError) {
    return c.json({ error: 'payload_too_large', limit_mb: err.limitMb }, 413);
  }
  if (err instanceof UnsupportedMediaTypeError) {
    return c.json({ error: 'unsupported_media_type' }, 415);
  }
  if (err instanceof RangeParseError) {
    return c.json({ error: 'invalid_range', message: err.message }, 422);
  }
  // Anything that is not even an Error carries no message worth exposing.
  if (!(err instanceof Error)) {
    return c.json({ error: 'internal_error' }, 500);
  }
  return c.json({ error: 'pdf_processing_failed', message: err.message }, 422);
}
/**
 * Translates a thrown value into the JSON error response for the client.
 *
 * Recognised error classes get their own `error` code and status; any other
 * `Error` is reported as `pdf_processing_failed` (422) with its message, and a
 * non-`Error` value becomes `internal_error` (500).
 *
 * @param c Request context used to build the JSON response.
 * @param err The value that was thrown.
 * @returns The JSON response describing the failure.
 */
export function errorResponse(c: Context, err: unknown): Response {
  // The specific classes are tested before the generic Error fallback below.
  if (err instanceof MissingFileError) {
    return c.json({ error: 'missing_file' }, 422);
  }
  if (err instanceof PayloadTooLargeError) {
    return c.json({ error: 'payload_too_large', limit_mb: err.limitMb }, 413);
  }
  if (err instanceof UnsupportedMediaTypeError) {
    return c.json({ error: 'unsupported_media_type' }, 415);
  }
  if (err instanceof RangeParseError) {
    return c.json({ error: 'invalid_range', message: err.message }, 422);
  }
  // Anything that is not even an Error carries no message worth exposing.
  if (!(err instanceof Error)) {
    return c.json({ error: 'internal_error' }, 500);
  }
  return c.json({ error: 'pdf_processing_failed', message: err.message }, 422);
}
/** The five ASCII bytes "%PDF-" that must open an accepted upload. */
const PDF_MAGIC = Uint8Array.of(0x25, 0x50, 0x44, 0x46, 0x2d);

/**
 * Reports whether `bytes` begins with the "%PDF-" signature.
 *
 * @param bytes Raw bytes to inspect.
 * @returns `true` only when the first five bytes equal the signature; inputs
 *   shorter than the signature are never a match.
 */
export function isPdfBytes(bytes: Uint8Array): boolean {
  return (
    bytes.length >= PDF_MAGIC.length &&
    PDF_MAGIC.every((expected, offset) => bytes[offset] === expected)
  );
}

/**
 * Throwing variant of {@link isPdfBytes}.
 *
 * @param bytes Raw bytes to validate.
 * @throws {UnsupportedMediaTypeError} If the bytes do not start with "%PDF-".
 */
export function assertPdfBytes(bytes: Uint8Array): void {
  if (isPdfBytes(bytes)) return;
  throw new UnsupportedMediaTypeError('uploaded file does not start with %PDF- magic bytes');
}
/** The five ASCII bytes "%PDF-" that must open an accepted upload. */
const PDF_MAGIC = Uint8Array.of(0x25, 0x50, 0x44, 0x46, 0x2d);

/**
 * Reports whether `bytes` begins with the "%PDF-" signature.
 *
 * @param bytes Raw bytes to inspect.
 * @returns `true` only when the first five bytes equal the signature; inputs
 *   shorter than the signature are never a match.
 */
export function isPdfBytes(bytes: Uint8Array): boolean {
  return (
    bytes.length >= PDF_MAGIC.length &&
    PDF_MAGIC.every((expected, offset) => bytes[offset] === expected)
  );
}

/**
 * Throwing variant of {@link isPdfBytes}.
 *
 * @param bytes Raw bytes to validate.
 * @throws {UnsupportedMediaTypeError} If the bytes do not start with "%PDF-".
 */
export function assertPdfBytes(bytes: Uint8Array): void {
  if (isPdfBytes(bytes)) return;
  throw new UnsupportedMediaTypeError('uploaded file does not start with %PDF- magic bytes');
}
/** The five ASCII bytes "%PDF-" that must open an accepted upload. */
const PDF_MAGIC = Uint8Array.of(0x25, 0x50, 0x44, 0x46, 0x2d);

/**
 * Reports whether `bytes` begins with the "%PDF-" signature.
 *
 * @param bytes Raw bytes to inspect.
 * @returns `true` only when the first five bytes equal the signature; inputs
 *   shorter than the signature are never a match.
 */
export function isPdfBytes(bytes: Uint8Array): boolean {
  return (
    bytes.length >= PDF_MAGIC.length &&
    PDF_MAGIC.every((expected, offset) => bytes[offset] === expected)
  );
}

/**
 * Throwing variant of {@link isPdfBytes}.
 *
 * @param bytes Raw bytes to validate.
 * @throws {UnsupportedMediaTypeError} If the bytes do not start with "%PDF-".
 */
export function assertPdfBytes(bytes: Uint8Array): void {
  if (isPdfBytes(bytes)) return;
  throw new UnsupportedMediaTypeError('uploaded file does not start with %PDF- magic bytes');
}
/**
 * Test helper: builds an in-memory PDF with `pageCount` pages of 200x200,
 * each labelled `p1`, `p2`, ... and, when given, a document title.
 *
 * @param pageCount Number of pages to create.
 * @param opts.title Optional title to store in the document.
 * @returns The serialized PDF.
 */
async function makePdf(pageCount: number, opts: { title?: string } = {}) {
  const pdf = await PDFDocument.create();
  let pageNo = 1;
  while (pageNo <= pageCount) {
    const page = pdf.addPage([200, 200]);
    page.drawText(`p${pageNo}`);
    pageNo += 1;
  }
  const { title } = opts;
  if (title) pdf.setTitle(title);
  return pdf.save();
}

// Merging two titled documents must keep the title of the first one.
it('preserves metadata of first document in merge', async () => {
  const first = await makePdf(1, { title: 'First Report' });
  const second = await makePdf(1, { title: 'Second' });
  const mergedBytes = await mergePdfs([first, second]);
  const reloaded = await PDFDocument.load(mergedBytes);
  expect(reloaded.getTitle()).toBe('First Report');
});
/**
 * Test helper: builds an in-memory PDF with `pageCount` pages of 200x200,
 * each labelled `p1`, `p2`, ... and, when given, a document title.
 *
 * @param pageCount Number of pages to create.
 * @param opts.title Optional title to store in the document.
 * @returns The serialized PDF.
 */
async function makePdf(pageCount: number, opts: { title?: string } = {}) {
  const pdf = await PDFDocument.create();
  let pageNo = 1;
  while (pageNo <= pageCount) {
    const page = pdf.addPage([200, 200]);
    page.drawText(`p${pageNo}`);
    pageNo += 1;
  }
  const { title } = opts;
  if (title) pdf.setTitle(title);
  return pdf.save();
}

// Merging two titled documents must keep the title of the first one.
it('preserves metadata of first document in merge', async () => {
  const first = await makePdf(1, { title: 'First Report' });
  const second = await makePdf(1, { title: 'Second' });
  const mergedBytes = await mergePdfs([first, second]);
  const reloaded = await PDFDocument.load(mergedBytes);
  expect(reloaded.getTitle()).toBe('First Report');
});
/**
 * Test helper: builds an in-memory PDF with `pageCount` pages of 200x200,
 * each labelled `p1`, `p2`, ... and, when given, a document title.
 *
 * @param pageCount Number of pages to create.
 * @param opts.title Optional title to store in the document.
 * @returns The serialized PDF.
 */
async function makePdf(pageCount: number, opts: { title?: string } = {}) {
  const pdf = await PDFDocument.create();
  let pageNo = 1;
  while (pageNo <= pageCount) {
    const page = pdf.addPage([200, 200]);
    page.drawText(`p${pageNo}`);
    pageNo += 1;
  }
  const { title } = opts;
  if (title) pdf.setTitle(title);
  return pdf.save();
}

// Merging two titled documents must keep the title of the first one.
it('preserves metadata of first document in merge', async () => {
  const first = await makePdf(1, { title: 'First Report' });
  const second = await makePdf(1, { title: 'Second' });
  const mergedBytes = await mergePdfs([first, second]);
  const reloaded = await PDFDocument.load(mergedBytes);
  expect(reloaded.getTitle()).toBe('First Report');
});
# Fetch the source and build the container image.
git clone https://github.com/sen-ltd/pdf-merge-api
cd pdf-merge-api
docker build -t pdf-merge-api .

# Start the service with container port 8000 published on localhost:8000.
docker run --rm -p 8000:8000 pdf-merge-api

# In another terminal:
# NOTE(review): "[email protected]" looks like an email-obfuscation artifact that
# replaced curl form fields of the shape name=@path.pdf; restore the real field
# names and file paths before running these commands.

# Merge two uploads into merged.pdf.
curl -F "[email protected]" -F "[email protected]" \
  -o merged.pdf http://localhost:8000/merge

# Extract pages 1-2 and 5 into excerpt.pdf.
curl -F "[email protected]" \
  "http://localhost:8000/split?ranges=1-2,5" -o excerpt.pdf

# Show document info as formatted JSON.
curl -F "[email protected]" http://localhost:8000/info | jq

# Rotate pages 1 and 3 by 90 degrees into rotated.pdf.
curl -F "[email protected]" \
  "http://localhost:8000/rotate?rotation=90&pages=1,3" -o rotated.pdf
# Fetch the source and build the container image.
git clone https://github.com/sen-ltd/pdf-merge-api
cd pdf-merge-api
docker build -t pdf-merge-api .

# Start the service with container port 8000 published on localhost:8000.
docker run --rm -p 8000:8000 pdf-merge-api

# In another terminal:
# NOTE(review): "[email protected]" looks like an email-obfuscation artifact that
# replaced curl form fields of the shape name=@path.pdf; restore the real field
# names and file paths before running these commands.

# Merge two uploads into merged.pdf.
curl -F "[email protected]" -F "[email protected]" \
  -o merged.pdf http://localhost:8000/merge

# Extract pages 1-2 and 5 into excerpt.pdf.
curl -F "[email protected]" \
  "http://localhost:8000/split?ranges=1-2,5" -o excerpt.pdf

# Show document info as formatted JSON.
curl -F "[email protected]" http://localhost:8000/info | jq

# Rotate pages 1 and 3 by 90 degrees into rotated.pdf.
curl -F "[email protected]" \
  "http://localhost:8000/rotate?rotation=90&pages=1,3" -o rotated.pdf
# Fetch the source and enter the project directory.
git clone https://github.com/sen-ltd/pdf-merge-api
cd pdf-merge-api
# Build the container image and tag it pdf-merge-api.
docker build -t pdf-merge-api .
# Start the service with container port 8000 published on localhost:8000;
# --rm removes the container when it exits.
docker run --rm -p 8000:8000 pdf-merge-api
# In another terminal:
curl -F "[email protected]" -F "[email protected]" \
  -o merged.pdf http://localhost:8000/merge
curl -F "[email protected]" \
  "http://localhost:8000/split?ranges=1-2,5" -o excerpt.pdf
curl -F "[email protected]" http://localhost:8000/info | jq
curl -F "[email protected]" \
  "http://localhost:8000/rotate?rotation=90&pages=1,3" -o rotated.pdf

- HTML files that were misnamed .pdf by a browser auto-download
- ZIP archives (PK\x03\x04) that a user zipped and then renamed because the upload form rejected .zip
- JPEGs (\xff\xd8\xff) that an iOS device produced instead of a PDF
- Totally random garbage that a fuzzer is posting at you to crash pdf-lib

- MAX_UPLOAD_MB via env var, default 20. Oversize uploads get 413 with a structured JSON body. Without this, one pathological request can OOM the process. 20 MB is a reasonable default for a document service that is not trying to be a document store.
- In-memory only. The service never touches disk. Uploads come in as Uint8Array, get processed in memory, get written to the response stream. No tempdirs, no cleanup race conditions, no quota management. If a container falls over, there is no state to recover.
- createApp() factory. Tests drive the entire app via app.request() — Hono's fetch-spec-compatible handler — with real FormData objects. No sockets, no ports, no test servers to tear down. The multipart parser, the magic-byte check, and every happy and sad path of each route are exercised in a unit-test-speed suite.
- Structured JSON logs. One line per request on stdout: method, path, status, duration. Any log shipper — fluent-bit, vector, CloudWatch agent — picks it up unchanged. I used to reach for pino for this; the replacement is seven lines of code and it's clearer.
- Non-root, multi-stage Alpine build. Final image is 190 MB, mostly the Node runtime. No native packages. Starts in under 200 ms.

- No text extraction. pdf-lib does not expose text content. If you need it, use pdf.js or a dedicated pdftxt-api. I deliberately keep this service single-purpose.
- No rasterization. You cannot get a PNG preview of page 1 out of this service. Same reason — different library for that.
- No encrypted PDFs. pdf-lib has limited support for password-protected documents. I don't try.
- No form filling. pdf-lib can do this, but I didn't wire an endpoint for it because I didn't have a use case. Would be an afternoon to add.
- No streaming. Everything is buffered in memory up to MAX_UPLOAD_MB. This is fine for documents; it would not be fine for a service handling 500 MB scans.