// based on https://github.com/mozilla/pdf.js/tree/master/examples/node/pdf2png
/*global Uint8Array*/
33
import { strict as assert } from "assert";
import Canvas from "canvas";
import fs from "fs";
import { getDocument } from "pdfjs-dist/legacy/build/pdf.mjs";
86
/**
 * Creates, resizes and releases canvas/context pairs backed by the
 * `canvas` package, in the `{ canvas, context }` shape used when rendering.
 */
class NodeCanvasFactory {
  /**
   * Allocates a new canvas together with its 2D drawing context.
   *
   * @param {number} width - Canvas width; must be greater than zero.
   * @param {number} height - Canvas height; must be greater than zero.
   * @returns {{canvas: object, context: object}} The new canvas and its
   *   "2d" context.
   * @throws {Error} Assertion failure if either dimension is not positive.
   */
  create(width, height) {
    assert(width > 0 && height > 0, "Invalid canvas size");
    const canvas = Canvas.createCanvas(width, height);
    const context = canvas.getContext("2d");
    return {
      canvas,
      context,
    };
  }

  /**
   * Resizes the canvas of an existing pair in place.
   *
   * @param {{canvas: object, context: object}} canvasAndContext - Pair
   *   previously returned by `create`.
   * @param {number} width - New width; must be greater than zero.
   * @param {number} height - New height; must be greater than zero.
   * @throws {Error} Assertion failure if the pair has no canvas or a
   *   dimension is not positive.
   */
  reset(canvasAndContext, width, height) {
    assert(canvasAndContext.canvas, "Canvas is not specified");
    assert(width > 0 && height > 0, "Invalid canvas size");
    canvasAndContext.canvas.width = width;
    canvasAndContext.canvas.height = height;
  }

  /**
   * Releases a pair: shrinks the canvas to 0x0 and clears both references
   * on the passed-in object so it cannot be reused.
   *
   * @param {{canvas: object, context: object}} canvasAndContext - Pair
   *   previously returned by `create`.
   * @throws {Error} Assertion failure if the pair has no canvas.
   */
  destroy(canvasAndContext) {
    assert(canvasAndContext.canvas, "Canvas is not specified");

    // Zeroing the width and height causes Firefox to release graphics
    // resources immediately, which can greatly reduce memory consumption.
    canvasAndContext.canvas.width = 0;
    canvasAndContext.canvas.height = 0;
    canvasAndContext.canvas = null;
    canvasAndContext.context = null;
  }
}
3836
/**
 * Renders every page of a PDF file to its own PNG file.
 *
 * Pages are processed one after another; page N is written to
 * `${targetPrefix}${N}.png` (page numbers start at 1).
 *
 * @param {string} sourceFile - Path of the PDF file to read.
 * @param {string} targetPrefix - Prefix of the output file names; the page
 *   number and ".png" are appended to it.
 * @returns {Promise<number>} The number of pages in the document.
 */
export async function convertToPng(sourceFile, targetPrefix) {
  // Some PDFs need external cmaps.
  const CMAP_URL = "../../../node_modules/pdfjs-dist/cmaps/";
  const CMAP_PACKED = true;

  // Loading file from file system into typed array.
  const pdfData = new Uint8Array(fs.readFileSync(sourceFile));

  // Load the PDF file.
  const loadingTask = getDocument({
    data: pdfData,
    cMapUrl: CMAP_URL,
    cMapPacked: CMAP_PACKED,
  });
  const pdfDocument = await loadingTask.promise;

  for (let pageNumber = 1; pageNumber <= pdfDocument.numPages; pageNumber++) {
    await processPage(pageNumber);
  }
  return pdfDocument.numPages;

  /**
   * Renders a single page at 100% scale and writes it to disk as a PNG.
   *
   * @param {number} pageNumber - 1-based index of the page to render.
   * @returns {Promise<void>}
   */
  async function processPage(pageNumber) {
    console.log("# Processing page:", pageNumber);
    // Get the page.
    const page = await pdfDocument.getPage(pageNumber);
    // Render the page with 100% scale on a canvas obtained from the
    // document's own canvas factory.
    const canvasFactory = pdfDocument.canvasFactory;
    const viewport = page.getViewport({ scale: 1.0 });
    const canvasAndContext = canvasFactory.create(
      viewport.width,
      viewport.height
    );

    try {
      const renderTask = page.render({
        canvasContext: canvasAndContext.context,
        viewport,
      });
      await renderTask.promise;

      // Convert the canvas to an image buffer.
      const image = canvasAndContext.canvas.toBuffer("image/png");
      const targetFile = `${targetPrefix}${pageNumber}.png`;

      fs.writeFileSync(targetFile, image);
      console.log("Finished converting page", pageNumber, "to", targetFile);
    } finally {
      // Release the canvas and the page resources even when rendering or
      // writing fails, so multi-page documents do not accumulate canvases.
      canvasFactory.destroy(canvasAndContext);
      page.cleanup();
    }
  }
}
0 commit comments