1+ require " #{ process .env .DOTFILES } /lib/node-globals"
12puppeteer = require ' puppeteer'
2- pr = require ' bluebird'
3- fs = require ' fs'
3+ crypto = require ' crypto'
4+ mkdirp = require ' mkdirp'
5+ slugify = require ' slugify'
46
57usage = """
68usage: pup [options] URL [more urls...]
@@ -16,6 +18,8 @@ OPTIONS
1618 to set regex flags like "i" for case-insensitive
1719-i Ignore navigation failures, log anyway
1820-v Verbose
21+ -a NUM Return cached content, unless it is older than NUM seconds. If
22+ NUM is zero or less, always return cached content
1923
2024API
2125
2832
# When true, `log` emits diagnostics; the value is only ever read in this view.
debug = false

# Directory holding cached page content.
# NOTE(review): `data_root` is not defined in the visible code — presumably
# supplied by the node-globals require at the top of the file; confirm.
cache = data_root + '/pup-cache'

# Write the given arguments through console.warn, prefixed with the current
# Date. Does nothing unless `debug` is truthy.
log = (args...) ->
  return unless debug
  console.warn new Date(), args...
3238
3339_page = (argv ) ->
@@ -84,17 +90,49 @@ _page = (argv) ->
8490
8591 return [ browser, page ]
8692
# Compute the cache file path for `url`, creating its directory as a side
# effect.
#
# The SHA-1 hex digest of the URL is cut into 5-character groups, which are
# used as nested directory names below `cache`. The file name inside that
# directory is the slugified URL.
#
# url - String URL to look up.
#
# Returns the String path "<cache>/<digest groups>/<slug>".
cachepath = (url) ->
  digest = crypto.createHash('sha1').update(url).digest('hex')
  shards = digest.match(/\w{5}/g)
  folder = "#{cache}/" + shards.join('/')
  # Create the directory here so callers can read or write the path directly.
  mkdirp.sync folder
  "#{folder}/#{slugify(url)}"
87101get = (argv ) ->
102+ content = argv ._ .map -> null
103+
104+ if argv .a ?
105+ argv ._ .forEach (url, i) ->
106+ file = cachepath (url)
107+ console .warn " cachepath: #{ file} " if debug
108+ if fs .existsSync (file)
109+ mtime = moment (fs .statSync (file).mtime )
110+ dur = mtime .diff (moment ())
111+ if argv .a <= 0 or dur/ 1000 < argv .a
112+ content[i] = fs .readFileSync (file)
113+ age = moment .duration (dur).humanize (true )
114+ console .warn " cache hit: #{ content[i].length } bytes from #{ age} "
115+ if not content[i] and debug
116+ console .warn " cache miss"
117+
118+ if content .filter (Boolean ).length is content .length
119+ return content
120+
88121 try
89122 [browser , page ] = await _page (argv)
90- content = []
91- await pr .each argv ._ , (url ) ->
123+ await pr .each argv ._ , (url , i ) ->
124+ return if content[i]
125+ console .warn " .goto(url)" if debug
92126 try
93127 await page .goto url, waitUntil : ' networkidle0'
94128 catch e
95129 if not argv[' ignore-nav-fail' ]
96130 throw e
97- content .push await page .content ()
131+ content[i] = await page .content ()
132+
133+ if argv .a ?
134+ fs .writeFileSync (cachepath (url), content[i])
135+
98136 await browser .close ()
99137 return content
100138 catch e
0 commit comments