Skip to content

Commit 9e5e5e3

Browse files
committed
add -a switch to pup to cache fetches
1 parent 2abe4e9 commit 9e5e5e3

File tree

3 files changed

+50
-5
lines changed

3 files changed

+50
-5
lines changed

lib/exec/pup.coffee

Lines changed: 43 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
1+
require "#{process.env.DOTFILES}/lib/node-globals"
12
puppeteer = require 'puppeteer'
2-
pr = require 'bluebird'
3-
fs = require 'fs'
3+
crypto = require 'crypto'
4+
mkdirp = require 'mkdirp'
5+
slugify = require 'slugify'
46

57
usage = """
68
usage: pup [options] URL [more urls...]
@@ -16,6 +18,8 @@ OPTIONS
1618
to set regex flags like "i" for case-insensitive
1719
-i Ignore navigation failures, log anyway
1820
-v Verbose
21+
-a NUM Return cached content, unless it is older than NUM seconds. If
22+
NUM is zero or less, always return cached content
1923
2024
API
2125
@@ -28,6 +32,8 @@ API
2832

2933
debug = false
3034

35+
cache = data_root + '/pup-cache'
36+
3137
# Debug logger: writes a timestamp followed by the given values via
# console.warn, but only when the module-level `debug` flag is set.
log = (args...) ->
  return unless debug
  console.warn(new Date(), args...)
3238

3339
_page = (argv) ->
@@ -84,17 +90,49 @@ _page = (argv) ->
8490

8591
return [ browser, page ]
8692

93+
# Map a URL to the path of the file that holds its cached page content.
#
# The SHA-1 hex digest of the URL is split into 5-character pieces that become
# nested directory levels under `cache`. The directory is created (with any
# missing parents) as a side effect. The file name is a slug of the URL.
#
# url - the URL string to look up
#
# Returns the cache file path as a string; the file itself is not created here.
cachepath = (url) ->
  sha1 = crypto.createHash('sha1').update(url).digest('hex')
  dir = "#{cache}/" + sha1.match(/\w{5}/g).join('/')
  mkdirp.sync(dir)
  # Cap the slug length: a long URL (e.g. a large query string) would otherwise
  # yield a file name beyond the 255-byte limit common to most filesystems and
  # the later write would fail with ENAMETOOLONG. The directory path is built
  # from the full hash of the URL, so truncating the name does not make two
  # URLs share a file. NOTE(review): assumes the slug is ASCII, so characters
  # equal bytes; confirm against slugify's output for non-Latin URLs.
  "#{dir}/#{slugify(url).slice(0, 255)}"
100+
87101
get = (argv) ->
102+
content = argv._.map -> null
103+
104+
if argv.a?
105+
argv._.forEach (url, i) ->
106+
file = cachepath(url)
107+
console.warn "cachepath: #{file}" if debug
108+
if fs.existsSync(file)
109+
mtime = moment(fs.statSync(file).mtime)
110+
dur = mtime.diff(moment())
111+
if argv.a <= 0 or dur/1000 < argv.a
112+
content[i] = fs.readFileSync(file)
113+
age = moment.duration(dur).humanize(true)
114+
console.warn "cache hit: #{content[i].length} bytes from #{age}"
115+
if not content[i] and debug
116+
console.warn "cache miss"
117+
118+
if content.filter(Boolean).length is content.length
119+
return content
120+
88121
try
89122
[browser, page] = await _page(argv)
90-
content = []
91-
await pr.each argv._, (url) ->
123+
await pr.each argv._, (url, i) ->
124+
return if content[i]
125+
console.warn ".goto(url)" if debug
92126
try
93127
await page.goto url, waitUntil:'networkidle0'
94128
catch e
95129
if not argv['ignore-nav-fail']
96130
throw e
97-
content.push await page.content()
131+
content[i] = await page.content()
132+
133+
if argv.a?
134+
fs.writeFileSync(cachepath(url), content[i])
135+
98136
await browser.close()
99137
return content
100138
catch e

lib/package.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
"jsdom": "^16.6.0",
1414
"lodash": "^4.17.21",
1515
"minimist": "^1.2.5",
16+
"mkdirp": "^1.0.4",
1617
"moment": "^2.29.1",
1718
"moment-timezone": "^0.5.33",
1819
"parse-domain": "2.3.4",
@@ -23,6 +24,7 @@
2324
"request-debug": "^0.2.0",
2425
"request-promise": "^4.2.5",
2526
"rss-generator": "^0.0.3",
27+
"slugify": "^1.6.5",
2628
"sqlite-async": "^1.1.3",
2729
"sqlite3": "^5.0.2"
2830
},

lib/yarn.lock

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5520,6 +5520,11 @@ slash@^2.0.0:
55205520
resolved "https://registry.yarnpkg.com/slash/-/slash-2.0.0.tgz#de552851a1759df3a8f206535442f5ec4ddeab44"
55215521
integrity sha512-ZYKh3Wh2z1PpEXWr0MpSBZ0V6mZHAQfYevttO11c51CaWjGTaadiKZ+wVt1PbMlDV5qhMFslpZCemhwOK7C89A==
55225522

5523+
slugify@^1.6.5:
5524+
version "1.6.5"
5525+
resolved "https://registry.yarnpkg.com/slugify/-/slugify-1.6.5.tgz#c8f5c072bf2135b80703589b39a3d41451fbe8c8"
5526+
integrity sha512-8mo9bslnBO3tr5PEVFzMPIWwWnipGS0xVbYf65zxDqfNwmzYn1LpiKNrR6DlClusuvo+hDHd1zKpmfAe83NQSQ==
5527+
55235528
snapdragon-node@^2.0.1:
55245529
version "2.1.1"
55255530
resolved "https://registry.yarnpkg.com/snapdragon-node/-/snapdragon-node-2.1.1.tgz#6c175f86ff14bdb0724563e8f3c1b021a286853b"

0 commit comments

Comments
 (0)