3232
3333debug = false
3434
35- cache = data_root + ' /pup-cache'
# Build the cache file path for `url`, creating its directory as a side effect.
#
# The SHA-1 hex digest of the url is cut into 5-character chunks which become
# nested directories below "<data_root>/pup-cache/"; the file name is the
# slugified url.
#
# Returns the leading run of at most 250 characters of that path.
cachepath = (url) ->
  digest = crypto.createHash('sha1').update(url).digest('hex')
  chunks = digest.match(/\w{5}/g)
  dir = data_root + '/pup-cache/' + chunks.join('/')
  mkdirp.sync(dir)
  full = "#{dir}/#{slugify(url)}"
  # keep only the first (up to) 250 characters of the assembled path
  full.match(/^(.{0,250})/)[1]
42+
# Return the cached content for `url` when its cache file is fresh enough.
#
# url - address whose cache entry (located via cachepath) is looked up
# age - maximum accepted age in seconds; a value <= 0 accepts an entry of
#       any age
#
# Returns whatever fs.readFileSync yields for the cache file (no encoding is
# passed), or undefined on a cache miss.
getcache = (url, age) ->
  file = cachepath(url)
  console.warn "cachepath: #{file}" if debug

  # One stat call both detects a missing file and provides its mtime. Any
  # stat failure counts as "not cached", like the fs.existsSync test it
  # replaces, and closes the window between the exists check and the stat.
  try
    stat = fs.statSync(file)
  catch
    stat = null

  content = undefined
  if stat?
    # difference between the file's mtime and now; divided by 1000 below to
    # compare against `age` in seconds
    dur = moment(stat.mtime).diff(moment())
    if age <= 0 or Math.abs(dur / 1000) < age
      content = fs.readFileSync(file)
      if debug
        age_ago = moment.duration(dur).humanize(true)
        console.warn "cache hit: #{content.length} bytes from #{age_ago}"

  console.warn "cache miss" if debug and not content
  return content
57+
# Store `content` as the cache entry for `url`.
#
# Returns the path of the cache file that was written.
setcache = (url, content) ->
  target = cachepath(url)
  fs.writeFileSync target, content
  target
3662
# Debug logger: when `debug` is truthy, passes the current Date followed by
# all given arguments to console.warn; otherwise does nothing.
log = (args...) ->
  return unless debug
  console.warn new Date(), args...
3864
@@ -90,30 +116,12 @@ _page = (argv) ->
90116
91117 return [ browser, page ]
92118
93- cachepath = (url ) ->
94- h = crypto .createHash (' sha1' )
95- h .update (url)
96- sha1 = h .digest (' hex' )
97- dir = " #{ cache} /" + sha1 .match (/ \w {5} / g ).join (' /' )
98- mkdirp .sync (dir)
99- " #{ dir} /#{ slugify (url)} "
100-
101119get = (argv ) ->
102120 content = argv ._ .map -> null
103121
104122 if argv .a ?
105123 argv ._ .forEach (url, i) ->
106- file = cachepath (url)
107- console .warn " cachepath: #{ file} " if debug
108- if fs .existsSync (file)
109- mtime = moment (fs .statSync (file).mtime )
110- dur = mtime .diff (moment ())
111- if argv .a <= 0 or dur/ 1000 < argv .a
112- content[i] = fs .readFileSync (file)
113- age = moment .duration (dur).humanize (true )
114- console .warn " cache hit: #{ content[i].length } bytes from #{ age} "
115- if not content[i] and debug
116- console .warn " cache miss"
124+ content[i] = getcache url, argv .a
117125
118126 if content .filter (Boolean ).length is content .length
119127 return content
@@ -131,7 +139,7 @@ get = (argv) ->
131139 content[i] = await page .content ()
132140
133141 if argv .a ?
134- fs . writeFileSync ( cachepath ( url) , content[i])
142+ setcache ( url, content[i])
135143
136144 await browser .close ()
137145 return content
@@ -161,4 +169,4 @@ unless module.parent
161169
162170 console .log (await get (argv)).join (' \n ' )
163171
164- module .exports = { page : _page, parse_args, get, log }
172+ module .exports = { page : _page, parse_args, get, log, getcache, setcache }
0 commit comments