Add index.js
This commit is contained in:
parent
11f036b4ca
commit
18071296ff
|
@ -0,0 +1,47 @@
|
||||||
|
#!/usr/bin/env node
|
||||||
|
|
||||||
|
|
||||||
|
const cherio = require('cherio');
|
||||||
|
const path = require('path');
|
||||||
|
const fs = require('fs-extra');
|
||||||
|
const fsp = fs.promises;
|
||||||
|
const mkdirp = require('util').promisify(fs.mkdirp);
|
||||||
|
const urllib = require('url');
|
||||||
|
|
||||||
|
/**
 * Download one page, store it under `savedir`, and yield the links it contains
 * that point at the same host.
 *
 * The file location mirrors the URL path. A path that ends in "/" (including
 * the site root) is stored as ".../index.html", so that a directory-style URL
 * never occupies the file name its child pages need as a directory.
 * Only the URL path is used for the file name; the query string is ignored.
 *
 * Relies on a global `fetch` being available in the runtime.
 * Network and file-system errors are not caught here and propagate to the caller.
 *
 * @param {string} url - Absolute URL of the page to download.
 * @param {string} savedir - Directory under which the page is written.
 * @yields {string} Absolute URL (fragment removed) of each `<a href>` whose
 *   host equals the host of `url`. Duplicates are not filtered here.
 */
async function* save(url, savedir) {
  console.log({url});
  const urlProperties = new URL(url);
  const resp = await fetch(url);
  // Read the body before checking the status so the response is always consumed.
  const text = await resp.text();
  if (!resp.ok) {
    // Do not store an error page as if it were the requested document.
    console.warn('Skipping', url, 'HTTP status', resp.status);
    return;
  }

  // "/", "/docs/" and "" all end in an empty segment: treat them as directories.
  const segments = urlProperties.pathname.split('/');
  if (segments[segments.length - 1] === '') segments.push('index.html');
  const relativeSavePath = path.join(...segments);
  console.log({relativeSavePath});

  const savepath = path.join(savedir, relativeSavePath);
  await mkdirp(path.dirname(savepath));
  console.log('Saving', url, savepath);
  await fsp.writeFile(savepath, text);

  const dom = cherio(text);
  for (const link of dom.find('a').toArray()) {
    const href = cherio(link).attr('href');
    if (!href) continue; // Ignore anchors without a target

    let target;
    try {
      target = new URL(href, url); // Resolves relative hrefs against this page
    } catch (err) {
      // A single malformed href must not abort the whole crawl.
      console.warn('Ignoring unparseable link', href, 'on', url);
      continue;
    }

    if (target.host !== urlProperties.host) continue; // Stay on the current host

    // "#section" variants address the same document; drop the fragment so the
    // page is not reported (and downloaded) once per anchor.
    target.hash = '';
    yield target.href;
  }
}
|
||||||
|
|
||||||
|
/**
 * Crawl outward from `rooturl`, saving each discovered page exactly once.
 *
 * URLs are processed in first-in, first-out order: each one is handed to
 * `save`, and every URL that `save` yields is queued unless it has been
 * queued before.
 *
 * @param {string} rooturl - URL at which the crawl starts.
 * @param {string} savedir - Directory passed through to `save` for every page.
 * @returns {Promise<void>} Resolves when the queue is empty; rejects with
 *   whatever `save` throws.
 */
async function main(rooturl, savedir) {
  // Every URL that has ever been queued. Tracking "seen" rather than separate
  // pending/done lists guarantees that a page linking back to an already
  // processed URL (for example the root) cannot re-queue it, so the crawl
  // terminates on sites with link cycles.
  const seen = new Set([rooturl]);
  const todownload = [rooturl];

  while (todownload.length) {
    const saveurl = todownload.shift();
    for await (const newUrl of save(saveurl, savedir)) {
      if (seen.has(newUrl)) continue;
      seen.add(newUrl);
      todownload.push(newUrl);
    }
  }
}
|
||||||
|
|
||||||
|
// Script entry point: mirror the site into ./out. The promise is handled
// explicitly so a failed crawl is reported and signalled with a non-zero exit
// code instead of being left as a floating rejection.
main('https://projex.wiki/','out').catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
|
Loading…
Reference in New Issue