Add index.js
This commit is contained in:
parent
11f036b4ca
commit
18071296ff
|
@ -0,0 +1,47 @@
|
|||
#!/usr/bin/env node
|
||||
|
||||
|
||||
const cherio = require('cherio');
|
||||
const path = require('path');
|
||||
const fs = require('fs-extra');
|
||||
const fsp = fs.promises;
|
||||
const mkdirp = require('util').promisify(fs.mkdirp);
|
||||
const urllib = require('url');
|
||||
|
||||
/**
 * Downloads a single page, writes its body beneath `savedir`, and yields the
 * links found in it that point at the same host.
 *
 * The on-disk location mirrors the URL's pathname. A pathname that ends in
 * `/` (including the site root) is stored as `index.html` inside the
 * matching directory, so a directory-style page never occupies the path that
 * its child pages need as a directory.
 *
 * @param {string} url - Absolute URL of the page to download.
 * @param {string} savedir - Directory under which the page is written.
 * @yields {string} Absolute, fragment-free URL of each same-host `<a href>`
 *   in the page. The same URL may be yielded more than once.
 * @throws {TypeError} If `url` itself cannot be parsed or the fetch fails.
 */
async function* save(url, savedir) {
  console.log({url});
  const urlProperties = new URL(url);
  const resp = await fetch(url);
  if (!resp.ok) {
    // Do not persist or crawl error pages (404, 500, ...).
    console.warn('Skipping', url, resp.status);
    return;
  }
  const text = await resp.text();

  const segments = urlProperties.pathname.split('/');
  if (urlProperties.pathname.endsWith('/')) segments.push('index.html'); //Directory-style URL, root included
  let relativeSavePath = path.join(...segments);
  if (relativeSavePath === '.') relativeSavePath = 'index.html'; //Catch empty pathname
  console.log({relativeSavePath});

  const savepath = path.join(savedir, relativeSavePath);
  await mkdirp(path.dirname(savepath));
  console.log('Saving', url, savepath);
  await fsp.writeFile(savepath, text);

  const dom = cherio(text);
  for (const link of dom.find('a').toArray()) {
    const href = cherio(link).attr('href'); //Get href
    if (!href) continue; //Ignore empty

    let target;
    try {
      target = new URL(href, url); //Handle relative urls
    } catch {
      // One malformed link must not abort the whole crawl.
      console.warn('Ignoring unparseable link', href);
      continue;
    }
    if (target.host !== urlProperties.host) continue; //Reject beyond the current domain

    target.hash = ''; //A fragment addresses the same document; drop it
    yield target.href;
  }
}
|
||||
|
||||
/**
 * Crawls a site breadth-first starting at `rooturl`, saving every reachable
 * same-host page under `savedir`.
 *
 * Every URL is queued at most once: a URL is recorded as seen at the moment
 * it is queued, so pages that link back to an already-processed page (or to
 * each other) cannot cause it to be downloaded again.
 *
 * @param {string} rooturl - Absolute URL where the crawl starts.
 * @param {string} savedir - Directory passed through to `save` for output.
 * @returns {Promise<void>} Resolves once the queue is exhausted; rejects
 *   with whatever error `save` throws.
 */
async function main(rooturl, savedir) {
  // Holds every URL ever queued, whether still pending or already processed.
  const seen = new Set([rooturl]);
  const todownload = [rooturl];

  while (todownload.length > 0) {
    const saveurl = todownload.shift();
    for await (const newUrl of save(saveurl, savedir)) {
      if (seen.has(newUrl)) continue;
      seen.add(newUrl);
      todownload.push(newUrl);
    }
  }
}
|
||||
|
||||
// Start the crawl. A failure is reported and turned into a non-zero exit
// code rather than being left as an unhandled promise rejection.
main('https://projex.wiki/','out').catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
|
Loading…
Reference in New Issue