#!/usr/bin/env node
// Provenance: added as index.js by commit 18071296ff4623b73dabbbae6fc9b98d640049bc
// ("Add index.js", x0x7, Thu, 13 Feb 2025 20:13:41 -0500).
//
// Minimal same-host site mirror: downloads a root page, saves it under an
// output directory, and follows every <a href> that stays on the same host.
//
// Usage: index.js [rootUrl] [saveDir]

const cherio = require('cherio');
const path = require('path');
const fs = require('fs-extra');
const fsp = fs.promises;
const mkdirp = require('util').promisify(fs.mkdirp);
const urllib = require('url');

// Defaults used when no command-line arguments are given.
const DEFAULT_ROOT_URL = 'https://projex.wiki/';
const DEFAULT_SAVE_DIR = 'out';

/**
 * Parse `href` (optionally relative to `base`) and drop its #fragment, so
 * that links to different anchors of one page compare equal.
 *
 * @param {string} href - Absolute or relative URL.
 * @param {string} [base] - URL that `href` is resolved against.
 * @returns {URL} The parsed URL with an empty hash.
 * @throws {TypeError} If the URL cannot be parsed.
 */
function normalizeUrl(href, base) {
  const parsed = new urllib.URL(href, base);
  parsed.hash = '';
  return parsed;
}

/**
 * Map a URL pathname to a file path relative to the save directory.
 *
 * Only the pathname is considered, so URLs that differ solely in their query
 * string map to the same file.
 *
 * @param {string} pathname - The `pathname` of a parsed URL (starts with '/').
 * @returns {string} Relative path to write the response body to.
 */
function toRelativeSavePath(pathname) {
  const relative = path.join(...pathname.split('/'));
  // path.join yields '.' when every segment is empty, i.e. the root document.
  if (relative === '.') return 'index.html';
  // A trailing slash denotes a directory-style URL; store it as an index file
  // so that deeper URLs can still create files below that directory.
  if (pathname.endsWith('/')) return path.join(relative, 'index.html');
  return relative;
}

/**
 * Download `url`, write the response body below `savedir`, and yield every
 * same-host link found in the document.
 *
 * @param {string} url - Absolute URL to download.
 * @param {string} savedir - Directory the mirrored files are written into.
 * @yields {string} Absolute, fragment-less URLs on the same host as `url`.
 * @throws {Error} If the request fails, the server answers with a non-2xx
 *   status, or the file cannot be written.
 */
async function* save(url, savedir) {
  console.log({url});
  const urlProperties = new urllib.URL(url);
  const resp = await fetch(url);
  if (!resp.ok) {
    throw new Error(`Request for ${url} failed with HTTP status ${resp.status}`);
  }
  // Keep the raw bytes: decoding to text and re-encoding would corrupt
  // anything that is not valid UTF-8 (images, PDFs, other charsets).
  const body = Buffer.from(await resp.arrayBuffer());

  const relativeSavePath = toRelativeSavePath(urlProperties.pathname);
  console.log({relativeSavePath});
  const savepath = path.join(savedir, relativeSavePath);
  await mkdirp(path.dirname(savepath));
  console.log('Saving',url,savepath);
  await fsp.writeFile(savepath, body);

  // Only look for links when the server does not declare a non-HTML type.
  const contentType = resp.headers.get('content-type');
  if (contentType && !contentType.toLowerCase().includes('html')) return;

  const dom = cherio(new TextDecoder().decode(body));
  for (const link of dom.find('a').toArray()) {
    const href = cherio(link).attr('href');
    if (!href) continue; // Ignore missing or empty href
    let target;
    try {
      target = normalizeUrl(href, url); // Handles relative urls
    } catch (err) {
      // A malformed href on a page must not abort the crawl.
      if (err instanceof TypeError) continue;
      throw err;
    }
    if (target.host !== urlProperties.host) continue; // Reject beyond the current domain
    yield target.href;
  }
}

/**
 * Crawl breadth-first from `rooturl`, saving every reachable same-host page
 * into `savedir`. Each URL is queued at most once; a failure on one URL is
 * logged and the crawl continues with the next one.
 *
 * @param {string} rooturl - Absolute URL the crawl starts from.
 * @param {string} savedir - Directory the mirrored files are written into.
 * @returns {Promise<void>} Resolves when the queue is exhausted.
 * @throws {TypeError} If `rooturl` is not a valid absolute URL.
 */
async function main(rooturl, savedir) {
  const start = normalizeUrl(rooturl).href;
  // Every URL ever queued, so pages linking back to an already-downloaded
  // page cannot re-queue it.
  const seen = new Set([start]);
  const todownload = [start];
  while (todownload.length) {
    const saveurl = todownload.shift();
    try {
      for await (const newUrl of save(saveurl, savedir)) {
        if (seen.has(newUrl)) continue;
        seen.add(newUrl);
        todownload.push(newUrl);
      }
    } catch (err) {
      console.error('Failed to save', saveurl, err);
    }
  }
}

const [rooturl = DEFAULT_ROOT_URL, savedir = DEFAULT_SAVE_DIR] = process.argv.slice(2);
main(rooturl, savedir).catch((err) => {
  console.error(err);
  process.exitCode = 1;
});