From 16d48852f15a57c95da1d17d8b548d53f945921b Mon Sep 17 00:00:00 2001
From: Conduitry
Date: Mon, 13 Jun 2022 19:08:48 -0400
Subject: [PATCH] refactor: look up fetch options per request

---
 .gitignore       |  2 +-
 README.md        |  6 +++---
 _dl.js           | 12 ++++++++----
 dl-e621.js       |  7 +++----
 dl-furrybooru.js |  7 +++----
 5 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/.gitignore b/.gitignore
index c56bcdf..6b0b27c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,2 @@
 /node_modules/
-/dl-*.fetch-options.json
+/fetch-options.*.json
diff --git a/README.md b/README.md
index 416a28a..e1574d9 100644
--- a/README.md
+++ b/README.md
@@ -1,13 +1,13 @@
 # dl-fur
 
-## `dl-*.fetch-options.json`
+## `fetch-options.*.json`
 
-Each `dl-*.js` will look for a corresponding `dl-*.fetch-options.json` and, if present, will use it as the second argument in all `fetch` calls.
+Each request will look for a corresponding `fetch-options.[hostname].json` and, if present, will use it as the second argument in all `fetch` calls.
 
 For example, if you have a `cf_clearance` cookie for the Cloudflare protection in front of FurryBooru, you can create
 
 ```json
-// dl-furrybooru.fetch-options.json
+// fetch-options.furry.booru.org.json
 {
 	"headers": {
 		"cookie": "cf_clearance=whatever",
diff --git a/_dl.js b/_dl.js
index c727ed0..f7ead27 100644
--- a/_dl.js
+++ b/_dl.js
@@ -25,12 +25,16 @@ const make_query = (params) => {
 	return str && `?${str}`;
 };
 
-export const dl = async ({ from, get_posts }) => {
-	const fetch_options = await fs.promises.readFile(new URL(from.slice(0, -3) + '.fetch-options.json'), 'utf8').then(JSON.parse, () => {});
+const fetch_options_lookup = {};
+const get_fetch_options = url => {
+	const { hostname } = new URL(url);
+	return fetch_options_lookup[hostname] ??= fs.promises.readFile(new URL(`./fetch-options.${hostname}.json`, import.meta.url), 'utf8').then(JSON.parse, () => {});
+};
+export const dl = async get_posts => {
 
 	for (const query of process.argv.slice(2)) {
 		const posts = [];
-		for await (const page of get_posts(query, (url, params) => fetch_200(url + make_query(params), fetch_options))) {
+		for await (const page of get_posts(query, async (url, params) => fetch_200(url + make_query(params), await get_fetch_options(url)))) {
 			posts.push(...page);
 			console.log(`\x1b[K${query}: ${posts.length} posts\x1b[A`);
 		}
@@ -42,7 +46,7 @@ export const dl = async ({ from, get_posts }) => {
 			await fs.promises.access(dest);
 		} catch {
 			console.log(`${i + 1}/${posts.length}: ${dest}`);
-			const resp = await fetch_200(url, fetch_options);
+			const resp = await fetch_200(url, await get_fetch_options(url));
 			const buffer = Buffer.from(await resp.arrayBuffer());
 			const tmp = crypto.randomUUID();
 			await fs.promises.writeFile(tmp, buffer);
diff --git a/dl-e621.js b/dl-e621.js
index b8c7de1..7514e7a 100755
--- a/dl-e621.js
+++ b/dl-e621.js
@@ -2,9 +2,8 @@
 
 import { dl } from './_dl.js';
 
-await dl({
-	from: import.meta.url,
-	async *get_posts(query, get) {
+await dl(
+	async function*(query, get) {
 		for (let page = null, posts; !posts || posts.length === 320; page = `b${posts[319]?.id}`) {
 			posts = (await (await get('https://e621.net/posts.json', { limit: 320, page, tags: query })).json()).posts;
 			yield posts.map(post => ({
@@ -14,4 +13,4 @@ await dl({
 			}));
 		}
 	},
-});
+);
diff --git a/dl-furrybooru.js b/dl-furrybooru.js
index 24618ef..21aff7f 100755
--- a/dl-furrybooru.js
+++ b/dl-furrybooru.js
@@ -2,9 +2,8 @@
 
 import { dl } from './_dl.js';
 
-await dl({
-	from: import.meta.url,
-	async *get_posts(query, get) {
+await dl(
+	async function*(query, get) {
 		for (let pid = 0, posts; !posts || posts.length === 100; pid++) {
 			posts = (await (await get('https://furry.booru.org/index.php', { page: 'dapi', s: 'post', q: 'index', limit: 100, pid, tags: query })).text()).match(//gs) || [];
 			yield posts.map(post => {
@@ -15,4 +14,4 @@ await dl({
 			});
 		}
 	},
-});
+);
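
For reference, a minimal standalone sketch of the memoized per-hostname lookup that the _dl.js hunk introduces. This is not part of the commit: the `node:fs` import specifier and the console.log usage lines at the end are assumptions added for illustration; the lookup logic itself mirrors the added lines in the patch.

// Cache one promise per hostname so each fetch-options.<hostname>.json is read
// at most once; a missing file makes the promise resolve to undefined, which
// leaves fetch() with no extra options for that host.
import * as fs from 'node:fs';

const fetch_options_lookup = {};

const get_fetch_options = url => {
	const { hostname } = new URL(url);
	return fetch_options_lookup[hostname] ??= fs.promises
		.readFile(new URL(`./fetch-options.${hostname}.json`, import.meta.url), 'utf8')
		.then(JSON.parse, () => {});
};

// Example usage with the two hosts the downloaders target: repeated calls for
// the same hostname share the single cached promise (and thus a single read).
console.log(await get_fetch_options('https://furry.booru.org/index.php'));
console.log(await get_fetch_options('https://e621.net/posts.json'));

Caching the promise rather than the parsed object means concurrent requests to the same host wait on one readFile, and the `() => {}` rejection handler maps a missing options file to undefined instead of an error.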