refactor: look up fetch options per request

master
Conduitry 2022-06-13 19:08:48 -04:00
parent 6a278ec527
commit 16d48852f1
5 changed files with 18 additions and 16 deletions

2
.gitignore vendored
View File

@ -1,2 +1,2 @@
/node_modules/
/dl-*.fetch-options.json
/fetch-options.*.json

View File

@ -1,13 +1,13 @@
# dl-fur
## `dl-*.fetch-options.json`
## `fetch-options.*.json`
Each `dl-*.js` will look for a corresponding `dl-*.fetch-options.json` and, if present, will use it as the second argument in all `fetch` calls.
Each request will look for a `fetch-options.[hostname].json` file matching the hostname of the URL being fetched and, if present, will use its parsed contents as the second argument of the `fetch` call for that request.
For example, if you have a `cf_clearance` cookie for the Cloudflare protection in front of FurryBooru, you can create
```json
// dl-furrybooru.fetch-options.json
// fetch-options.furry.booru.org.json
{
"headers": {
"cookie": "cf_clearance=whatever",

12
_dl.js
View File

@ -25,12 +25,16 @@ const make_query = (params) => {
return str && `?${str}`;
};
export const dl = async ({ from, get_posts }) => {
const fetch_options = await fs.promises.readFile(new URL(from.slice(0, -3) + '.fetch-options.json'), 'utf8').then(JSON.parse, () => {});
// Cache of fetch-options promises, keyed by hostname. Created without a
// prototype so that a hostname which happens to match an inherited
// Object.prototype member (e.g. "constructor", "toString") is treated as a
// cache miss instead of returning that built-in.
const fetch_options_lookup = Object.create(null);

/**
 * Look up the fetch options to use for a request to `url`.
 *
 * Reads `fetch-options.<hostname>.json` from the directory containing this
 * module and parses it as JSON. The resulting promise is cached per hostname,
 * so each options file is read at most once.
 *
 * @param {string | URL} url - Absolute URL of the request; only its hostname is used.
 * @returns {Promise<object | undefined>} The parsed options, or `undefined`
 *   when the file could not be read. A file that is read but is not valid JSON
 *   causes the promise to reject.
 * @throws {TypeError} If `url` cannot be parsed as an absolute URL.
 */
const get_fetch_options = url => {
	const { hostname } = new URL(url);
	// Store the promise itself (not the awaited value) so that concurrent
	// requests to the same host share a single file read.
	return fetch_options_lookup[hostname] ??= fs.promises
		.readFile(new URL(`./fetch-options.${hostname}.json`, import.meta.url), 'utf8')
		// Only a failed read is mapped to "no options"; JSON errors propagate.
		.then(JSON.parse, () => {});
};
export const dl = async get_posts => {
for (const query of process.argv.slice(2)) {
const posts = [];
for await (const page of get_posts(query, (url, params) => fetch_200(url + make_query(params), fetch_options))) {
for await (const page of get_posts(query, async (url, params) => fetch_200(url + make_query(params), await get_fetch_options(url)))) {
posts.push(...page);
console.log(`\x1b[K${query}: ${posts.length} posts\x1b[A`);
}
@ -42,7 +46,7 @@ export const dl = async ({ from, get_posts }) => {
await fs.promises.access(dest);
} catch {
console.log(`${i + 1}/${posts.length}: ${dest}`);
const resp = await fetch_200(url, fetch_options);
const resp = await fetch_200(url, await get_fetch_options(url));
const buffer = Buffer.from(await resp.arrayBuffer());
const tmp = crypto.randomUUID();
await fs.promises.writeFile(tmp, buffer);

View File

@ -2,9 +2,8 @@
import { dl } from './_dl.js';
await dl({
from: import.meta.url,
async *get_posts(query, get) {
await dl(
async function*(query, get) {
for (let page = null, posts; !posts || posts.length === 320; page = `b${posts[319]?.id}`) {
posts = (await (await get('https://e621.net/posts.json', { limit: 320, page, tags: query })).json()).posts;
yield posts.map(post => ({
@ -14,4 +13,4 @@ await dl({
}));
}
},
});
);

View File

@ -2,9 +2,8 @@
import { dl } from './_dl.js';
await dl({
from: import.meta.url,
async *get_posts(query, get) {
await dl(
async function*(query, get) {
for (let pid = 0, posts; !posts || posts.length === 100; pid++) {
posts = (await (await get('https://furry.booru.org/index.php', { page: 'dapi', s: 'post', q: 'index', limit: 100, pid, tags: query })).text()).match(/<post\b.*?\/>/gs) || [];
yield posts.map(post => {
@ -15,4 +14,4 @@ await dl({
});
}
},
});
);