From f2c8a9728d168ff98fe6af24b43538c42a605b6d Mon Sep 17 00:00:00 2001 From: Victor Noguera Date: Sat, 2 May 2026 12:03:31 -0400 Subject: [PATCH] feat: add mid-range sellers by category analysis pipeline This new pipeline identifies products meeting specific monthly sold, price, seller count, and Amazon buy box share criteria across categories. It fetches comprehensive product data from Keepa and SP-API, analyzes it using an LLM, and persists the results. A key enhancement is the introduction of a dedicated Redis cache for Keepa and SP-API responses. This reduces API token consumption and improves performance for subsequent runs by caching enriched ASIN data with a 12-hour TTL. Products are saved regardless of their sellability status to provide a complete view. --- README.md | 32 + package.json | 1 + src/cache.ts | 42 +- src/mid-range-sellers-by-category.test.ts | 445 +++++ src/mid-range-sellers-by-category.ts | 1890 +++++++++++++++++++++ 5 files changed, 2409 insertions(+), 1 deletion(-) create mode 100644 src/mid-range-sellers-by-category.test.ts create mode 100644 src/mid-range-sellers-by-category.ts diff --git a/README.md b/README.md index e0ec131..2cb1988 100644 --- a/README.md +++ b/README.md @@ -45,6 +45,38 @@ bun run src/sp-test.ts B07SN9BHVV # Auth + sellers endpoint + pricing offer c bun run src/sp-test.ts --sellability B07SN9BHVV # Standalone sellability check ``` +## Category Pipelines + +Run category-focused discovery flows with Keepa + SP-API + LLM: + +```bash +bun run bestsellers +bun run monthly-sold +bun run mid-range +``` + +Mid-range process: + +- Script: `bun run mid-range` +- Source: `src/mid-range-sellers-by-category.ts` +- Default filters: + - Monthly sold between `100` and `1000` + - Price between `$15` and `$200` (using the Keepa current price, falling back to the 90-day average) + - Seller count between `3` and `20` + - If Amazon is a seller, Amazon buy box share must be between `15%` and `85%` +- Sellability behavior: + - Sellability is still fetched and 
saved (`can_sell`, `sellability_status`, `sellability_reason`) + - Matching products are persisted regardless of sellability status +- Caching behavior: + - Uses Redis to cache Keepa + SP-API enrichment data per ASIN + - Cache TTL is fixed at `12 hours` + +Example: + +```bash +bun run mid-range --category-limit 10 --per-category-top 50 --category-candidate-pool 250 --min-monthly-sold 100 --max-monthly-sold 1000 --min-price 15 --max-price 200 --min-seller-count 3 --max-seller-count 20 --min-amazon-buybox-share-pct 15 --max-amazon-buybox-share-pct 85 +``` + ## UPC to ASIN Mapping You can map UPCs to ASINs directly through the Keepa integration in `src/keepa.ts`. diff --git a/package.json b/package.json index 9b09c6d..ebad1d4 100644 --- a/package.json +++ b/package.json @@ -6,6 +6,7 @@ "scripts": { "bestsellers": "bun run src/bestsellers-by-category.ts", "monthly-sold": "bun run src/top-monthly-sold-by-category.ts", + "mid-range": "bun run src/mid-range-sellers-by-category.ts", "upc": "bun run src/upc-lookup.ts", "upc-file": "bun run src/upc-file-analysis.ts", "start": "bun run src/index.ts", diff --git a/src/cache.ts b/src/cache.ts index 5de7710..9b2f908 100644 --- a/src/cache.ts +++ b/src/cache.ts @@ -1,10 +1,21 @@ import Redis from "ioredis"; import { config } from "./config.ts"; -import type { EnrichedProduct } from "./types.ts"; +import type { EnrichedProduct, KeepaData, SpApiData } from "./types.ts"; let redis: Redis | null = null; let disabled = false; +export type ApiCacheEntry = { + title: string; + keepa: KeepaData | null; + spApi: SpApiData; + fetchedAt: string; +}; + +function getApiCacheKey(asin: string): string { + return `api:asin:${asin}`; +} + export async function connectCache(): Promise { if (disabled) return; try { @@ -58,6 +69,35 @@ export async function setCache( } } +export async function getApiCache(asin: string): Promise { + if (!redis) return null; + try { + const raw = await redis.get(getApiCacheKey(asin)); + if (!raw) return null; + return 
JSON.parse(raw) as ApiCacheEntry; + } catch { + return null; + } +} + +export async function setApiCache( + asin: string, + data: ApiCacheEntry, + ttlSeconds: number, +): Promise { + if (!redis) return; + try { + await redis.set( + getApiCacheKey(asin), + JSON.stringify(data), + "EX", + ttlSeconds, + ); + } catch { + // Non-critical, continue without caching + } +} + export async function disconnectCache(): Promise { if (redis) { await redis.quit(); diff --git a/src/mid-range-sellers-by-category.test.ts b/src/mid-range-sellers-by-category.test.ts new file mode 100644 index 0000000..32ae6a1 --- /dev/null +++ b/src/mid-range-sellers-by-category.test.ts @@ -0,0 +1,445 @@ +import { test, expect, beforeAll, afterAll, beforeEach, mock } from "bun:test"; +import { Database } from "bun:sqlite"; +import { getDb, initDb, closeDb } from "./database.ts"; +import path from "node:path"; +import { rmSync, mkdirSync } from "node:fs"; + +const fetchSellabilityBatchMock = mock(async (asins: string[]) => { + return new Map( + asins.map((asin) => { + if (asin === "B000000003") { + return [ + asin, + { + canSell: false, + sellabilityStatus: "restricted" as const, + sellabilityReason: "restricted", + }, + ]; + } + + return [ + asin, + { + canSell: true, + sellabilityStatus: "available" as const, + sellabilityReason: "ok", + }, + ]; + }), + ); +}); + +const fetchSpApiPricingAndFeesMock = mock( + async (_asin: string, sellability: any) => ({ + fbaFee: 4, + fbmFee: 2, + referralFeePercent: 15, + estimatedSalePrice: 25, + canSell: sellability?.canSell ?? null, + sellabilityStatus: sellability?.sellabilityStatus ?? "unknown", + sellabilityReason: sellability?.sellabilityReason ?? 
"missing", + }), +); + +const analyzeProductsMock = mock(async (products: any[]) => { + return products.map((p) => ({ + asin: p.record.asin, + verdict: "FBA", + confidence: 90, + reasoning: "mocked", + })); +}); + +mock.module("./sp-api.ts", () => ({ + fetchSellabilityBatch: fetchSellabilityBatchMock, + fetchSpApiPricingAndFees: fetchSpApiPricingAndFeesMock, +})); + +mock.module("./llm.ts", () => ({ + analyzeProducts: analyzeProductsMock, +})); + +const modulePromise = import("./mid-range-sellers-by-category.ts"); + +const DB_TEST_PATH = path.join( + process.cwd(), + "test_output", + "test_mid_range_analysis.sqlite", +); + +let db: Database; +let processCategory: ( + db: Database, + runId: number, + category: any, + perCategoryTop: number, + categoryCandidatePool: number, + minMonthlySold: number, + maxMonthlySold: number, + minPrice: number, + maxPrice: number, + minSellerCount: number, + maxSellerCount: number, + minAmazonBuyboxSharePct: number, + maxAmazonBuyboxSharePct: number, +) => Promise; +let insertCategoryRunSummary: ( + db: Database, + summary: any, + runTimestamp: string, +) => Promise; +let originalFetch: typeof globalThis.fetch; + +beforeAll(async () => { + const mod = await modulePromise; + processCategory = mod.processCategory; + insertCategoryRunSummary = mod.insertCategoryRunSummary; + + rmSync(path.dirname(DB_TEST_PATH), { recursive: true, force: true }); + mkdirSync(path.dirname(DB_TEST_PATH), { recursive: true }); + initDb(DB_TEST_PATH); + db = getDb(DB_TEST_PATH); + + originalFetch = globalThis.fetch; +}); + +afterAll(() => { + globalThis.fetch = originalFetch; + closeDb(); + rmSync(path.dirname(DB_TEST_PATH), { recursive: true, force: true }); +}); + +beforeEach(() => { + db.run("DELETE FROM product_analysis_results"); + db.run("DELETE FROM category_analysis_runs"); + + globalThis.fetch = mock(async (input: string | URL | Request) => { + const rawUrl = + typeof input === "string" + ? input + : input instanceof URL + ? 
input.toString() + : input.url; + const url = new URL(rawUrl); + + if (url.pathname === "/bestsellers") { + return new Response( + JSON.stringify({ + bestSellersList: [ + "B000000001", + "B000000002", + "B000000003", + "B000000004", + "B000000005", + ], + tokensLeft: 10, + refillRate: 1, + }), + { status: 200 }, + ); + } + + if (url.pathname === "/product") { + return new Response( + JSON.stringify({ + products: [ + { + asin: "B000000001", + title: "Product One", + monthlySold: 600, + isAmazonSeller: true, + buyBoxStatsAmazon90: 40, + stats: { + current: [ + null, + null, + null, + 1000, + null, + null, + null, + null, + null, + null, + null, + 5, + null, + null, + null, + null, + null, + null, + 2599, + ], + avg: [2400, null, null, 1200], + }, + csv: [[1, 2599]], + categoryTree: [{ name: "Category 1" }], + }, + { + asin: "B000000002", + title: "Product Two", + monthlySold: 250, + isAmazonSeller: true, + buyBoxStatsAmazon90: 50, + stats: { + current: [ + null, + null, + null, + 2000, + null, + null, + null, + null, + null, + null, + null, + 3, + null, + null, + null, + null, + null, + null, + 1999, + ], + avg: [1800, null, null, 2200], + }, + csv: [[1, 1200]], + categoryTree: [{ name: "Category 1" }], + }, + { + asin: "B000000003", + title: "Product Three", + monthlySold: 800, + isAmazonSeller: true, + buyBoxStatsAmazon90: 50, + stats: { + current: [ + null, + null, + null, + 1500, + null, + null, + null, + null, + null, + null, + null, + 4, + null, + null, + null, + null, + null, + null, + 2099, + ], + avg: [2000, null, null, 1800], + }, + csv: [[1, 2099]], + categoryTree: [{ name: "Category 1" }], + }, + { + asin: "B000000004", + title: "Product Four", + monthlySold: 400, + isAmazonSeller: true, + buyBoxStatsAmazon90: 95, + stats: { + current: [ + null, + null, + null, + 3000, + null, + null, + null, + null, + null, + null, + null, + 4, + null, + null, + null, + null, + null, + null, + 2899, + ], + avg: [2600, null, null, 2800], + }, + csv: [[1, 2899]], + 
categoryTree: [{ name: "Category 1" }], + }, + { + asin: "B000000005", + title: "Product Five", + monthlySold: 450, + isAmazonSeller: false, + stats: { + current: [ + null, + null, + null, + 3200, + null, + null, + null, + null, + null, + null, + null, + 25, + null, + null, + null, + null, + null, + null, + 3500, + ], + avg: [3200, null, null, 3200], + }, + csv: [[1, 3500]], + categoryTree: [{ name: "Category 1" }], + }, + ], + tokensLeft: 10, + refillRate: 1, + }), + { status: 200 }, + ); + } + + return new Response("not found", { status: 404 }); + }) as unknown as typeof globalThis.fetch; +}); + +test("processCategory keeps mid-range matches even when sellability is restricted", async () => { + const mockCategory = { + id: 1, + label: "Category 1", + parentId: 0, + childCount: 0, + }; + + const runId = await insertCategoryRunSummary( + db, + { + categoryId: mockCategory.id, + categoryLabel: mockCategory.label, + topAsinsChecked: 0, + availableAsins: 0, + fba: 0, + fbm: 0, + skip: 0, + status: "running", + error: "", + results: [], + }, + new Date().toISOString(), + ); + + const summary = await processCategory( + db, + runId, + mockCategory, + 3, + 5, + 100, + 1000, + 15, + 200, + 3, + 20, + 15, + 85, + ); + + expect(summary.status).toBe("ok"); + expect(summary.topAsinsChecked).toBe(5); + expect(summary.availableAsins).toBe(2); + expect(summary.results?.length).toBe(2); + + const productResults = db + .query( + "SELECT asin, monthly_sold, can_sell, sellability_status FROM product_analysis_results ORDER BY monthly_sold DESC", + ) + .all() as Array<{ + asin: string; + monthly_sold: number; + can_sell: string; + sellability_status: string; + }>; + + expect(productResults.length).toBe(2); + expect(productResults.map((row) => row.asin)).toEqual([ + "B000000003", + "B000000001", + ]); + + const restricted = productResults.find((row) => row.asin === "B000000003"); + expect(restricted?.can_sell).toBe("no"); + expect(restricted?.sellability_status).toBe("restricted"); + + 
const sellable = productResults.find((row) => row.asin === "B000000001"); + expect(sellable?.can_sell).toBe("yes"); + expect(sellable?.sellability_status).toBe("available"); +}); + +test("processCategory returns empty when no products match mid-range criteria", async () => { + const mockCategory = { + id: 2, + label: "Category 2", + parentId: 0, + childCount: 0, + }; + + const runId = await insertCategoryRunSummary( + db, + { + categoryId: mockCategory.id, + categoryLabel: mockCategory.label, + topAsinsChecked: 0, + availableAsins: 0, + fba: 0, + fbm: 0, + skip: 0, + status: "running", + error: "", + results: [], + }, + new Date().toISOString(), + ); + + const summary = await processCategory( + db, + runId, + mockCategory, + 3, + 5, + 100, + 1000, + 500, + 600, + 3, + 20, + 15, + 85, + ); + + expect(summary.status).toBe("empty"); + expect(summary.topAsinsChecked).toBe(5); + expect(summary.availableAsins).toBe(0); + expect(summary.results?.length).toBe(0); + + const rows = db + .query("SELECT COUNT(*) as c FROM product_analysis_results") + .all() as Array<{ c: number }>; + expect(rows[0]?.c).toBe(0); +}); diff --git a/src/mid-range-sellers-by-category.ts b/src/mid-range-sellers-by-category.ts new file mode 100644 index 0000000..a1cbc51 --- /dev/null +++ b/src/mid-range-sellers-by-category.ts @@ -0,0 +1,1890 @@ +import { existsSync, mkdirSync, readFileSync } from "node:fs"; +import path from "node:path"; +import { type Database, getDb, initDb } from "./database.ts"; +import { config } from "./config.ts"; +import { + connectCache, + disconnectCache, + getApiCache, + setApiCache, +} from "./cache.ts"; +import { analyzeProducts } from "./llm.ts"; +import { fetchSellabilityBatch, fetchSpApiPricingAndFees } from "./sp-api.ts"; +import type { + AnalysisResult, + EnrichedProduct, + KeepaData, + LlmVerdict, + ProductRecord, + SellabilityInfo, + SpApiData, +} from "./types.ts"; + +type CategoryInfo = { + id: number; + label: string; + parentId: number; + childCount: number; 
+}; + +type ParsedArgs = { + outputDir: string; + categoryLimit: number; + perCategoryTop: number; + categoryCandidatePool: number; + minMonthlySold: number; + maxMonthlySold: number; + minPrice: number; + maxPrice: number; + minSellerCount: number; + maxSellerCount: number; + minAmazonBuyboxSharePct: number; + maxAmazonBuyboxSharePct: number; + blacklistFile: string; +}; + +type CategoryRunSummary = { + categoryId: number; + categoryLabel: string; + topAsinsChecked: number; + availableAsins: number; + fba: number; + fbm: number; + skip: number; + status: "running" | "ok" | "empty" | "failed"; + error: string; + runId?: number; + results?: AnalysisResult[]; +}; + +const KEEPA_BASE = "https://api.keepa.com"; +const DOMAIN_US = 1; +const AMAZON_US_SELLER_ID = "ATVPDKIKX0DER"; +const KEEPA_MINUTES_OFFSET = 21_564_000; +const DEFAULT_CATEGORY_LIMIT = 32; +const DEFAULT_PER_CATEGORY_TOP = 100; +const DEFAULT_CATEGORY_CANDIDATE_POOL = 500; +const DEFAULT_MIN_MONTHLY_SOLD = 100; +const DEFAULT_MAX_MONTHLY_SOLD = 1000; +const DEFAULT_MIN_PRICE = 15; +const DEFAULT_MAX_PRICE = 200; +const DEFAULT_MIN_SELLER_COUNT = 3; +const DEFAULT_MAX_SELLER_COUNT = 20; +const DEFAULT_MIN_AMAZON_BUYBOX_SHARE_PCT = 15; +const DEFAULT_MAX_AMAZON_BUYBOX_SHARE_PCT = 85; +const SELLABILITY_BATCH_SIZE = 60; +const LLM_BATCH_SIZE = 10; +const PRICING_CONCURRENCY = 5; +// Keep this conservative by default so lower-token Keepa plans avoid repeated 429 loops. 
+const KEEPA_PRODUCT_CHUNK_SIZE = 20; +const MID_RANGE_API_CACHE_TTL_SECONDS = 12 * 60 * 60; +const DEFAULT_BLACKLIST_FILE = path.join( + process.cwd(), + "category-blacklist.csv", +); + +let keepaTokensLeft = 1; +let keepaRefillRate = 1; +let keepaLastRequestMs = 0; + +function log( + level: "info" | "warn" | "error", + message: string, + ...args: any[] +) { + const timestamp = new Date().toISOString(); + console.log(`[${timestamp}] [${level.toUpperCase()}] ${message}`, ...args); +} + +function parseArgs(): ParsedArgs { + const args = process.argv.slice(2); + const outputDir = + readFlagValue(args, "--out-dir") ?? path.join(process.cwd(), "output"); + const blacklistFile = + readFlagValue(args, "--blacklist-file") ?? DEFAULT_BLACKLIST_FILE; + + const categoryLimitRaw = readFlagValue(args, "--category-limit"); + const perCategoryTopRaw = readFlagValue(args, "--per-category-top"); + const categoryCandidatePoolRaw = readFlagValue( + args, + "--category-candidate-pool", + ); + const minMonthlySoldRaw = readFlagValue(args, "--min-monthly-sold"); + const maxMonthlySoldRaw = readFlagValue(args, "--max-monthly-sold"); + const minPriceRaw = readFlagValue(args, "--min-price"); + const maxPriceRaw = readFlagValue(args, "--max-price"); + const minSellerCountRaw = readFlagValue(args, "--min-seller-count"); + const maxSellerCountRaw = readFlagValue(args, "--max-seller-count"); + const minAmazonBuyboxSharePctRaw = readFlagValue( + args, + "--min-amazon-buybox-share-pct", + ); + const maxAmazonBuyboxSharePctRaw = readFlagValue( + args, + "--max-amazon-buybox-share-pct", + ); + + const categoryLimit = categoryLimitRaw + ? Number(categoryLimitRaw) + : DEFAULT_CATEGORY_LIMIT; + const perCategoryTop = perCategoryTopRaw + ? Number(perCategoryTopRaw) + : DEFAULT_PER_CATEGORY_TOP; + const categoryCandidatePool = categoryCandidatePoolRaw + ? Number(categoryCandidatePoolRaw) + : DEFAULT_CATEGORY_CANDIDATE_POOL; + const minMonthlySold = minMonthlySoldRaw + ? 
Number(minMonthlySoldRaw) + : DEFAULT_MIN_MONTHLY_SOLD; + const maxMonthlySold = maxMonthlySoldRaw + ? Number(maxMonthlySoldRaw) + : DEFAULT_MAX_MONTHLY_SOLD; + const minPrice = minPriceRaw ? Number(minPriceRaw) : DEFAULT_MIN_PRICE; + const maxPrice = maxPriceRaw ? Number(maxPriceRaw) : DEFAULT_MAX_PRICE; + const minSellerCount = minSellerCountRaw + ? Number(minSellerCountRaw) + : DEFAULT_MIN_SELLER_COUNT; + const maxSellerCount = maxSellerCountRaw + ? Number(maxSellerCountRaw) + : DEFAULT_MAX_SELLER_COUNT; + const minAmazonBuyboxSharePct = minAmazonBuyboxSharePctRaw + ? Number(minAmazonBuyboxSharePctRaw) + : DEFAULT_MIN_AMAZON_BUYBOX_SHARE_PCT; + const maxAmazonBuyboxSharePct = maxAmazonBuyboxSharePctRaw + ? Number(maxAmazonBuyboxSharePctRaw) + : DEFAULT_MAX_AMAZON_BUYBOX_SHARE_PCT; + + if (!Number.isInteger(categoryLimit) || categoryLimit <= 0) { + printUsageAndExit("--category-limit must be a positive integer."); + } + + if (!Number.isInteger(perCategoryTop) || perCategoryTop <= 0) { + printUsageAndExit("--per-category-top must be a positive integer."); + } + + if (!Number.isInteger(categoryCandidatePool) || categoryCandidatePool <= 0) { + printUsageAndExit("--category-candidate-pool must be a positive integer."); + } + + if (categoryCandidatePool < perCategoryTop) { + printUsageAndExit( + "--category-candidate-pool must be greater than or equal to --per-category-top.", + ); + } + + if (!Number.isInteger(minMonthlySold) || minMonthlySold < 0) { + printUsageAndExit("--min-monthly-sold must be a non-negative integer."); + } + + if (!Number.isInteger(maxMonthlySold) || maxMonthlySold < 0) { + printUsageAndExit("--max-monthly-sold must be a non-negative integer."); + } + + if (maxMonthlySold < minMonthlySold) { + printUsageAndExit( + "--max-monthly-sold must be greater than or equal to --min-monthly-sold.", + ); + } + + if (!Number.isFinite(minPrice) || minPrice < 0) { + printUsageAndExit("--min-price must be a non-negative number."); + } + + if 
(!Number.isFinite(maxPrice) || maxPrice < 0) { + printUsageAndExit("--max-price must be a non-negative number."); + } + + if (maxPrice < minPrice) { + printUsageAndExit( + "--max-price must be greater than or equal to --min-price.", + ); + } + + if (!Number.isInteger(minSellerCount) || minSellerCount < 0) { + printUsageAndExit("--min-seller-count must be a non-negative integer."); + } + + if (!Number.isInteger(maxSellerCount) || maxSellerCount <= 0) { + printUsageAndExit("--max-seller-count must be a positive integer."); + } + + if (maxSellerCount < minSellerCount) { + printUsageAndExit( + "--max-seller-count must be greater than or equal to --min-seller-count.", + ); + } + + if ( + !Number.isFinite(minAmazonBuyboxSharePct) || + minAmazonBuyboxSharePct < 0 || + minAmazonBuyboxSharePct > 100 + ) { + printUsageAndExit( + "--min-amazon-buybox-share-pct must be a number between 0 and 100.", + ); + } + + if ( + !Number.isFinite(maxAmazonBuyboxSharePct) || + maxAmazonBuyboxSharePct < 0 || + maxAmazonBuyboxSharePct > 100 + ) { + printUsageAndExit( + "--max-amazon-buybox-share-pct must be a number between 0 and 100.", + ); + } + + if (maxAmazonBuyboxSharePct < minAmazonBuyboxSharePct) { + printUsageAndExit( + "--max-amazon-buybox-share-pct must be greater than or equal to --min-amazon-buybox-share-pct.", + ); + } + + return { + outputDir, + categoryLimit, + perCategoryTop, + categoryCandidatePool, + minMonthlySold, + maxMonthlySold, + minPrice, + maxPrice, + minSellerCount, + maxSellerCount, + minAmazonBuyboxSharePct, + maxAmazonBuyboxSharePct, + blacklistFile, + }; +} + +function readFlagValue(args: string[], flag: string): string | undefined { + const idx = args.indexOf(flag); + if (idx === -1) return undefined; + return args[idx + 1]; +} + +function printUsageAndExit(message: string): never { + if (message) { + log("error", message); + } + + log( + "error", + [ + "Usage:", + " bun run src/mid-range-sellers-by-category.ts [--category-limit 32] [--per-category-top 100] 
[--category-candidate-pool 500] [--min-monthly-sold 100] [--max-monthly-sold 1000] [--min-price 15] [--max-price 200] [--min-seller-count 3] [--max-seller-count 20] [--min-amazon-buybox-share-pct 15] [--max-amazon-buybox-share-pct 85] [--out-dir output] [--blacklist-file category-blacklist.csv]", + "", + "Flow:", + " 1) Discover categories and round-robin selection.", + " 2) For each category: fetch a candidate pool and compute sellability metadata.", + " 3) Select mid-range ASINs by monthlySold, price, sellerCount, and Amazon buy box share rules.", + " 4) Enrich selected ASINs with Keepa + SP-API pricing/fees.", + " 5) LLM-analyze and persist selected ASINs regardless of sellability status.", + ].join("\n"), + ); + + process.exit(1); +} + +export async function insertCategoryRunSummary( + db: Database, + summary: CategoryRunSummary, + runTimestamp: string, +): Promise { + const query = ` + INSERT INTO category_analysis_runs ( + category_id, category_label, run_timestamp, + top_asins_checked, available_asins, + fba_count, fbm_count, skip_count, + status, error_message + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?); + `; + const result = db.run(query, [ + summary.categoryId, + summary.categoryLabel, + runTimestamp, + summary.topAsinsChecked, + summary.availableAsins, + summary.fba, + summary.fbm, + summary.skip, + summary.status, + summary.error, + ]); + // Bun's SQLite client returns { changes: number, lastInsertRowid: number | bigint } + return Number(result.lastInsertRowid); +} + +export async function updateCategoryRunSummary( + db: Database, + runId: number, + summary: Pick< + CategoryRunSummary, + | "topAsinsChecked" + | "availableAsins" + | "fba" + | "fbm" + | "skip" + | "status" + | "error" + >, +): Promise { + db.run( + ` + UPDATE category_analysis_runs + SET + top_asins_checked = ?, + available_asins = ?, + fba_count = ?, + fbm_count = ?, + skip_count = ?, + status = ?, + error_message = ? + WHERE id = ? 
+ `, + [ + summary.topAsinsChecked, + summary.availableAsins, + summary.fba, + summary.fbm, + summary.skip, + summary.status, + summary.error, + runId, + ], + ); +} + +export async function insertProductAnalysisResults( + db: Database, + runId: number, + results: AnalysisResult[], +): Promise { + if (results.length === 0) { + return; + } + + const insertStmt = db.prepare(` + INSERT INTO product_analysis_results ( + asin, run_id, name, brand, category, unit_cost, + current_price, avg_price_90d, avg_price_90d_sheet, + selling_price_sheet, sales_rank, sales_rank_avg_90d, + seller_count, amazon_is_seller, amazon_buybox_share_pct_90d, + monthly_sold, rank_drops_30d, rank_drops_90d, + fba_fee, fbm_fee, referral_percent, can_sell, + sellability_status, sellability_reason, + verdict, confidence, reasoning, fetched_at + ) VALUES ( + ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, + ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, + ?, ?, ?, ?, ?, ?, ?, ? + ) + ON CONFLICT(asin) DO UPDATE SET + run_id = excluded.run_id, + name = excluded.name, + brand = excluded.brand, + category = excluded.category, + unit_cost = excluded.unit_cost, + current_price = excluded.current_price, + avg_price_90d = excluded.avg_price_90d, + avg_price_90d_sheet = excluded.avg_price_90d_sheet, + selling_price_sheet = excluded.selling_price_sheet, + sales_rank = excluded.sales_rank, + sales_rank_avg_90d = excluded.sales_rank_avg_90d, + seller_count = excluded.seller_count, + amazon_is_seller = excluded.amazon_is_seller, + amazon_buybox_share_pct_90d = excluded.amazon_buybox_share_pct_90d, + monthly_sold = excluded.monthly_sold, + rank_drops_30d = excluded.rank_drops_30d, + rank_drops_90d = excluded.rank_drops_90d, + fba_fee = excluded.fba_fee, + fbm_fee = excluded.fbm_fee, + referral_percent = excluded.referral_percent, + can_sell = excluded.can_sell, + sellability_status = excluded.sellability_status, + sellability_reason = excluded.sellability_reason, + verdict = excluded.verdict, + confidence = excluded.confidence, + reasoning = 
excluded.reasoning, + fetched_at = excluded.fetched_at; + `); + + db.transaction((resultsBatch: AnalysisResult[]) => { + for (const r of resultsBatch) { + const price = + r.product.keepa?.currentPrice ?? + r.product.record.sellingPriceFromSheet ?? + r.product.spApi.estimatedSalePrice; + const rank = r.product.keepa?.salesRank ?? r.product.record.amazonRank; + + insertStmt.run( + r.product.record.asin, + runId, + r.product.record.name, + r.product.record.brand ?? null, + r.product.record.category ?? + r.product.keepa?.categoryTree?.join(" > ") ?? + null, + r.product.record.unitCost ?? null, + price ?? null, + r.product.keepa?.avgPrice90 ?? null, + r.product.record.avgPrice90FromSheet ?? null, + r.product.record.sellingPriceFromSheet ?? null, + rank ?? null, + r.product.keepa?.salesRankAvg90 ?? null, + r.product.keepa?.sellerCount ?? null, + r.product.keepa?.amazonIsSeller == null + ? null + : r.product.keepa.amazonIsSeller + ? 1 + : 0, + r.product.keepa?.amazonBuyboxSharePct90d ?? null, + r.product.keepa?.monthlySold ?? null, + r.product.keepa?.salesRankDrops30 ?? null, + r.product.keepa?.salesRankDrops90 ?? null, + r.product.spApi.fbaFee ?? null, + r.product.spApi.fbmFee ?? null, + r.product.spApi.referralFeePercent ?? null, + r.product.spApi.canSell == null + ? "unknown" + : r.product.spApi.canSell + ? "yes" + : "no", + r.product.spApi.sellabilityStatus ?? null, + r.product.spApi.sellabilityReason ?? null, + r.verdict.verdict, + r.verdict.confidence, + r.verdict.reasoning ?? 
null, + r.product.fetchedAt, + ); + } + })(results); // Execute the transaction with the results batch +} + +function loadCategoryBlacklist(filePath: string): Set { + const blacklist = new Set(); + + if (!existsSync(filePath)) { + log( + "warn", + `Blacklist file not found at ${filePath}; continuing with no excluded categories.`, + ); + return blacklist; + } + + const raw = readFileSync(filePath, "utf8"); + const lines = raw.split(/\r?\n/); + + for (let i = 0; i < lines.length; i++) { + const lineNumber = i + 1; + const line = lines[i] ?? ""; + const trimmed = line.trim(); + if (!trimmed || trimmed.startsWith("#")) continue; + + const [idPart, namePart] = trimmed.split(",", 2); + const idToken = idPart?.trim() ?? ""; + const nameToken = namePart?.trim() ?? ""; + + // Allow header row: id,name + if (idToken.toLowerCase() === "id") { + continue; + } + + if (!idToken) { + log( + "warn", + `Blacklist CSV line ${lineNumber}: missing id, row ignored (${trimmed}).`, + ); + continue; + } + + const id = Number(idToken); + if (!Number.isInteger(id) || id <= 0) { + log( + "warn", + `Blacklist CSV line ${lineNumber}: invalid id '${idToken}', row ignored (${trimmed}).`, + ); + continue; + } + + if (!nameToken) { + log( + "warn", + `Blacklist CSV line ${lineNumber}: missing name for id ${id}; accepted but please add name.`, + ); + } + + if (blacklist.has(id)) { + log( + "warn", + `Blacklist CSV line ${lineNumber}: duplicate id ${id}, keeping first occurrence.`, + ); + continue; + } + + blacklist.add(id); + } + + return blacklist; +} + +function assertSpApiPrerequisites(): void { + const missing: string[] = []; + if (!config.spApiClientId) missing.push("SP_API_CLIENT_ID"); + if (!config.spApiClientSecret) missing.push("SP_API_CLIENT_SECRET"); + if (!config.spApiRefreshToken) missing.push("SP_API_REFRESH_TOKEN"); + if (!config.spApiSellerId) missing.push("SP_API_SELLER_ID"); + + if (missing.length > 0) { + throw new Error(`Missing required SP-API env vars: ${missing.join(", ")}`); 
+ } +} + +function sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +function sanitizeFileSegment(value: string): string { + const compact = value.trim().toLowerCase().replace(/\s+/g, "-"); + const safe = compact.replace(/[^a-z0-9-_]+/g, "-").replace(/-+/g, "-"); + return safe.replace(/^-|-$/g, "") || "category"; +} + +function parseKeepaRateLimitPayload(text: string): { + refillInMs?: number; + tokensLeft?: number; + refillRate?: number; +} { + try { + const parsed = JSON.parse(text); + return { + refillInMs: + typeof parsed?.refillIn === "number" && Number.isFinite(parsed.refillIn) + ? Math.max(0, parsed.refillIn) + : undefined, + tokensLeft: + typeof parsed?.tokensLeft === "number" && + Number.isFinite(parsed.tokensLeft) + ? parsed.tokensLeft + : undefined, + refillRate: + typeof parsed?.refillRate === "number" && + Number.isFinite(parsed.refillRate) + ? parsed.refillRate + : undefined, + }; + } catch { + return {}; + } +} + +function computeBackoffMs(attempt: number, refillInMs?: number): number { + const refillBased = refillInMs != null ? 
refillInMs + 1500 : 0; + const exponential = Math.min(60_000, 2 ** attempt * 1000); + const base = Math.max(refillBased, exponential); + return base + Math.floor(Math.random() * 750); +} + +async function waitForKeepaToken(): Promise { + if (keepaTokensLeft > 0) return; + + const elapsedMinutes = (Date.now() - keepaLastRequestMs) / 60_000; + const regenerated = Math.floor(elapsedMinutes * keepaRefillRate); + if (regenerated > 0) { + keepaTokensLeft += regenerated; + return; + } + + const waitMs = + Math.ceil((1 / keepaRefillRate) * 60_000) - + (Date.now() - keepaLastRequestMs); + + if (waitMs > 0) { + log( + "info", + `Keepa tokens depleted; waiting ${Math.ceil(waitMs / 1000)}s...`, + ); + await sleep(waitMs); + } + + keepaTokensLeft = 1; +} + +async function keepaGetJson(pathAndQuery: string): Promise { + let rateLimitHits = 0; + + while (true) { + await waitForKeepaToken(); + + const response = await fetch(`${KEEPA_BASE}${pathAndQuery}`); + keepaLastRequestMs = Date.now(); + + if (response.ok) { + const data = (await response.json()) as any; + if (typeof data?.tokensLeft === "number") { + keepaTokensLeft = data.tokensLeft; + } + if (typeof data?.refillRate === "number" && data.refillRate > 0) { + keepaRefillRate = data.refillRate; + } + return data; + } + + const text = await response.text(); + + if (response.status === 429) { + const rate = parseKeepaRateLimitPayload(text); + if (typeof rate.tokensLeft === "number") { + keepaTokensLeft = rate.tokensLeft; + } + if (typeof rate.refillRate === "number" && rate.refillRate > 0) { + keepaRefillRate = rate.refillRate; + } + + rateLimitHits++; + const waitMs = computeBackoffMs(rateLimitHits, rate.refillInMs); + log( + "warn", + `Keepa rate limited (429). 
Retry ${rateLimitHits} in ${Math.ceil(waitMs / 1000)}s...`, + ); + await sleep(waitMs); + continue; + } + + throw new Error(`Keepa HTTP ${response.status}: ${text}`); + } +} + +function normalizeCategoryList(data: any): CategoryInfo[] { + const deduped = new Map(); + + const addRawCategory = (value: any): void => { + const id = Number( + value?.catId ?? value?.categoryId ?? value?.id ?? value?.nodeId, + ); + if (!Number.isInteger(id) || id <= 0) return; + + const label = String( + value?.name ?? value?.label ?? `Category ${id}`, + ).trim(); + const parentId = Number(value?.parent ?? value?.parentId ?? -1); + const childCount = Array.isArray(value?.children) + ? value.children.length + : Number.isInteger(value?.childCount) + ? Number(value.childCount) + : 0; + + if (id === 0 || label.toLowerCase() === "root" || parentId === -1) { + return; + } + + if (!deduped.has(id)) { + deduped.set(id, { + id, + label: label || `Category ${id}`, + parentId, + childCount: Math.max(0, childCount), + }); + } + }; + + if (Array.isArray(data?.categories)) { + for (const value of data.categories) { + addRawCategory(value); + } + } + + if (data?.categories && typeof data.categories === "object") { + for (const value of Object.values(data.categories)) { + addRawCategory(value); + } + } + + if (Array.isArray(data?.categoryList)) { + for (const value of data.categoryList) { + addRawCategory(value); + } + } + + return [...deduped.values()]; +} + +function prioritizeLikelyBestsellerCategories( + categories: CategoryInfo[], +): CategoryInfo[] { + const leaves: CategoryInfo[] = []; + const nonLeaves: CategoryInfo[] = []; + + for (const category of categories) { + if (category.childCount === 0) { + leaves.push(category); + } else { + nonLeaves.push(category); + } + } + + const withNamedLabels = (list: CategoryInfo[]) => + list.filter((c) => !/^Category\s+\d+$/i.test(c.label)); + + const withFallbackLabels = (list: CategoryInfo[]) => + list.filter((c) => /^Category\s+\d+$/i.test(c.label)); + + 
/**
 * Follows `parentId` links upward from `category` and returns the topmost
 * category that can be reached.
 *
 * The walk ends at the first category whose `parentId` is not greater than
 * zero, whose parent is absent from `byId`, or whose id was already visited
 * (which guards against parent cycles).
 *
 * @param category Category to start from.
 * @param byId Lookup of every known category by id.
 * @returns The last category reached; `category` itself when no step is possible.
 */
function resolveRootCategory(
  category: CategoryInfo,
  byId: Map<number, CategoryInfo>,
): CategoryInfo {
  const visited = new Set<number>();
  let node = category;

  for (;;) {
    // `!(x > 0)` rather than `x <= 0` so a NaN parent id also ends the walk.
    if (!(node.parentId > 0) || visited.has(node.id)) return node;
    visited.add(node.id);

    const parent = byId.get(node.parentId);
    if (!parent) return node;
    node = parent;
  }
}
/**
 * Requests the Keepa best-seller list for one category and returns up to
 * `limit` distinct ASINs in the order Keepa listed them.
 *
 * The ASIN array is looked up under several possible response keys; the first
 * key that holds an array is used, and an empty list is returned when none
 * does. `sublist` is sent as "1" for categories that have a parent
 * (`parentId > 0`) and "0" otherwise.
 *
 * @param category Category whose best sellers are requested.
 * @param limit Maximum number of ASINs to return; values below zero yield [].
 * @returns Trimmed, de-duplicated ASIN strings.
 * @throws Whatever `keepaGetJson` throws for a failed request.
 */
async function fetchCategoryBestSellerAsins(
  category: CategoryInfo,
  limit: number,
): Promise<string[]> {
  const query = new URLSearchParams({
    key: config.keepaApiKey,
    domain: String(DOMAIN_US),
    category: String(category.id),
    range: "0",
    variations: "0",
    sublist: category.parentId > 0 ? "1" : "0",
  });

  const data = await keepaGetJson(`/bestsellers?${query.toString()}`);

  const bestSellersList = data?.bestSellersList;
  const candidates: unknown[] = [
    bestSellersList,
    bestSellersList?.asinList,
    bestSellersList?.asins,
    bestSellersList?.bestSellers,
    bestSellersList?.bestSellerAsins,
    data?.asinList,
    data?.asins,
    data?.bestsellers,
    data?.bestSellers,
    data?.bestSellerAsins,
    data?.bestsellerList?.asinList,
    data?.categories?.[String(category.id)]?.asinList,
  ];

  const rawAsins = candidates.find((value): value is unknown[] =>
    Array.isArray(value),
  );
  if (!rawAsins) return [];

  const asins = new Set<string>();
  for (const entry of rawAsins) {
    // Stringifying null/objects would produce bogus ASINs such as "null" or
    // "[object Object]", so only string and number entries are accepted.
    if (typeof entry !== "string" && typeof entry !== "number") continue;
    const asin = String(entry).trim();
    if (asin) asins.add(asin);
  }

  // Clamp so a negative limit cannot turn into "all but the last N".
  return [...asins].slice(0, Math.max(0, limit));
}
/**
 * Returns the first argument that is a finite, non-negative number.
 *
 * @param values Candidates in priority order; anything that is not a number,
 *   is not finite, or is below zero is passed over.
 * @returns The first acceptable value, or `null` when there is none.
 */
function pickKeepaNumber(...values: unknown[]): number | null {
  const firstUsable = values.find(
    (candidate): candidate is number =>
      typeof candidate === "number" &&
      Number.isFinite(candidate) &&
      candidate >= 0,
  );
  return firstUsable ?? null;
}

/**
 * Reads the most recent positive price from the first two Keepa `csv` series.
 *
 * Series 0 is tried first, then series 1. A series is considered only when it
 * is an array with at least two elements and its last element is a number
 * greater than zero. That element is divided by 100 and rounded to two
 * decimals (presumably a cents-to-currency conversion — confirm against the
 * Keepa csv format).
 *
 * @param csv Keepa price-history matrix, or `undefined`.
 * @returns The converted price, or `null` when neither series qualifies.
 */
function extractCurrentPrice(csv: number[][] | undefined): number | null {
  if (!Array.isArray(csv)) return null;

  for (let seriesIndex = 0; seriesIndex <= 1; seriesIndex++) {
    const history = csv[seriesIndex];
    if (!Array.isArray(history) || history.length < 2) continue;

    const latest = history[history.length - 1];
    // `!(latest > 0)` also rejects NaN.
    if (typeof latest !== "number" || !(latest > 0)) continue;

    return Math.round((latest / 100) * 100) / 100;
  }

  return null;
}
/**
 * Decides whether Amazon itself is currently a seller of the product, using
 * the first signal that gives a definite answer:
 *
 * 1. `product.isAmazonSeller` when it is a boolean;
 * 2. `product.availabilityAmazon`: `>= 0` means yes, `-1` / `-2` mean no
 *    (other negative values are inconclusive);
 * 3. `stats.buyBoxIsAmazon === true` means yes;
 * 4. `stats.current[0]`: `> 0` means yes, `-1` / `-2` mean no;
 * 5. a positive latest value in `csv[0]` means yes.
 *
 * NOTE(review): the exact meaning of the `-1` / `-2` codes is not visible
 * here — confirm against the Keepa product object documentation.
 *
 * @returns `true`, `false`, or `null` when no signal is conclusive.
 */
function resolveAmazonIsSeller(
  product: Record<string, any>,
  stats: Record<string, any> | undefined,
  csv: number[][] | undefined,
): boolean | null {
  const { isAmazonSeller, availabilityAmazon } = product;

  if (typeof isAmazonSeller === "boolean") return isAmazonSeller;

  if (typeof availabilityAmazon === "number") {
    if (availabilityAmazon >= 0) return true;
    if (availabilityAmazon === -1 || availabilityAmazon === -2) return false;
  }

  if (stats?.buyBoxIsAmazon === true) return true;

  const currentAmazonPrice: unknown = stats?.current?.[0];
  if (typeof currentAmazonPrice === "number") {
    if (currentAmazonPrice > 0) return true;
    if (currentAmazonPrice === -1 || currentAmazonPrice === -2) return false;
  }

  // Last resort: a positive latest entry in the first csv series.
  return extractLatestPositivePrice(csv?.[0]) != null ? true : null;
}
/**
 * Computes the percentage of buy-box time held by Amazon within the trailing
 * `windowDays`, from an alternating `[minute, sellerId, minute, sellerId, …]`
 * history.
 *
 * Each pair opens an interval that lasts until the next pair's minute, or
 * until "now" for the final pair. Minutes are on the clock given by
 * `Math.floor(Date.now() / 60_000) - KEEPA_MINUTES_OFFSET`. Intervals are
 * clipped to the window; intervals whose seller id is "-1" or "-2" are left
 * out of both numerator and denominator (presumably Keepa's markers for "no
 * buy-box holder" — confirm against the Keepa docs). Seller ids are
 * upper-cased before the `amazonSellerIds` lookup.
 *
 * @param history Raw history array; anything that is not an array of at least
 *   two elements yields `null`.
 * @param windowDays Size of the trailing window in days.
 * @param amazonSellerIds Seller ids counted as Amazon (compared upper-cased).
 * @returns Share in percent rounded to two decimals, or `null` when no
 *   qualifying minutes fall inside the window.
 */
function computeAmazonBuyBoxSharePctFromHistory(
  history: unknown,
  windowDays: number,
  amazonSellerIds: Set<string>,
): number | null {
  if (!Array.isArray(history) || history.length < 2) return null;

  const keepaNow = Math.floor(Date.now() / 60_000) - KEEPA_MINUTES_OFFSET;
  const earliest = keepaNow - windowDays * 24 * 60;
  const toMinute = (raw: unknown): number =>
    Number.parseInt(String(raw), 10);

  let heldMinutes = 0;
  let heldByAmazon = 0;

  for (let cursor = 0; cursor + 1 < history.length; cursor += 2) {
    const from = toMinute(history[cursor]);
    const holder = String(history[cursor + 1] ?? "").toUpperCase();
    const until = toMinute(
      cursor + 2 < history.length ? history[cursor + 2] : keepaNow,
    );

    if (!Number.isFinite(from) || !Number.isFinite(until) || until <= from) {
      continue;
    }

    // Clip the interval to [earliest, keepaNow].
    const clippedFrom = Math.max(from, earliest);
    const clippedUntil = Math.min(until, keepaNow);
    if (clippedUntil <= clippedFrom) continue;

    if (holder === "-1" || holder === "-2") continue;

    const span = clippedUntil - clippedFrom;
    heldMinutes += span;
    if (amazonSellerIds.has(holder)) heldByAmazon += span;
  }

  return heldMinutes === 0
    ? null
    : Math.round((heldByAmazon / heldMinutes) * 10_000) / 100;
}

/**
 * Returns the last element of `series` divided by 100 when that element is a
 * finite number greater than zero.
 *
 * @param series Expected to be an array with at least two elements; anything
 *   else yields `null`.
 * @returns The scaled value, or `null` when the last element does not qualify.
 */
function extractLatestPositivePrice(series: unknown): number | null {
  if (!Array.isArray(series) || series.length < 2) return null;

  const latest: unknown = series[series.length - 1];
  return typeof latest === "number" && Number.isFinite(latest) && latest > 0
    ? latest / 100
    : null;
}
/**
 * Picks the ASINs whose Keepa data falls inside every configured range and
 * returns the best `perCategoryTop` of them.
 *
 * An ASIN qualifies when it has Keepa data in `keepaEnrichmentMap` and:
 * - `monthlySold` is a number and is not outside the monthly-sold bounds;
 * - the effective price (`currentPrice`, else `avgPrice90`) is a finite
 *   number and is not outside the price bounds;
 * - `sellerCount` is a number and is not outside the seller-count bounds;
 * - when `amazonIsSeller === true`, `amazonBuyboxSharePct90d` is a finite
 *   number and is not outside the buy-box share bounds (the share is ignored
 *   otherwise).
 *
 * Survivors are ordered by `monthlySold` descending, ties by `sellerCount`
 * ascending, and truncated to `perCategoryTop`.
 *
 * @returns Selected ASINs in ranked order.
 */
function selectMidRangeAsins(
  asins: string[],
  keepaEnrichmentMap: Map<string, { keepa: KeepaData; title: string }>,
  perCategoryTop: number,
  minMonthlySold: number,
  maxMonthlySold: number,
  minPrice: number,
  maxPrice: number,
  minSellerCount: number,
  maxSellerCount: number,
  minAmazonBuyboxSharePct: number,
  maxAmazonBuyboxSharePct: number,
): string[] {
  // Written as "below low or above high" on purpose: that is the exact
  // comparison the filter relies on (NaN therefore counts as not outside).
  const isOutside = (value: number, low: number, high: number): boolean =>
    value < low || value > high;

  const matches: { asin: string; keepa: KeepaData }[] = [];

  for (const asin of asins) {
    const keepa = keepaEnrichmentMap.get(asin)?.keepa;
    if (keepa == null) continue;

    const { monthlySold, sellerCount } = keepa;
    if (
      typeof monthlySold !== "number" ||
      isOutside(monthlySold, minMonthlySold, maxMonthlySold)
    ) {
      continue;
    }

    const price = keepa.currentPrice ?? keepa.avgPrice90;
    if (
      typeof price !== "number" ||
      !Number.isFinite(price) ||
      isOutside(price, minPrice, maxPrice)
    ) {
      continue;
    }

    if (
      typeof sellerCount !== "number" ||
      isOutside(sellerCount, minSellerCount, maxSellerCount)
    ) {
      continue;
    }

    if (keepa.amazonIsSeller === true) {
      const share = keepa.amazonBuyboxSharePct90d;
      if (
        typeof share !== "number" ||
        !Number.isFinite(share) ||
        isOutside(share, minAmazonBuyboxSharePct, maxAmazonBuyboxSharePct)
      ) {
        continue;
      }
    }

    matches.push({ asin, keepa });
  }

  matches.sort((left, right) => {
    const soldDelta =
      (right.keepa.monthlySold ?? 0) - (left.keepa.monthlySold ?? 0);
    return soldDelta !== 0
      ? soldDelta
      : (left.keepa.sellerCount ?? Number.MAX_SAFE_INTEGER) -
          (right.keepa.sellerCount ?? Number.MAX_SAFE_INTEGER);
  });

  return matches.slice(0, perCategoryTop).map((match) => match.asin);
}
asin; + + const record: ProductRecord = { + asin, + name: title, + unitCost: 0, + category: undefined, + brand: undefined, + supplier: undefined, + }; + + if (keepa?.currentPrice && spApi.estimatedSalePrice === 0) { + spApi.estimatedSalePrice = keepa.currentPrice; + } + + return { + record, + keepa, + spApi, + fetchedAt: new Date().toISOString(), + }; + }); +} + +export async function processCategory( + db: Database, + runId: number, + category: CategoryInfo, + perCategoryTop: number, + categoryCandidatePool: number, + minMonthlySold: number, + maxMonthlySold: number, + minPrice: number, + maxPrice: number, + minSellerCount: number, + maxSellerCount: number, + minAmazonBuyboxSharePct: number, + maxAmazonBuyboxSharePct: number, +): Promise { + log("info", `\nCategory ${category.label} (${category.id})`); + + const topAsins = await fetchCategoryBestSellerAsins( + category, + categoryCandidatePool, + ); + if (topAsins.length === 0) { + log("info", " Keepa returned no ASINs for this category."); + await updateCategoryRunSummary(db, runId, { + topAsinsChecked: 0, + availableAsins: 0, + fba: 0, + fbm: 0, + skip: 0, + status: "empty", + error: "No ASINs returned by Keepa", + }); + return { + categoryId: category.id, + categoryLabel: category.label, + topAsinsChecked: 0, + availableAsins: 0, + fba: 0, + fbm: 0, + skip: 0, + status: "empty", + error: "No ASINs returned by Keepa", + results: [], + }; + } + + const uniqueTopAsins = Array.from(new Set(topAsins)); + if (uniqueTopAsins.length !== topAsins.length) { + log( + "warn", + ` Removed ${topAsins.length - uniqueTopAsins.length} duplicate ASINs before analysis.`, + ); + } + + log("info", ` Candidate ASINs fetched: ${uniqueTopAsins.length}`); + + const sellabilityMap = new Map(); + const keepaEnrichment = new Map< + string, + { keepa: KeepaData; title: string } + >(); + const cachedSpApiMap = new Map(); + const preAnalyzedByAsin = new Map(); + const scheduledAsins = new Set(); + const preanalysisTasks: Promise[] = []; + 
const uncachedAsins: string[] = []; + + for (const asin of uniqueTopAsins) { + const cached = await getApiCache(asin); + if (!cached) { + uncachedAsins.push(asin); + continue; + } + + if (!cached.keepa) { + uncachedAsins.push(asin); + continue; + } + + keepaEnrichment.set(asin, { + keepa: cached.keepa, + title: cached.title, + }); + cachedSpApiMap.set(asin, cached.spApi); + sellabilityMap.set(asin, { + canSell: cached.spApi.canSell, + sellabilityStatus: cached.spApi.sellabilityStatus, + sellabilityReason: cached.spApi.sellabilityReason, + }); + } + + async function schedulePreanalysisForAsins(asins: string[]): Promise { + const toSchedule = asins.filter( + (asin) => !scheduledAsins.has(asin) && keepaEnrichment.has(asin), + ); + + if (toSchedule.length === 0) { + return; + } + + for (const asin of toSchedule) { + scheduledAsins.add(asin); + } + + const task = (async () => { + const spApiMap = new Map(); + const uncachedSpApiAsins: string[] = []; + + for (const asin of toSchedule) { + const cached = cachedSpApiMap.get(asin); + if (cached) { + spApiMap.set(asin, cached); + continue; + } + uncachedSpApiAsins.push(asin); + } + + if (uncachedSpApiAsins.length > 0) { + const fetchedSpApiMap = await fetchSpApiMap( + uncachedSpApiAsins, + sellabilityMap, + ); + for (const [asin, spApi] of fetchedSpApiMap.entries()) { + spApiMap.set(asin, spApi); + cachedSpApiMap.set(asin, spApi); + } + } + + const enrichedProducts = buildEnrichedProducts( + toSchedule, + sellabilityMap, + spApiMap, + keepaEnrichment, + ); + + for (const product of enrichedProducts) { + await setApiCache( + product.record.asin, + { + title: product.record.name, + keepa: product.keepa, + spApi: product.spApi, + fetchedAt: product.fetchedAt, + }, + MID_RANGE_API_CACHE_TTL_SECONDS, + ); + } + + const preTotalBatches = Math.ceil( + enrichedProducts.length / LLM_BATCH_SIZE, + ); + for (let i = 0; i < enrichedProducts.length; i += LLM_BATCH_SIZE) { + const batch = enrichedProducts.slice(i, i + LLM_BATCH_SIZE); + 
const batchNum = Math.floor(i / LLM_BATCH_SIZE) + 1; + log( + "info", + ` Pre-analysis batch ${batchNum}/${preTotalBatches} (${batch.length} ASINs)...`, + ); + + let batchVerdicts: LlmVerdict[]; + try { + batchVerdicts = await analyzeProducts(batch); + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + log("warn", ` Pre-analysis batch failed: ${message}`); + continue; + } + + const verdictByAsin = new Map(batchVerdicts.map((v) => [v.asin, v])); + for (const product of batch) { + const verdict = verdictByAsin.get(product.record.asin); + if (!verdict) { + continue; + } + + preAnalyzedByAsin.set(product.record.asin, { + product, + verdict, + }); + } + } + })().catch((err) => { + const message = err instanceof Error ? err.message : String(err); + log("warn", ` Background pre-analysis failed: ${message}`); + }); + + preanalysisTasks.push(task); + } + + await schedulePreanalysisForAsins( + selectMidRangeAsins( + uniqueTopAsins, + keepaEnrichment, + perCategoryTop, + minMonthlySold, + maxMonthlySold, + minPrice, + maxPrice, + minSellerCount, + maxSellerCount, + minAmazonBuyboxSharePct, + maxAmazonBuyboxSharePct, + ), + ); + + if (uncachedAsins.length > 0) { + const fetchedSellability = await fetchSellabilityMap(uncachedAsins); + for (const [asin, info] of fetchedSellability.entries()) { + sellabilityMap.set(asin, info); + } + + const fetchedKeepa = await fetchKeepaEnrichmentMap( + uncachedAsins, + (chunkMap) => { + for (const [asin, value] of chunkMap.entries()) { + keepaEnrichment.set(asin, value); + } + + const provisionalSelectedAsins = selectMidRangeAsins( + uniqueTopAsins, + keepaEnrichment, + perCategoryTop, + minMonthlySold, + maxMonthlySold, + minPrice, + maxPrice, + minSellerCount, + maxSellerCount, + minAmazonBuyboxSharePct, + maxAmazonBuyboxSharePct, + ); + + void schedulePreanalysisForAsins(provisionalSelectedAsins); + }, + ); + for (const [asin, value] of fetchedKeepa.entries()) { + keepaEnrichment.set(asin, value); + } + } 
+ + log( + "info", + ` API cache hits: ${uniqueTopAsins.length - uncachedAsins.length}/${uniqueTopAsins.length}`, + ); + + const sellableCount = uniqueTopAsins.filter((asin) => { + const info = sellabilityMap.get(asin); + return info?.canSell === true && info.sellabilityStatus === "available"; + }).length; + log( + "info", + ` Sellability snapshot: sellable=${sellableCount} non-sellable-or-unknown=${uniqueTopAsins.length - sellableCount}`, + ); + + const selectedAsins = selectMidRangeAsins( + uniqueTopAsins, + keepaEnrichment, + perCategoryTop, + minMonthlySold, + maxMonthlySold, + minPrice, + maxPrice, + minSellerCount, + maxSellerCount, + minAmazonBuyboxSharePct, + maxAmazonBuyboxSharePct, + ); + + log( + "info", + ` Selected mid-range ASINs: ${selectedAsins.length}/${uniqueTopAsins.length} (monthlySold=${minMonthlySold}-${maxMonthlySold}, price=${minPrice}-${maxPrice}, sellerCount=${minSellerCount}-${maxSellerCount}, amazonBuyboxShare=${minAmazonBuyboxSharePct}-${maxAmazonBuyboxSharePct} when Amazon sells)`, + ); + + if (selectedAsins.length === 0) { + await updateCategoryRunSummary(db, runId, { + topAsinsChecked: uniqueTopAsins.length, + availableAsins: 0, + fba: 0, + fbm: 0, + skip: 0, + status: "empty", + error: "No ASINs matched the configured mid-range criteria", + }); + return { + categoryId: category.id, + categoryLabel: category.label, + topAsinsChecked: uniqueTopAsins.length, + availableAsins: 0, + fba: 0, + fbm: 0, + skip: 0, + status: "empty", + error: "No ASINs matched the configured mid-range criteria", + results: [], + }; + } + + const spApiMap = new Map(); + await Promise.allSettled(preanalysisTasks); + + const resultByAsin = new Map(); + for (const asin of selectedAsins) { + const pre = preAnalyzedByAsin.get(asin); + if (pre) { + resultByAsin.set(asin, pre); + } + } + + const missingFinalAsins = selectedAsins.filter( + (asin) => !resultByAsin.has(asin), + ); + if (missingFinalAsins.length > 0) { + log( + "info", + ` Catch-up analysis required for 
${missingFinalAsins.length}/${selectedAsins.length} selected ASINs...`, + ); + } + + const selectedUncachedSpApiAsins: string[] = []; + for (const asin of missingFinalAsins) { + const cached = cachedSpApiMap.get(asin); + if (cached) { + spApiMap.set(asin, cached); + } else { + selectedUncachedSpApiAsins.push(asin); + } + } + + if (selectedUncachedSpApiAsins.length > 0) { + const fetchedSpApiMap = await fetchSpApiMap( + selectedUncachedSpApiAsins, + sellabilityMap, + ); + for (const [asin, spApi] of fetchedSpApiMap.entries()) { + spApiMap.set(asin, spApi); + cachedSpApiMap.set(asin, spApi); + } + } + + const catchUpProducts = buildEnrichedProducts( + missingFinalAsins, + sellabilityMap, + spApiMap, + keepaEnrichment, + ); + + for (const product of catchUpProducts) { + await setApiCache( + product.record.asin, + { + title: product.record.name, + keepa: product.keepa, + spApi: product.spApi, + fetchedAt: product.fetchedAt, + }, + MID_RANGE_API_CACHE_TTL_SECONDS, + ); + } + + if (catchUpProducts.length > 0) { + const catchUpBatches = Math.ceil(catchUpProducts.length / LLM_BATCH_SIZE); + for (let i = 0; i < catchUpProducts.length; i += LLM_BATCH_SIZE) { + const batch = catchUpProducts.slice(i, i + LLM_BATCH_SIZE); + const batchNum = Math.floor(i / LLM_BATCH_SIZE) + 1; + log("info", ` Catch-up LLM batch ${batchNum}/${catchUpBatches}...`); + + let batchVerdicts: LlmVerdict[]; + try { + batchVerdicts = await analyzeProducts(batch); + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + log("warn", ` Catch-up LLM batch failed: ${message}`); + batchVerdicts = batch.map((p) => ({ + asin: p.record.asin, + verdict: "SKIP", + confidence: 0, + reasoning: "LLM analysis failed", + })); + } + + const verdictByAsin = new Map(batchVerdicts.map((v) => [v.asin, v])); + for (const product of batch) { + resultByAsin.set(product.record.asin, { + product, + verdict: verdictByAsin.get(product.record.asin) ?? 
{ + asin: product.record.asin, + verdict: "SKIP", + confidence: 0, + reasoning: "LLM returned no verdict", + }, + }); + } + } + } + + const results: AnalysisResult[] = []; + for (const asin of selectedAsins) { + const existing = resultByAsin.get(asin); + if (existing) { + results.push(existing); + continue; + } + + const fallbackProducts = buildEnrichedProducts( + [asin], + sellabilityMap, + spApiMap, + keepaEnrichment, + ); + const fallbackProduct = fallbackProducts[0]; + if (!fallbackProduct) continue; + results.push({ + product: fallbackProduct, + verdict: { + asin, + verdict: "SKIP", + confidence: 0, + reasoning: "Missing pre-analysis and catch-up result", + }, + }); + } + + log( + "info", + ` Final selected ASINs resolved: ${results.length}/${selectedAsins.length} (pre-analyzed=${selectedAsins.length - missingFinalAsins.length}, catch-up=${missingFinalAsins.length})`, + ); + + let fba = 0; + let fbm = 0; + let skip = 0; + + const totalBatches = Math.ceil(results.length / LLM_BATCH_SIZE); + + for (let i = 0; i < results.length; i += LLM_BATCH_SIZE) { + const batchResults = results.slice(i, i + LLM_BATCH_SIZE); + const batchNum = Math.floor(i / LLM_BATCH_SIZE) + 1; + log("info", ` Persisting batch ${batchNum}/${totalBatches}...`); + + await insertProductAnalysisResults(db, runId, batchResults); + + for (const result of batchResults) { + if (result.verdict.verdict === "FBA") { + fba++; + } else if (result.verdict.verdict === "FBM") { + fbm++; + } else { + skip++; + } + } + + await updateCategoryRunSummary(db, runId, { + topAsinsChecked: uniqueTopAsins.length, + availableAsins: selectedAsins.length, + fba, + fbm, + skip, + status: "running", + error: "", + }); + + log( + "info", + ` Persisted batch ${batchNum}/${totalBatches} (${batchResults.length} rows, totals FBA/FBM/SKIP=${fba}/${fbm}/${skip})`, + ); + } + + await updateCategoryRunSummary(db, runId, { + topAsinsChecked: uniqueTopAsins.length, + availableAsins: selectedAsins.length, + fba, + fbm, + skip, + 
/**
 * CLI entry point for the per-category mid-range pipeline.
 *
 * Parses arguments, verifies SP-API prerequisites, connects the cache, opens
 * the results database (`RESULTS_DB_PATH`, defaulting to `results.db` in the
 * working directory), discovers categories, drops blacklisted ones, and runs
 * `processCategory` for each remaining category in sequence.
 *
 * A category that throws is logged, recorded with status "failed", and does
 * not stop the remaining categories. The cache is disconnected on every exit
 * path.
 */
export async function main(): Promise<void> {
  const args = parseArgs();
  assertSpApiPrerequisites();

  await connectCache();
  try {
    mkdirSync(args.outputDir, { recursive: true });
    const dbPath =
      process.env.RESULTS_DB_PATH || path.join(process.cwd(), "results.db");
    initDb(dbPath);
    const db = getDb(dbPath);

    log("info", "Starting per-category mid-range pipeline");
    log("info", `Marketplace: ${config.spApiMarketplaceId}`);
    log("info", `SP-API region: ${config.spApiRegion}`);
    log("info", `Category limit: ${args.categoryLimit}`);
    log(
      "info",
      `Top ASINs per category after mid-range filter: ${args.perCategoryTop}`,
    );
    log("info", `Category candidate pool: ${args.categoryCandidatePool}`);
    log(
      "info",
      `Monthly sold range: ${args.minMonthlySold}-${args.maxMonthlySold}`,
    );
    log("info", `Price range: ${args.minPrice}-${args.maxPrice}`);
    log(
      "info",
      `Seller count range: ${args.minSellerCount}-${args.maxSellerCount}`,
    );
    log(
      "info",
      `Amazon buybox share range (only when Amazon sells): ${args.minAmazonBuyboxSharePct}-${args.maxAmazonBuyboxSharePct}`,
    );
    log(
      "info",
      `API cache TTL: ${Math.floor(MID_RANGE_API_CACHE_TTL_SECONDS / 3600)}h`,
    );
    log("info", `Blacklist file: ${args.blacklistFile}`);

    const categoryBlacklist = loadCategoryBlacklist(args.blacklistFile);
    log("info", `Loaded ${categoryBlacklist.size} blacklisted category IDs.`);

    const categories = await discoverCategories(args.categoryLimit);
    const allowedCategories = categories.filter(
      (c) => !categoryBlacklist.has(c.id),
    );
    const blacklistedCount = categories.length - allowedCategories.length;
    log(
      "info",
      `Discovered ${categories.length} categories (${blacklistedCount} blacklisted, ${allowedCategories.length} to process).`,
    );

    // One timestamp is shared by every category row of this run.
    const runTimestamp = new Date().toISOString();
    let processedCategories = 0;
    let totalInsertedAsins = 0;
    const allCategorySummaries: CategoryRunSummary[] = [];

    for (const category of allowedCategories) {
      let categorySummary: CategoryRunSummary;
      let runId: number | undefined;
      try {
        runId = await insertCategoryRunSummary(
          db,
          {
            categoryId: category.id,
            categoryLabel: category.label,
            topAsinsChecked: 0,
            availableAsins: 0,
            fba: 0,
            fbm: 0,
            skip: 0,
            status: "running",
            error: "",
            results: [],
          },
          runTimestamp,
        );

        categorySummary = await processCategory(
          db,
          runId,
          category,
          args.perCategoryTop,
          args.categoryCandidatePool,
          args.minMonthlySold,
          args.maxMonthlySold,
          args.minPrice,
          args.maxPrice,
          args.minSellerCount,
          args.maxSellerCount,
          args.minAmazonBuyboxSharePct,
          args.maxAmazonBuyboxSharePct,
        );

        totalInsertedAsins += categorySummary.results?.length ?? 0;
      } catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        log(
          "warn",
          `Skipping category ${category.label} (${category.id}) due to error: ${message}`,
        );
        categorySummary = {
          categoryId: category.id,
          categoryLabel: category.label,
          topAsinsChecked: 0,
          availableAsins: 0,
          fba: 0,
          fbm: 0,
          skip: 0,
          status: "failed",
          error: message,
          results: [],
        };
        // Compare against undefined rather than relying on truthiness, so a
        // run id of 0 is still marked as failed instead of staying "running".
        if (runId !== undefined) {
          await updateCategoryRunSummary(db, runId, {
            topAsinsChecked: 0,
            availableAsins: 0,
            fba: 0,
            fbm: 0,
            skip: 0,
            status: "failed",
            error: message,
          });
        }
      }

      // Bookkeeping is identical for succeeded and failed categories.
      processedCategories++;
      allCategorySummaries.push({ ...categorySummary, runId });
    }

    log("info", "\nRun summary");
    log("info", `Categories discovered/selected: ${categories.length}`);
    log("info", `Categories processed: ${processedCategories}`);
    log("info", `Total ASINs inserted into DB: ${totalInsertedAsins}`);
  } finally {
    await disconnectCache();
  }
}

// Run the pipeline only when this file is the program entry point.
if (import.meta.main) {
  main().catch((err) => {
    log("error", `Mid-range process crashed: ${String(err)}`);
    process.exit(1);
  });
}