diff --git a/README.md b/README.md index e0ec131..2cb1988 100644 --- a/README.md +++ b/README.md @@ -45,6 +45,38 @@ bun run src/sp-test.ts B07SN9BHVV # Auth + sellers endpoint + pricing offer c bun run src/sp-test.ts --sellability B07SN9BHVV # Standalone sellability check ``` +## Category Pipelines + +Run category-focused discovery flows with Keepa + SP-API + LLM: + +```bash +bun run bestsellers +bun run monthly-sold +bun run mid-range +``` + +Mid-range process: + +- Script: `bun run mid-range` +- Source: `src/mid-range-sellers-by-category.ts` +- Default filters: + - Monthly sold between `100` and `1000` + - Price between `$15` and `$200` (using the Keepa current price, falling back to the 90-day average) + - Seller count between `3` and `20` + - If Amazon is a seller, Amazon buy box share must be between `15%` and `85%` +- Sellability behavior: + - Sellability is still fetched and saved (`can_sell`, `sellability_status`, `sellability_reason`) + - Matching products are persisted regardless of sellability status +- Caching behavior: + - Uses Redis to cache the Keepa + SP-API enrichment data per ASIN + - Cache TTL is fixed at `12 hours` + +Example: + +```bash +bun run mid-range --category-limit 10 --per-category-top 50 --category-candidate-pool 250 --min-monthly-sold 100 --max-monthly-sold 1000 --min-price 15 --max-price 200 --min-seller-count 3 --max-seller-count 20 --min-amazon-buybox-share-pct 15 --max-amazon-buybox-share-pct 85 +``` + ## UPC to ASIN Mapping You can map UPCs to ASINs directly through the Keepa integration in `src/keepa.ts`. 
diff --git a/package.json b/package.json index 9b09c6d..ebad1d4 100644 --- a/package.json +++ b/package.json @@ -6,6 +6,7 @@ "scripts": { "bestsellers": "bun run src/bestsellers-by-category.ts", "monthly-sold": "bun run src/top-monthly-sold-by-category.ts", + "mid-range": "bun run src/mid-range-sellers-by-category.ts", "upc": "bun run src/upc-lookup.ts", "upc-file": "bun run src/upc-file-analysis.ts", "start": "bun run src/index.ts", diff --git a/src/cache.ts b/src/cache.ts index 5de7710..9b2f908 100644 --- a/src/cache.ts +++ b/src/cache.ts @@ -1,10 +1,21 @@ import Redis from "ioredis"; import { config } from "./config.ts"; -import type { EnrichedProduct } from "./types.ts"; +import type { EnrichedProduct, KeepaData, SpApiData } from "./types.ts"; let redis: Redis | null = null; let disabled = false; +export type ApiCacheEntry = { + title: string; + keepa: KeepaData | null; + spApi: SpApiData; + fetchedAt: string; +}; + +function getApiCacheKey(asin: string): string { + return `api:asin:${asin}`; +} + export async function connectCache(): Promise { if (disabled) return; try { @@ -58,6 +69,35 @@ export async function setCache( } } +export async function getApiCache(asin: string): Promise { + if (!redis) return null; + try { + const raw = await redis.get(getApiCacheKey(asin)); + if (!raw) return null; + return JSON.parse(raw) as ApiCacheEntry; + } catch { + return null; + } +} + +export async function setApiCache( + asin: string, + data: ApiCacheEntry, + ttlSeconds: number, +): Promise { + if (!redis) return; + try { + await redis.set( + getApiCacheKey(asin), + JSON.stringify(data), + "EX", + ttlSeconds, + ); + } catch { + // Non-critical, continue without caching + } +} + export async function disconnectCache(): Promise { if (redis) { await redis.quit(); diff --git a/src/mid-range-sellers-by-category.test.ts b/src/mid-range-sellers-by-category.test.ts new file mode 100644 index 0000000..32ae6a1 --- /dev/null +++ b/src/mid-range-sellers-by-category.test.ts @@ -0,0 
+1,445 @@ +import { test, expect, beforeAll, afterAll, beforeEach, mock } from "bun:test"; +import { Database } from "bun:sqlite"; +import { getDb, initDb, closeDb } from "./database.ts"; +import path from "node:path"; +import { rmSync, mkdirSync } from "node:fs"; + +const fetchSellabilityBatchMock = mock(async (asins: string[]) => { + return new Map( + asins.map((asin) => { + if (asin === "B000000003") { + return [ + asin, + { + canSell: false, + sellabilityStatus: "restricted" as const, + sellabilityReason: "restricted", + }, + ]; + } + + return [ + asin, + { + canSell: true, + sellabilityStatus: "available" as const, + sellabilityReason: "ok", + }, + ]; + }), + ); +}); + +const fetchSpApiPricingAndFeesMock = mock( + async (_asin: string, sellability: any) => ({ + fbaFee: 4, + fbmFee: 2, + referralFeePercent: 15, + estimatedSalePrice: 25, + canSell: sellability?.canSell ?? null, + sellabilityStatus: sellability?.sellabilityStatus ?? "unknown", + sellabilityReason: sellability?.sellabilityReason ?? 
"missing", + }), +); + +const analyzeProductsMock = mock(async (products: any[]) => { + return products.map((p) => ({ + asin: p.record.asin, + verdict: "FBA", + confidence: 90, + reasoning: "mocked", + })); +}); + +mock.module("./sp-api.ts", () => ({ + fetchSellabilityBatch: fetchSellabilityBatchMock, + fetchSpApiPricingAndFees: fetchSpApiPricingAndFeesMock, +})); + +mock.module("./llm.ts", () => ({ + analyzeProducts: analyzeProductsMock, +})); + +const modulePromise = import("./mid-range-sellers-by-category.ts"); + +const DB_TEST_PATH = path.join( + process.cwd(), + "test_output", + "test_mid_range_analysis.sqlite", +); + +let db: Database; +let processCategory: ( + db: Database, + runId: number, + category: any, + perCategoryTop: number, + categoryCandidatePool: number, + minMonthlySold: number, + maxMonthlySold: number, + minPrice: number, + maxPrice: number, + minSellerCount: number, + maxSellerCount: number, + minAmazonBuyboxSharePct: number, + maxAmazonBuyboxSharePct: number, +) => Promise; +let insertCategoryRunSummary: ( + db: Database, + summary: any, + runTimestamp: string, +) => Promise; +let originalFetch: typeof globalThis.fetch; + +beforeAll(async () => { + const mod = await modulePromise; + processCategory = mod.processCategory; + insertCategoryRunSummary = mod.insertCategoryRunSummary; + + rmSync(path.dirname(DB_TEST_PATH), { recursive: true, force: true }); + mkdirSync(path.dirname(DB_TEST_PATH), { recursive: true }); + initDb(DB_TEST_PATH); + db = getDb(DB_TEST_PATH); + + originalFetch = globalThis.fetch; +}); + +afterAll(() => { + globalThis.fetch = originalFetch; + closeDb(); + rmSync(path.dirname(DB_TEST_PATH), { recursive: true, force: true }); +}); + +beforeEach(() => { + db.run("DELETE FROM product_analysis_results"); + db.run("DELETE FROM category_analysis_runs"); + + globalThis.fetch = mock(async (input: string | URL | Request) => { + const rawUrl = + typeof input === "string" + ? input + : input instanceof URL + ? 
input.toString() + : input.url; + const url = new URL(rawUrl); + + if (url.pathname === "/bestsellers") { + return new Response( + JSON.stringify({ + bestSellersList: [ + "B000000001", + "B000000002", + "B000000003", + "B000000004", + "B000000005", + ], + tokensLeft: 10, + refillRate: 1, + }), + { status: 200 }, + ); + } + + if (url.pathname === "/product") { + return new Response( + JSON.stringify({ + products: [ + { + asin: "B000000001", + title: "Product One", + monthlySold: 600, + isAmazonSeller: true, + buyBoxStatsAmazon90: 40, + stats: { + current: [ + null, + null, + null, + 1000, + null, + null, + null, + null, + null, + null, + null, + 5, + null, + null, + null, + null, + null, + null, + 2599, + ], + avg: [2400, null, null, 1200], + }, + csv: [[1, 2599]], + categoryTree: [{ name: "Category 1" }], + }, + { + asin: "B000000002", + title: "Product Two", + monthlySold: 250, + isAmazonSeller: true, + buyBoxStatsAmazon90: 50, + stats: { + current: [ + null, + null, + null, + 2000, + null, + null, + null, + null, + null, + null, + null, + 3, + null, + null, + null, + null, + null, + null, + 1999, + ], + avg: [1800, null, null, 2200], + }, + csv: [[1, 1200]], + categoryTree: [{ name: "Category 1" }], + }, + { + asin: "B000000003", + title: "Product Three", + monthlySold: 800, + isAmazonSeller: true, + buyBoxStatsAmazon90: 50, + stats: { + current: [ + null, + null, + null, + 1500, + null, + null, + null, + null, + null, + null, + null, + 4, + null, + null, + null, + null, + null, + null, + 2099, + ], + avg: [2000, null, null, 1800], + }, + csv: [[1, 2099]], + categoryTree: [{ name: "Category 1" }], + }, + { + asin: "B000000004", + title: "Product Four", + monthlySold: 400, + isAmazonSeller: true, + buyBoxStatsAmazon90: 95, + stats: { + current: [ + null, + null, + null, + 3000, + null, + null, + null, + null, + null, + null, + null, + 4, + null, + null, + null, + null, + null, + null, + 2899, + ], + avg: [2600, null, null, 2800], + }, + csv: [[1, 2899]], + 
categoryTree: [{ name: "Category 1" }], + }, + { + asin: "B000000005", + title: "Product Five", + monthlySold: 450, + isAmazonSeller: false, + stats: { + current: [ + null, + null, + null, + 3200, + null, + null, + null, + null, + null, + null, + null, + 25, + null, + null, + null, + null, + null, + null, + 3500, + ], + avg: [3200, null, null, 3200], + }, + csv: [[1, 3500]], + categoryTree: [{ name: "Category 1" }], + }, + ], + tokensLeft: 10, + refillRate: 1, + }), + { status: 200 }, + ); + } + + return new Response("not found", { status: 404 }); + }) as unknown as typeof globalThis.fetch; +}); + +test("processCategory keeps mid-range matches even when sellability is restricted", async () => { + const mockCategory = { + id: 1, + label: "Category 1", + parentId: 0, + childCount: 0, + }; + + const runId = await insertCategoryRunSummary( + db, + { + categoryId: mockCategory.id, + categoryLabel: mockCategory.label, + topAsinsChecked: 0, + availableAsins: 0, + fba: 0, + fbm: 0, + skip: 0, + status: "running", + error: "", + results: [], + }, + new Date().toISOString(), + ); + + const summary = await processCategory( + db, + runId, + mockCategory, + 3, + 5, + 100, + 1000, + 15, + 200, + 3, + 20, + 15, + 85, + ); + + expect(summary.status).toBe("ok"); + expect(summary.topAsinsChecked).toBe(5); + expect(summary.availableAsins).toBe(2); + expect(summary.results?.length).toBe(2); + + const productResults = db + .query( + "SELECT asin, monthly_sold, can_sell, sellability_status FROM product_analysis_results ORDER BY monthly_sold DESC", + ) + .all() as Array<{ + asin: string; + monthly_sold: number; + can_sell: string; + sellability_status: string; + }>; + + expect(productResults.length).toBe(2); + expect(productResults.map((row) => row.asin)).toEqual([ + "B000000003", + "B000000001", + ]); + + const restricted = productResults.find((row) => row.asin === "B000000003"); + expect(restricted?.can_sell).toBe("no"); + expect(restricted?.sellability_status).toBe("restricted"); + + 
const sellable = productResults.find((row) => row.asin === "B000000001"); + expect(sellable?.can_sell).toBe("yes"); + expect(sellable?.sellability_status).toBe("available"); +}); + +test("processCategory returns empty when no products match mid-range criteria", async () => { + const mockCategory = { + id: 2, + label: "Category 2", + parentId: 0, + childCount: 0, + }; + + const runId = await insertCategoryRunSummary( + db, + { + categoryId: mockCategory.id, + categoryLabel: mockCategory.label, + topAsinsChecked: 0, + availableAsins: 0, + fba: 0, + fbm: 0, + skip: 0, + status: "running", + error: "", + results: [], + }, + new Date().toISOString(), + ); + + const summary = await processCategory( + db, + runId, + mockCategory, + 3, + 5, + 100, + 1000, + 500, + 600, + 3, + 20, + 15, + 85, + ); + + expect(summary.status).toBe("empty"); + expect(summary.topAsinsChecked).toBe(5); + expect(summary.availableAsins).toBe(0); + expect(summary.results?.length).toBe(0); + + const rows = db + .query("SELECT COUNT(*) as c FROM product_analysis_results") + .all() as Array<{ c: number }>; + expect(rows[0]?.c).toBe(0); +}); diff --git a/src/mid-range-sellers-by-category.ts b/src/mid-range-sellers-by-category.ts new file mode 100644 index 0000000..a1cbc51 --- /dev/null +++ b/src/mid-range-sellers-by-category.ts @@ -0,0 +1,1890 @@ +import { existsSync, mkdirSync, readFileSync } from "node:fs"; +import path from "node:path"; +import { type Database, getDb, initDb } from "./database.ts"; +import { config } from "./config.ts"; +import { + connectCache, + disconnectCache, + getApiCache, + setApiCache, +} from "./cache.ts"; +import { analyzeProducts } from "./llm.ts"; +import { fetchSellabilityBatch, fetchSpApiPricingAndFees } from "./sp-api.ts"; +import type { + AnalysisResult, + EnrichedProduct, + KeepaData, + LlmVerdict, + ProductRecord, + SellabilityInfo, + SpApiData, +} from "./types.ts"; + +type CategoryInfo = { + id: number; + label: string; + parentId: number; + childCount: number; 
+}; + +type ParsedArgs = { + outputDir: string; + categoryLimit: number; + perCategoryTop: number; + categoryCandidatePool: number; + minMonthlySold: number; + maxMonthlySold: number; + minPrice: number; + maxPrice: number; + minSellerCount: number; + maxSellerCount: number; + minAmazonBuyboxSharePct: number; + maxAmazonBuyboxSharePct: number; + blacklistFile: string; +}; + +type CategoryRunSummary = { + categoryId: number; + categoryLabel: string; + topAsinsChecked: number; + availableAsins: number; + fba: number; + fbm: number; + skip: number; + status: "running" | "ok" | "empty" | "failed"; + error: string; + runId?: number; + results?: AnalysisResult[]; +}; + +const KEEPA_BASE = "https://api.keepa.com"; +const DOMAIN_US = 1; +const AMAZON_US_SELLER_ID = "ATVPDKIKX0DER"; +const KEEPA_MINUTES_OFFSET = 21_564_000; +const DEFAULT_CATEGORY_LIMIT = 32; +const DEFAULT_PER_CATEGORY_TOP = 100; +const DEFAULT_CATEGORY_CANDIDATE_POOL = 500; +const DEFAULT_MIN_MONTHLY_SOLD = 100; +const DEFAULT_MAX_MONTHLY_SOLD = 1000; +const DEFAULT_MIN_PRICE = 15; +const DEFAULT_MAX_PRICE = 200; +const DEFAULT_MIN_SELLER_COUNT = 3; +const DEFAULT_MAX_SELLER_COUNT = 20; +const DEFAULT_MIN_AMAZON_BUYBOX_SHARE_PCT = 15; +const DEFAULT_MAX_AMAZON_BUYBOX_SHARE_PCT = 85; +const SELLABILITY_BATCH_SIZE = 60; +const LLM_BATCH_SIZE = 10; +const PRICING_CONCURRENCY = 5; +// Keep this conservative by default so lower-token Keepa plans avoid repeated 429 loops. 
+const KEEPA_PRODUCT_CHUNK_SIZE = 20; +const MID_RANGE_API_CACHE_TTL_SECONDS = 12 * 60 * 60; +const DEFAULT_BLACKLIST_FILE = path.join( + process.cwd(), + "category-blacklist.csv", +); + +let keepaTokensLeft = 1; +let keepaRefillRate = 1; +let keepaLastRequestMs = 0; + +function log( + level: "info" | "warn" | "error", + message: string, + ...args: any[] +) { + const timestamp = new Date().toISOString(); + console.log(`[${timestamp}] [${level.toUpperCase()}] ${message}`, ...args); +} + +function parseArgs(): ParsedArgs { + const args = process.argv.slice(2); + const outputDir = + readFlagValue(args, "--out-dir") ?? path.join(process.cwd(), "output"); + const blacklistFile = + readFlagValue(args, "--blacklist-file") ?? DEFAULT_BLACKLIST_FILE; + + const categoryLimitRaw = readFlagValue(args, "--category-limit"); + const perCategoryTopRaw = readFlagValue(args, "--per-category-top"); + const categoryCandidatePoolRaw = readFlagValue( + args, + "--category-candidate-pool", + ); + const minMonthlySoldRaw = readFlagValue(args, "--min-monthly-sold"); + const maxMonthlySoldRaw = readFlagValue(args, "--max-monthly-sold"); + const minPriceRaw = readFlagValue(args, "--min-price"); + const maxPriceRaw = readFlagValue(args, "--max-price"); + const minSellerCountRaw = readFlagValue(args, "--min-seller-count"); + const maxSellerCountRaw = readFlagValue(args, "--max-seller-count"); + const minAmazonBuyboxSharePctRaw = readFlagValue( + args, + "--min-amazon-buybox-share-pct", + ); + const maxAmazonBuyboxSharePctRaw = readFlagValue( + args, + "--max-amazon-buybox-share-pct", + ); + + const categoryLimit = categoryLimitRaw + ? Number(categoryLimitRaw) + : DEFAULT_CATEGORY_LIMIT; + const perCategoryTop = perCategoryTopRaw + ? Number(perCategoryTopRaw) + : DEFAULT_PER_CATEGORY_TOP; + const categoryCandidatePool = categoryCandidatePoolRaw + ? Number(categoryCandidatePoolRaw) + : DEFAULT_CATEGORY_CANDIDATE_POOL; + const minMonthlySold = minMonthlySoldRaw + ? 
Number(minMonthlySoldRaw) + : DEFAULT_MIN_MONTHLY_SOLD; + const maxMonthlySold = maxMonthlySoldRaw + ? Number(maxMonthlySoldRaw) + : DEFAULT_MAX_MONTHLY_SOLD; + const minPrice = minPriceRaw ? Number(minPriceRaw) : DEFAULT_MIN_PRICE; + const maxPrice = maxPriceRaw ? Number(maxPriceRaw) : DEFAULT_MAX_PRICE; + const minSellerCount = minSellerCountRaw + ? Number(minSellerCountRaw) + : DEFAULT_MIN_SELLER_COUNT; + const maxSellerCount = maxSellerCountRaw + ? Number(maxSellerCountRaw) + : DEFAULT_MAX_SELLER_COUNT; + const minAmazonBuyboxSharePct = minAmazonBuyboxSharePctRaw + ? Number(minAmazonBuyboxSharePctRaw) + : DEFAULT_MIN_AMAZON_BUYBOX_SHARE_PCT; + const maxAmazonBuyboxSharePct = maxAmazonBuyboxSharePctRaw + ? Number(maxAmazonBuyboxSharePctRaw) + : DEFAULT_MAX_AMAZON_BUYBOX_SHARE_PCT; + + if (!Number.isInteger(categoryLimit) || categoryLimit <= 0) { + printUsageAndExit("--category-limit must be a positive integer."); + } + + if (!Number.isInteger(perCategoryTop) || perCategoryTop <= 0) { + printUsageAndExit("--per-category-top must be a positive integer."); + } + + if (!Number.isInteger(categoryCandidatePool) || categoryCandidatePool <= 0) { + printUsageAndExit("--category-candidate-pool must be a positive integer."); + } + + if (categoryCandidatePool < perCategoryTop) { + printUsageAndExit( + "--category-candidate-pool must be greater than or equal to --per-category-top.", + ); + } + + if (!Number.isInteger(minMonthlySold) || minMonthlySold < 0) { + printUsageAndExit("--min-monthly-sold must be a non-negative integer."); + } + + if (!Number.isInteger(maxMonthlySold) || maxMonthlySold < 0) { + printUsageAndExit("--max-monthly-sold must be a non-negative integer."); + } + + if (maxMonthlySold < minMonthlySold) { + printUsageAndExit( + "--max-monthly-sold must be greater than or equal to --min-monthly-sold.", + ); + } + + if (!Number.isFinite(minPrice) || minPrice < 0) { + printUsageAndExit("--min-price must be a non-negative number."); + } + + if 
(!Number.isFinite(maxPrice) || maxPrice < 0) { + printUsageAndExit("--max-price must be a non-negative number."); + } + + if (maxPrice < minPrice) { + printUsageAndExit( + "--max-price must be greater than or equal to --min-price.", + ); + } + + if (!Number.isInteger(minSellerCount) || minSellerCount < 0) { + printUsageAndExit("--min-seller-count must be a non-negative integer."); + } + + if (!Number.isInteger(maxSellerCount) || maxSellerCount <= 0) { + printUsageAndExit("--max-seller-count must be a positive integer."); + } + + if (maxSellerCount < minSellerCount) { + printUsageAndExit( + "--max-seller-count must be greater than or equal to --min-seller-count.", + ); + } + + if ( + !Number.isFinite(minAmazonBuyboxSharePct) || + minAmazonBuyboxSharePct < 0 || + minAmazonBuyboxSharePct > 100 + ) { + printUsageAndExit( + "--min-amazon-buybox-share-pct must be a number between 0 and 100.", + ); + } + + if ( + !Number.isFinite(maxAmazonBuyboxSharePct) || + maxAmazonBuyboxSharePct < 0 || + maxAmazonBuyboxSharePct > 100 + ) { + printUsageAndExit( + "--max-amazon-buybox-share-pct must be a number between 0 and 100.", + ); + } + + if (maxAmazonBuyboxSharePct < minAmazonBuyboxSharePct) { + printUsageAndExit( + "--max-amazon-buybox-share-pct must be greater than or equal to --min-amazon-buybox-share-pct.", + ); + } + + return { + outputDir, + categoryLimit, + perCategoryTop, + categoryCandidatePool, + minMonthlySold, + maxMonthlySold, + minPrice, + maxPrice, + minSellerCount, + maxSellerCount, + minAmazonBuyboxSharePct, + maxAmazonBuyboxSharePct, + blacklistFile, + }; +} + +function readFlagValue(args: string[], flag: string): string | undefined { + const idx = args.indexOf(flag); + if (idx === -1) return undefined; + return args[idx + 1]; +} + +function printUsageAndExit(message: string): never { + if (message) { + log("error", message); + } + + log( + "error", + [ + "Usage:", + " bun run src/mid-range-sellers-by-category.ts [--category-limit 32] [--per-category-top 100] 
[--category-candidate-pool 500] [--min-monthly-sold 100] [--max-monthly-sold 1000] [--min-price 15] [--max-price 200] [--min-seller-count 3] [--max-seller-count 20] [--min-amazon-buybox-share-pct 15] [--max-amazon-buybox-share-pct 85] [--out-dir output] [--blacklist-file category-blacklist.csv]", + "", + "Flow:", + " 1) Discover categories and round-robin selection.", + " 2) For each category: fetch a candidate pool and compute sellability metadata.", + " 3) Select mid-range ASINs by monthlySold, price, sellerCount, and Amazon buy box share rules.", + " 4) Enrich selected ASINs with Keepa + SP-API pricing/fees.", + " 5) LLM-analyze and persist selected ASINs regardless of sellability status.", + ].join("\n"), + ); + + process.exit(1); +} + +export async function insertCategoryRunSummary( + db: Database, + summary: CategoryRunSummary, + runTimestamp: string, +): Promise { + const query = ` + INSERT INTO category_analysis_runs ( + category_id, category_label, run_timestamp, + top_asins_checked, available_asins, + fba_count, fbm_count, skip_count, + status, error_message + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?); + `; + const result = db.run(query, [ + summary.categoryId, + summary.categoryLabel, + runTimestamp, + summary.topAsinsChecked, + summary.availableAsins, + summary.fba, + summary.fbm, + summary.skip, + summary.status, + summary.error, + ]); + // Bun's SQLite client returns { changes: number, lastInsertRowid: number | bigint } + return Number(result.lastInsertRowid); +} + +export async function updateCategoryRunSummary( + db: Database, + runId: number, + summary: Pick< + CategoryRunSummary, + | "topAsinsChecked" + | "availableAsins" + | "fba" + | "fbm" + | "skip" + | "status" + | "error" + >, +): Promise { + db.run( + ` + UPDATE category_analysis_runs + SET + top_asins_checked = ?, + available_asins = ?, + fba_count = ?, + fbm_count = ?, + skip_count = ?, + status = ?, + error_message = ? + WHERE id = ? 
+ `, + [ + summary.topAsinsChecked, + summary.availableAsins, + summary.fba, + summary.fbm, + summary.skip, + summary.status, + summary.error, + runId, + ], + ); +} + +export async function insertProductAnalysisResults( + db: Database, + runId: number, + results: AnalysisResult[], +): Promise { + if (results.length === 0) { + return; + } + + const insertStmt = db.prepare(` + INSERT INTO product_analysis_results ( + asin, run_id, name, brand, category, unit_cost, + current_price, avg_price_90d, avg_price_90d_sheet, + selling_price_sheet, sales_rank, sales_rank_avg_90d, + seller_count, amazon_is_seller, amazon_buybox_share_pct_90d, + monthly_sold, rank_drops_30d, rank_drops_90d, + fba_fee, fbm_fee, referral_percent, can_sell, + sellability_status, sellability_reason, + verdict, confidence, reasoning, fetched_at + ) VALUES ( + ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, + ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, + ?, ?, ?, ?, ?, ?, ?, ? + ) + ON CONFLICT(asin) DO UPDATE SET + run_id = excluded.run_id, + name = excluded.name, + brand = excluded.brand, + category = excluded.category, + unit_cost = excluded.unit_cost, + current_price = excluded.current_price, + avg_price_90d = excluded.avg_price_90d, + avg_price_90d_sheet = excluded.avg_price_90d_sheet, + selling_price_sheet = excluded.selling_price_sheet, + sales_rank = excluded.sales_rank, + sales_rank_avg_90d = excluded.sales_rank_avg_90d, + seller_count = excluded.seller_count, + amazon_is_seller = excluded.amazon_is_seller, + amazon_buybox_share_pct_90d = excluded.amazon_buybox_share_pct_90d, + monthly_sold = excluded.monthly_sold, + rank_drops_30d = excluded.rank_drops_30d, + rank_drops_90d = excluded.rank_drops_90d, + fba_fee = excluded.fba_fee, + fbm_fee = excluded.fbm_fee, + referral_percent = excluded.referral_percent, + can_sell = excluded.can_sell, + sellability_status = excluded.sellability_status, + sellability_reason = excluded.sellability_reason, + verdict = excluded.verdict, + confidence = excluded.confidence, + reasoning = 
excluded.reasoning, + fetched_at = excluded.fetched_at; + `); + + db.transaction((resultsBatch: AnalysisResult[]) => { + for (const r of resultsBatch) { + const price = + r.product.keepa?.currentPrice ?? + r.product.record.sellingPriceFromSheet ?? + r.product.spApi.estimatedSalePrice; + const rank = r.product.keepa?.salesRank ?? r.product.record.amazonRank; + + insertStmt.run( + r.product.record.asin, + runId, + r.product.record.name, + r.product.record.brand ?? null, + r.product.record.category ?? + r.product.keepa?.categoryTree?.join(" > ") ?? + null, + r.product.record.unitCost ?? null, + price ?? null, + r.product.keepa?.avgPrice90 ?? null, + r.product.record.avgPrice90FromSheet ?? null, + r.product.record.sellingPriceFromSheet ?? null, + rank ?? null, + r.product.keepa?.salesRankAvg90 ?? null, + r.product.keepa?.sellerCount ?? null, + r.product.keepa?.amazonIsSeller == null + ? null + : r.product.keepa.amazonIsSeller + ? 1 + : 0, + r.product.keepa?.amazonBuyboxSharePct90d ?? null, + r.product.keepa?.monthlySold ?? null, + r.product.keepa?.salesRankDrops30 ?? null, + r.product.keepa?.salesRankDrops90 ?? null, + r.product.spApi.fbaFee ?? null, + r.product.spApi.fbmFee ?? null, + r.product.spApi.referralFeePercent ?? null, + r.product.spApi.canSell == null + ? "unknown" + : r.product.spApi.canSell + ? "yes" + : "no", + r.product.spApi.sellabilityStatus ?? null, + r.product.spApi.sellabilityReason ?? null, + r.verdict.verdict, + r.verdict.confidence, + r.verdict.reasoning ?? 
null, + r.product.fetchedAt, + ); + } + })(results); // Execute the transaction with the results batch +} + +function loadCategoryBlacklist(filePath: string): Set { + const blacklist = new Set(); + + if (!existsSync(filePath)) { + log( + "warn", + `Blacklist file not found at ${filePath}; continuing with no excluded categories.`, + ); + return blacklist; + } + + const raw = readFileSync(filePath, "utf8"); + const lines = raw.split(/\r?\n/); + + for (let i = 0; i < lines.length; i++) { + const lineNumber = i + 1; + const line = lines[i] ?? ""; + const trimmed = line.trim(); + if (!trimmed || trimmed.startsWith("#")) continue; + + const [idPart, namePart] = trimmed.split(",", 2); + const idToken = idPart?.trim() ?? ""; + const nameToken = namePart?.trim() ?? ""; + + // Allow header row: id,name + if (idToken.toLowerCase() === "id") { + continue; + } + + if (!idToken) { + log( + "warn", + `Blacklist CSV line ${lineNumber}: missing id, row ignored (${trimmed}).`, + ); + continue; + } + + const id = Number(idToken); + if (!Number.isInteger(id) || id <= 0) { + log( + "warn", + `Blacklist CSV line ${lineNumber}: invalid id '${idToken}', row ignored (${trimmed}).`, + ); + continue; + } + + if (!nameToken) { + log( + "warn", + `Blacklist CSV line ${lineNumber}: missing name for id ${id}; accepted but please add name.`, + ); + } + + if (blacklist.has(id)) { + log( + "warn", + `Blacklist CSV line ${lineNumber}: duplicate id ${id}, keeping first occurrence.`, + ); + continue; + } + + blacklist.add(id); + } + + return blacklist; +} + +function assertSpApiPrerequisites(): void { + const missing: string[] = []; + if (!config.spApiClientId) missing.push("SP_API_CLIENT_ID"); + if (!config.spApiClientSecret) missing.push("SP_API_CLIENT_SECRET"); + if (!config.spApiRefreshToken) missing.push("SP_API_REFRESH_TOKEN"); + if (!config.spApiSellerId) missing.push("SP_API_SELLER_ID"); + + if (missing.length > 0) { + throw new Error(`Missing required SP-API env vars: ${missing.join(", ")}`); 
+ } +} + +function sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +function sanitizeFileSegment(value: string): string { + const compact = value.trim().toLowerCase().replace(/\s+/g, "-"); + const safe = compact.replace(/[^a-z0-9-_]+/g, "-").replace(/-+/g, "-"); + return safe.replace(/^-|-$/g, "") || "category"; +} + +function parseKeepaRateLimitPayload(text: string): { + refillInMs?: number; + tokensLeft?: number; + refillRate?: number; +} { + try { + const parsed = JSON.parse(text); + return { + refillInMs: + typeof parsed?.refillIn === "number" && Number.isFinite(parsed.refillIn) + ? Math.max(0, parsed.refillIn) + : undefined, + tokensLeft: + typeof parsed?.tokensLeft === "number" && + Number.isFinite(parsed.tokensLeft) + ? parsed.tokensLeft + : undefined, + refillRate: + typeof parsed?.refillRate === "number" && + Number.isFinite(parsed.refillRate) + ? parsed.refillRate + : undefined, + }; + } catch { + return {}; + } +} + +function computeBackoffMs(attempt: number, refillInMs?: number): number { + const refillBased = refillInMs != null ? 
refillInMs + 1500 : 0; + const exponential = Math.min(60_000, 2 ** attempt * 1000); + const base = Math.max(refillBased, exponential); + return base + Math.floor(Math.random() * 750); +} + +async function waitForKeepaToken(): Promise { + if (keepaTokensLeft > 0) return; + + const elapsedMinutes = (Date.now() - keepaLastRequestMs) / 60_000; + const regenerated = Math.floor(elapsedMinutes * keepaRefillRate); + if (regenerated > 0) { + keepaTokensLeft += regenerated; + return; + } + + const waitMs = + Math.ceil((1 / keepaRefillRate) * 60_000) - + (Date.now() - keepaLastRequestMs); + + if (waitMs > 0) { + log( + "info", + `Keepa tokens depleted; waiting ${Math.ceil(waitMs / 1000)}s...`, + ); + await sleep(waitMs); + } + + keepaTokensLeft = 1; +} + +async function keepaGetJson(pathAndQuery: string): Promise { + let rateLimitHits = 0; + + while (true) { + await waitForKeepaToken(); + + const response = await fetch(`${KEEPA_BASE}${pathAndQuery}`); + keepaLastRequestMs = Date.now(); + + if (response.ok) { + const data = (await response.json()) as any; + if (typeof data?.tokensLeft === "number") { + keepaTokensLeft = data.tokensLeft; + } + if (typeof data?.refillRate === "number" && data.refillRate > 0) { + keepaRefillRate = data.refillRate; + } + return data; + } + + const text = await response.text(); + + if (response.status === 429) { + const rate = parseKeepaRateLimitPayload(text); + if (typeof rate.tokensLeft === "number") { + keepaTokensLeft = rate.tokensLeft; + } + if (typeof rate.refillRate === "number" && rate.refillRate > 0) { + keepaRefillRate = rate.refillRate; + } + + rateLimitHits++; + const waitMs = computeBackoffMs(rateLimitHits, rate.refillInMs); + log( + "warn", + `Keepa rate limited (429). 
Retry ${rateLimitHits} in ${Math.ceil(waitMs / 1000)}s...`, + ); + await sleep(waitMs); + continue; + } + + throw new Error(`Keepa HTTP ${response.status}: ${text}`); + } +} + +function normalizeCategoryList(data: any): CategoryInfo[] { + const deduped = new Map(); + + const addRawCategory = (value: any): void => { + const id = Number( + value?.catId ?? value?.categoryId ?? value?.id ?? value?.nodeId, + ); + if (!Number.isInteger(id) || id <= 0) return; + + const label = String( + value?.name ?? value?.label ?? `Category ${id}`, + ).trim(); + const parentId = Number(value?.parent ?? value?.parentId ?? -1); + const childCount = Array.isArray(value?.children) + ? value.children.length + : Number.isInteger(value?.childCount) + ? Number(value.childCount) + : 0; + + if (id === 0 || label.toLowerCase() === "root" || parentId === -1) { + return; + } + + if (!deduped.has(id)) { + deduped.set(id, { + id, + label: label || `Category ${id}`, + parentId, + childCount: Math.max(0, childCount), + }); + } + }; + + if (Array.isArray(data?.categories)) { + for (const value of data.categories) { + addRawCategory(value); + } + } + + if (data?.categories && typeof data.categories === "object") { + for (const value of Object.values(data.categories)) { + addRawCategory(value); + } + } + + if (Array.isArray(data?.categoryList)) { + for (const value of data.categoryList) { + addRawCategory(value); + } + } + + return [...deduped.values()]; +} + +function prioritizeLikelyBestsellerCategories( + categories: CategoryInfo[], +): CategoryInfo[] { + const leaves: CategoryInfo[] = []; + const nonLeaves: CategoryInfo[] = []; + + for (const category of categories) { + if (category.childCount === 0) { + leaves.push(category); + } else { + nonLeaves.push(category); + } + } + + const withNamedLabels = (list: CategoryInfo[]) => + list.filter((c) => !/^Category\s+\d+$/i.test(c.label)); + + const withFallbackLabels = (list: CategoryInfo[]) => + list.filter((c) => /^Category\s+\d+$/i.test(c.label)); + + 
return [ + ...withNamedLabels(leaves), + ...withFallbackLabels(leaves), + ...withNamedLabels(nonLeaves), + ...withFallbackLabels(nonLeaves), + ]; +} + +function resolveRootCategory( + category: CategoryInfo, + byId: Map, +): CategoryInfo { + let current = category; + const seen = new Set(); + + while (current.parentId > 0 && !seen.has(current.id)) { + seen.add(current.id); + const parent = byId.get(current.parentId); + if (!parent) break; + current = parent; + } + + return current; +} + +function selectCategoriesAcrossRoots( + categories: CategoryInfo[], + maxCategories: number, +): CategoryInfo[] { + const byId = new Map(categories.map((c) => [c.id, c])); + const grouped = new Map(); + + for (const category of categories) { + const root = resolveRootCategory(category, byId); + const bucket = grouped.get(root.id) ?? []; + bucket.push(category); + grouped.set(root.id, bucket); + } + + const rootIds = [...grouped.keys()]; + const selected: CategoryInfo[] = []; + let depth = 0; + + while (selected.length < maxCategories) { + let progressed = false; + + for (const rootId of rootIds) { + const bucket = grouped.get(rootId) ?? 
[]; + if (depth >= bucket.length) continue; + selected.push(bucket[depth]!); + progressed = true; + if (selected.length >= maxCategories) break; + } + + if (!progressed) break; + depth++; + } + + return selected; +} + +async function discoverCategories( + maxCategories: number, +): Promise { + const data = await keepaGetJson( + `/category?key=${encodeURIComponent(config.keepaApiKey)}&domain=${DOMAIN_US}&category=0`, + ); + + const categories = normalizeCategoryList(data); + if (categories.length === 0) { + throw new Error("Keepa category discovery returned no usable categories."); + } + + const prioritized = prioritizeLikelyBestsellerCategories(categories); + return selectCategoriesAcrossRoots(prioritized, maxCategories); +} + +async function fetchCategoryBestSellerAsins( + category: CategoryInfo, + limit: number, +): Promise { + const query = new URLSearchParams({ + key: config.keepaApiKey, + domain: String(DOMAIN_US), + category: String(category.id), + range: "0", + variations: "0", + sublist: category.parentId > 0 ? 
"1" : "0", + }); + + const data = await keepaGetJson(`/bestsellers?${query.toString()}`); + + const bestSellersList = data?.bestSellersList; + const candidates = [ + bestSellersList, + bestSellersList?.asinList, + bestSellersList?.asins, + bestSellersList?.bestSellers, + bestSellersList?.bestSellerAsins, + data?.asinList, + data?.asins, + data?.bestsellers, + data?.bestSellers, + data?.bestSellerAsins, + data?.bestsellerList?.asinList, + data?.categories?.[String(category.id)]?.asinList, + ]; + + for (const value of candidates) { + if (Array.isArray(value)) { + return [ + ...new Set(value.map((v) => String(v).trim()).filter(Boolean)), + ].slice(0, limit); + } + } + + return []; +} + +async function fetchSellabilityMap( + asins: string[], +): Promise> { + const sellability = new Map(); + + for (let i = 0; i < asins.length; i += SELLABILITY_BATCH_SIZE) { + const chunk = asins.slice(i, i + SELLABILITY_BATCH_SIZE); + const chunkResults = await fetchSellabilityBatch(chunk); + + for (const asin of chunk) { + const info = chunkResults.get(asin) ?? { + canSell: null, + sellabilityStatus: "unknown" as const, + sellabilityReason: "Sellability check returned no result", + }; + sellability.set(asin, info); + } + + log( + "info", + ` Sellability progress: ${Math.min(i + chunk.length, asins.length)}/${asins.length}`, + ); + } + + return sellability; +} + +async function fetchSpApiMap( + asins: string[], + sellabilityMap: Map, +): Promise> { + const pricingQueue = [...asins]; + const spApiMap = new Map(); + let done = 0; + + async function worker(): Promise { + while (pricingQueue.length > 0) { + const asin = pricingQueue.shift(); + if (!asin) return; + + const sellability = sellabilityMap.get(asin) ?? 
{ + canSell: null, + sellabilityStatus: "unknown", + sellabilityReason: "Sellability missing", + }; + + const spApi = await fetchSpApiPricingAndFees(asin, sellability); + spApiMap.set(asin, spApi); + + done++; + if (done % 10 === 0 || done === asins.length) { + log("info", ` Pricing progress: ${done}/${asins.length}`); + } + } + } + + const workers = Array.from( + { length: Math.min(PRICING_CONCURRENCY, asins.length || 1) }, + () => worker(), + ); + + await Promise.all(workers); + return spApiMap; +} + +function pickKeepaNumber(...values: unknown[]): number | null { + for (const value of values) { + if (typeof value !== "number" || !Number.isFinite(value)) continue; + if (value < 0) continue; + return value; + } + return null; +} + +function extractCurrentPrice(csv: number[][] | undefined): number | null { + if (!Array.isArray(csv)) return null; + + for (const series of [csv[0], csv[1]]) { + if (Array.isArray(series) && series.length >= 2) { + const lastPrice = series[series.length - 1]; + if (typeof lastPrice === "number" && lastPrice > 0) { + return Math.round((lastPrice / 100) * 100) / 100; + } + } + } + + return null; +} + +function parseKeepaProduct(product: Record): KeepaData { + const stats = product.stats; + const csv = product.csv; + const salesRankDrops30 = pickKeepaNumber( + product.salesRankDrops30, + stats?.salesRankDrops30, + ); + const salesRankDrops90 = + pickKeepaNumber(product.salesRankDrops90, stats?.salesRankDrops90) ?? + (salesRankDrops30 != null ? salesRankDrops30 * 3 : null); + const monthlySold = + pickKeepaNumber(product.monthlySold, stats?.monthlySold) ?? + salesRankDrops30; + const amazonIsSeller = resolveAmazonIsSeller(product, stats, csv); + const amazonBuyboxSharePct90d = + extractAmazonBuyboxSharePct90d(product, stats) ?? + computeAmazonBuyBoxSharePctFromHistory( + product.buyBoxSellerIdHistory, + 90, + new Set([AMAZON_US_SELLER_ID]), + ); + + return { + currentPrice: extractCurrentPrice(csv), + avgPrice90: stats?.avg?.[0] != null ? 
stats.avg[0] / 100 : null, + minPrice90: stats?.min?.[0] != null ? stats.min[0] / 100 : null, + maxPrice90: stats?.max?.[0] != null ? stats.max[0] / 100 : null, + salesRank: stats?.current?.[3] ?? null, + salesRankAvg90: stats?.avg?.[3] ?? null, + salesRankDrops30, + salesRankDrops90, + sellerCount: stats?.current?.[11] ?? null, + amazonIsSeller, + amazonBuyboxSharePct90d, + buyBoxSeller: product.buyBoxSellerId ?? null, + buyBoxPrice: stats?.current?.[18] != null ? stats.current[18] / 100 : null, + monthlySold, + categoryTree: + product.categoryTree?.map((c: { name: string }) => c.name) ?? [], + }; +} + +function resolveAmazonIsSeller( + product: Record, + stats: Record | undefined, + csv: number[][] | undefined, +): boolean | null { + if (typeof product.isAmazonSeller === "boolean") + return product.isAmazonSeller; + + if (typeof product.availabilityAmazon === "number") { + if (product.availabilityAmazon >= 0) return true; + if ( + product.availabilityAmazon === -1 || + product.availabilityAmazon === -2 + ) { + return false; + } + } + + if (stats?.buyBoxIsAmazon === true) return true; + + if (typeof stats?.current?.[0] === "number") { + if (stats.current[0] > 0) return true; + if (stats.current[0] === -1 || stats.current[0] === -2) return false; + } + + const latestAmazonPrice = extractLatestPositivePrice(csv?.[0]); + if (latestAmazonPrice != null) return true; + + return null; +} + +function extractAmazonBuyboxSharePct90d( + product: Record, + stats: Record | undefined, +): number | null { + const candidates: unknown[] = [ + product.buyBoxStatsAmazon90, + stats?.buyBoxStatsAmazon90, + product.buyBoxStats?.amazon90, + product.buyBoxStats?.amazon?.[90], + product.buyBoxStats?.amazon?.["90"], + product.buyBoxStats?.[AMAZON_US_SELLER_ID]?.[90], + product.buyBoxStats?.[AMAZON_US_SELLER_ID]?.["90"], + ]; + + for (const value of candidates) { + if (typeof value !== "number" || !Number.isFinite(value)) continue; + if (value < 0 || value > 100) continue; + return 
Math.round(value * 100) / 100; + } + + return null; +} + +function computeAmazonBuyBoxSharePctFromHistory( + history: unknown, + windowDays: number, + amazonSellerIds: Set, +): number | null { + if (!Array.isArray(history) || history.length < 2) return null; + + const nowKeepaMinutes = + Math.floor(Date.now() / 60_000) - KEEPA_MINUTES_OFFSET; + const windowStart = nowKeepaMinutes - windowDays * 24 * 60; + let qualifiedMinutes = 0; + let amazonMinutes = 0; + + for (let i = 0; i < history.length - 1; i += 2) { + const startMinute = Number.parseInt(String(history[i]), 10); + const sellerId = String(history[i + 1] ?? "").toUpperCase(); + const nextRaw = i + 2 < history.length ? history[i + 2] : nowKeepaMinutes; + const endMinute = Number.parseInt(String(nextRaw), 10); + + if (!Number.isFinite(startMinute) || !Number.isFinite(endMinute)) continue; + if (endMinute <= startMinute) continue; + + const intervalStart = Math.max(startMinute, windowStart); + const intervalEnd = Math.min(endMinute, nowKeepaMinutes); + if (intervalEnd <= intervalStart) continue; + + if (sellerId === "-1" || sellerId === "-2") continue; + + const minutes = intervalEnd - intervalStart; + qualifiedMinutes += minutes; + if (amazonSellerIds.has(sellerId)) { + amazonMinutes += minutes; + } + } + + if (qualifiedMinutes === 0) return null; + return Math.round((amazonMinutes / qualifiedMinutes) * 10_000) / 100; +} + +function extractLatestPositivePrice(series: unknown): number | null { + if (!Array.isArray(series) || series.length < 2) return null; + const last = series[series.length - 1]; + if (typeof last !== "number" || !Number.isFinite(last) || last <= 0) { + return null; + } + return last / 100; +} + +async function fetchKeepaEnrichmentMap( + asins: string[], + onChunkComplete?: ( + chunkMap: Map, + ) => void, +): Promise> { + const out = new Map(); + + for (let i = 0; i < asins.length; i += KEEPA_PRODUCT_CHUNK_SIZE) { + const chunk = asins.slice(i, i + KEEPA_PRODUCT_CHUNK_SIZE); + const chunkMap = 
new Map(); + const asinParam = encodeURIComponent(chunk.join(",")); + const data = await keepaGetJson( + `/product?key=${encodeURIComponent(config.keepaApiKey)}&domain=${DOMAIN_US}&asin=${asinParam}&stats=90&buybox=1&days=90`, + ); + + const products = Array.isArray(data?.products) ? data.products : []; + for (const product of products) { + const asin = String(product?.asin ?? "").trim(); + if (!asin) continue; + const parsed = { + keepa: parseKeepaProduct(product), + title: String(product?.title ?? "").trim(), + }; + out.set(asin, parsed); + chunkMap.set(asin, parsed); + } + + log( + "info", + ` Keepa enrichment progress: ${Math.min(i + chunk.length, asins.length)}/${asins.length}`, + ); + + if (onChunkComplete && chunkMap.size > 0) { + try { + onChunkComplete(chunkMap); + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + log("warn", ` Keepa chunk callback failed: ${message}`); + } + } + } + + return out; +} + +function selectMidRangeAsins( + asins: string[], + keepaEnrichmentMap: Map, + perCategoryTop: number, + minMonthlySold: number, + maxMonthlySold: number, + minPrice: number, + maxPrice: number, + minSellerCount: number, + maxSellerCount: number, + minAmazonBuyboxSharePct: number, + maxAmazonBuyboxSharePct: number, +): string[] { + return [...asins] + .map((asin) => ({ + asin, + keepa: keepaEnrichmentMap.get(asin)?.keepa, + })) + .filter( + (item): item is { asin: string; keepa: KeepaData } => item.keepa != null, + ) + .filter(({ keepa }) => { + if ( + typeof keepa.monthlySold !== "number" || + keepa.monthlySold < minMonthlySold || + keepa.monthlySold > maxMonthlySold + ) { + return false; + } + + const effectivePrice = keepa.currentPrice ?? 
keepa.avgPrice90; + if ( + typeof effectivePrice !== "number" || + !Number.isFinite(effectivePrice) || + effectivePrice < minPrice || + effectivePrice > maxPrice + ) { + return false; + } + + if ( + typeof keepa.sellerCount !== "number" || + keepa.sellerCount < minSellerCount || + keepa.sellerCount > maxSellerCount + ) { + return false; + } + + if (keepa.amazonIsSeller === true) { + if ( + typeof keepa.amazonBuyboxSharePct90d !== "number" || + !Number.isFinite(keepa.amazonBuyboxSharePct90d) || + keepa.amazonBuyboxSharePct90d < minAmazonBuyboxSharePct || + keepa.amazonBuyboxSharePct90d > maxAmazonBuyboxSharePct + ) { + return false; + } + } + + return true; + }) + .sort((a, b) => { + const monthlySoldDelta = + (b.keepa.monthlySold ?? 0) - (a.keepa.monthlySold ?? 0); + if (monthlySoldDelta !== 0) return monthlySoldDelta; + return ( + (a.keepa.sellerCount ?? Number.MAX_SAFE_INTEGER) - + (b.keepa.sellerCount ?? Number.MAX_SAFE_INTEGER) + ); + }) + .slice(0, perCategoryTop) + .map((item) => item.asin); +} + +function buildEnrichedProducts( + asins: string[], + sellabilityMap: Map, + spApiMap: Map, + keepaEnrichmentMap: Map, +): EnrichedProduct[] { + return asins.map((asin) => { + const sellability = sellabilityMap.get(asin) ?? { + canSell: null, + sellabilityStatus: "unknown" as const, + sellabilityReason: "Sellability missing", + }; + + const spApi = spApiMap.get(asin) ?? { + fbaFee: 0, + fbmFee: 0, + referralFeePercent: 15, + estimatedSalePrice: 0, + canSell: sellability.canSell, + sellabilityStatus: sellability.sellabilityStatus, + sellabilityReason: sellability.sellabilityReason, + }; + + const enrichedKeepa = keepaEnrichmentMap.get(asin); + const keepa = enrichedKeepa?.keepa ?? null; + const title = enrichedKeepa?.title ?? 
asin; + + const record: ProductRecord = { + asin, + name: title, + unitCost: 0, + category: undefined, + brand: undefined, + supplier: undefined, + }; + + if (keepa?.currentPrice && spApi.estimatedSalePrice === 0) { + spApi.estimatedSalePrice = keepa.currentPrice; + } + + return { + record, + keepa, + spApi, + fetchedAt: new Date().toISOString(), + }; + }); +} + +export async function processCategory( + db: Database, + runId: number, + category: CategoryInfo, + perCategoryTop: number, + categoryCandidatePool: number, + minMonthlySold: number, + maxMonthlySold: number, + minPrice: number, + maxPrice: number, + minSellerCount: number, + maxSellerCount: number, + minAmazonBuyboxSharePct: number, + maxAmazonBuyboxSharePct: number, +): Promise { + log("info", `\nCategory ${category.label} (${category.id})`); + + const topAsins = await fetchCategoryBestSellerAsins( + category, + categoryCandidatePool, + ); + if (topAsins.length === 0) { + log("info", " Keepa returned no ASINs for this category."); + await updateCategoryRunSummary(db, runId, { + topAsinsChecked: 0, + availableAsins: 0, + fba: 0, + fbm: 0, + skip: 0, + status: "empty", + error: "No ASINs returned by Keepa", + }); + return { + categoryId: category.id, + categoryLabel: category.label, + topAsinsChecked: 0, + availableAsins: 0, + fba: 0, + fbm: 0, + skip: 0, + status: "empty", + error: "No ASINs returned by Keepa", + results: [], + }; + } + + const uniqueTopAsins = Array.from(new Set(topAsins)); + if (uniqueTopAsins.length !== topAsins.length) { + log( + "warn", + ` Removed ${topAsins.length - uniqueTopAsins.length} duplicate ASINs before analysis.`, + ); + } + + log("info", ` Candidate ASINs fetched: ${uniqueTopAsins.length}`); + + const sellabilityMap = new Map(); + const keepaEnrichment = new Map< + string, + { keepa: KeepaData; title: string } + >(); + const cachedSpApiMap = new Map(); + const preAnalyzedByAsin = new Map(); + const scheduledAsins = new Set(); + const preanalysisTasks: Promise[] = []; + 
const uncachedAsins: string[] = []; + + for (const asin of uniqueTopAsins) { + const cached = await getApiCache(asin); + if (!cached) { + uncachedAsins.push(asin); + continue; + } + + if (!cached.keepa) { + uncachedAsins.push(asin); + continue; + } + + keepaEnrichment.set(asin, { + keepa: cached.keepa, + title: cached.title, + }); + cachedSpApiMap.set(asin, cached.spApi); + sellabilityMap.set(asin, { + canSell: cached.spApi.canSell, + sellabilityStatus: cached.spApi.sellabilityStatus, + sellabilityReason: cached.spApi.sellabilityReason, + }); + } + + async function schedulePreanalysisForAsins(asins: string[]): Promise { + const toSchedule = asins.filter( + (asin) => !scheduledAsins.has(asin) && keepaEnrichment.has(asin), + ); + + if (toSchedule.length === 0) { + return; + } + + for (const asin of toSchedule) { + scheduledAsins.add(asin); + } + + const task = (async () => { + const spApiMap = new Map(); + const uncachedSpApiAsins: string[] = []; + + for (const asin of toSchedule) { + const cached = cachedSpApiMap.get(asin); + if (cached) { + spApiMap.set(asin, cached); + continue; + } + uncachedSpApiAsins.push(asin); + } + + if (uncachedSpApiAsins.length > 0) { + const fetchedSpApiMap = await fetchSpApiMap( + uncachedSpApiAsins, + sellabilityMap, + ); + for (const [asin, spApi] of fetchedSpApiMap.entries()) { + spApiMap.set(asin, spApi); + cachedSpApiMap.set(asin, spApi); + } + } + + const enrichedProducts = buildEnrichedProducts( + toSchedule, + sellabilityMap, + spApiMap, + keepaEnrichment, + ); + + for (const product of enrichedProducts) { + await setApiCache( + product.record.asin, + { + title: product.record.name, + keepa: product.keepa, + spApi: product.spApi, + fetchedAt: product.fetchedAt, + }, + MID_RANGE_API_CACHE_TTL_SECONDS, + ); + } + + const preTotalBatches = Math.ceil( + enrichedProducts.length / LLM_BATCH_SIZE, + ); + for (let i = 0; i < enrichedProducts.length; i += LLM_BATCH_SIZE) { + const batch = enrichedProducts.slice(i, i + LLM_BATCH_SIZE); + 
const batchNum = Math.floor(i / LLM_BATCH_SIZE) + 1; + log( + "info", + ` Pre-analysis batch ${batchNum}/${preTotalBatches} (${batch.length} ASINs)...`, + ); + + let batchVerdicts: LlmVerdict[]; + try { + batchVerdicts = await analyzeProducts(batch); + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + log("warn", ` Pre-analysis batch failed: ${message}`); + continue; + } + + const verdictByAsin = new Map(batchVerdicts.map((v) => [v.asin, v])); + for (const product of batch) { + const verdict = verdictByAsin.get(product.record.asin); + if (!verdict) { + continue; + } + + preAnalyzedByAsin.set(product.record.asin, { + product, + verdict, + }); + } + } + })().catch((err) => { + const message = err instanceof Error ? err.message : String(err); + log("warn", ` Background pre-analysis failed: ${message}`); + }); + + preanalysisTasks.push(task); + } + + await schedulePreanalysisForAsins( + selectMidRangeAsins( + uniqueTopAsins, + keepaEnrichment, + perCategoryTop, + minMonthlySold, + maxMonthlySold, + minPrice, + maxPrice, + minSellerCount, + maxSellerCount, + minAmazonBuyboxSharePct, + maxAmazonBuyboxSharePct, + ), + ); + + if (uncachedAsins.length > 0) { + const fetchedSellability = await fetchSellabilityMap(uncachedAsins); + for (const [asin, info] of fetchedSellability.entries()) { + sellabilityMap.set(asin, info); + } + + const fetchedKeepa = await fetchKeepaEnrichmentMap( + uncachedAsins, + (chunkMap) => { + for (const [asin, value] of chunkMap.entries()) { + keepaEnrichment.set(asin, value); + } + + const provisionalSelectedAsins = selectMidRangeAsins( + uniqueTopAsins, + keepaEnrichment, + perCategoryTop, + minMonthlySold, + maxMonthlySold, + minPrice, + maxPrice, + minSellerCount, + maxSellerCount, + minAmazonBuyboxSharePct, + maxAmazonBuyboxSharePct, + ); + + void schedulePreanalysisForAsins(provisionalSelectedAsins); + }, + ); + for (const [asin, value] of fetchedKeepa.entries()) { + keepaEnrichment.set(asin, value); + } + } 
+ + log( + "info", + ` API cache hits: ${uniqueTopAsins.length - uncachedAsins.length}/${uniqueTopAsins.length}`, + ); + + const sellableCount = uniqueTopAsins.filter((asin) => { + const info = sellabilityMap.get(asin); + return info?.canSell === true && info.sellabilityStatus === "available"; + }).length; + log( + "info", + ` Sellability snapshot: sellable=${sellableCount} non-sellable-or-unknown=${uniqueTopAsins.length - sellableCount}`, + ); + + const selectedAsins = selectMidRangeAsins( + uniqueTopAsins, + keepaEnrichment, + perCategoryTop, + minMonthlySold, + maxMonthlySold, + minPrice, + maxPrice, + minSellerCount, + maxSellerCount, + minAmazonBuyboxSharePct, + maxAmazonBuyboxSharePct, + ); + + log( + "info", + ` Selected mid-range ASINs: ${selectedAsins.length}/${uniqueTopAsins.length} (monthlySold=${minMonthlySold}-${maxMonthlySold}, price=${minPrice}-${maxPrice}, sellerCount=${minSellerCount}-${maxSellerCount}, amazonBuyboxShare=${minAmazonBuyboxSharePct}-${maxAmazonBuyboxSharePct} when Amazon sells)`, + ); + + if (selectedAsins.length === 0) { + await updateCategoryRunSummary(db, runId, { + topAsinsChecked: uniqueTopAsins.length, + availableAsins: 0, + fba: 0, + fbm: 0, + skip: 0, + status: "empty", + error: "No ASINs matched the configured mid-range criteria", + }); + return { + categoryId: category.id, + categoryLabel: category.label, + topAsinsChecked: uniqueTopAsins.length, + availableAsins: 0, + fba: 0, + fbm: 0, + skip: 0, + status: "empty", + error: "No ASINs matched the configured mid-range criteria", + results: [], + }; + } + + const spApiMap = new Map(); + await Promise.allSettled(preanalysisTasks); + + const resultByAsin = new Map(); + for (const asin of selectedAsins) { + const pre = preAnalyzedByAsin.get(asin); + if (pre) { + resultByAsin.set(asin, pre); + } + } + + const missingFinalAsins = selectedAsins.filter( + (asin) => !resultByAsin.has(asin), + ); + if (missingFinalAsins.length > 0) { + log( + "info", + ` Catch-up analysis required for 
${missingFinalAsins.length}/${selectedAsins.length} selected ASINs...`, + ); + } + + const selectedUncachedSpApiAsins: string[] = []; + for (const asin of missingFinalAsins) { + const cached = cachedSpApiMap.get(asin); + if (cached) { + spApiMap.set(asin, cached); + } else { + selectedUncachedSpApiAsins.push(asin); + } + } + + if (selectedUncachedSpApiAsins.length > 0) { + const fetchedSpApiMap = await fetchSpApiMap( + selectedUncachedSpApiAsins, + sellabilityMap, + ); + for (const [asin, spApi] of fetchedSpApiMap.entries()) { + spApiMap.set(asin, spApi); + cachedSpApiMap.set(asin, spApi); + } + } + + const catchUpProducts = buildEnrichedProducts( + missingFinalAsins, + sellabilityMap, + spApiMap, + keepaEnrichment, + ); + + for (const product of catchUpProducts) { + await setApiCache( + product.record.asin, + { + title: product.record.name, + keepa: product.keepa, + spApi: product.spApi, + fetchedAt: product.fetchedAt, + }, + MID_RANGE_API_CACHE_TTL_SECONDS, + ); + } + + if (catchUpProducts.length > 0) { + const catchUpBatches = Math.ceil(catchUpProducts.length / LLM_BATCH_SIZE); + for (let i = 0; i < catchUpProducts.length; i += LLM_BATCH_SIZE) { + const batch = catchUpProducts.slice(i, i + LLM_BATCH_SIZE); + const batchNum = Math.floor(i / LLM_BATCH_SIZE) + 1; + log("info", ` Catch-up LLM batch ${batchNum}/${catchUpBatches}...`); + + let batchVerdicts: LlmVerdict[]; + try { + batchVerdicts = await analyzeProducts(batch); + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + log("warn", ` Catch-up LLM batch failed: ${message}`); + batchVerdicts = batch.map((p) => ({ + asin: p.record.asin, + verdict: "SKIP", + confidence: 0, + reasoning: "LLM analysis failed", + })); + } + + const verdictByAsin = new Map(batchVerdicts.map((v) => [v.asin, v])); + for (const product of batch) { + resultByAsin.set(product.record.asin, { + product, + verdict: verdictByAsin.get(product.record.asin) ?? 
{ + asin: product.record.asin, + verdict: "SKIP", + confidence: 0, + reasoning: "LLM returned no verdict", + }, + }); + } + } + } + + const results: AnalysisResult[] = []; + for (const asin of selectedAsins) { + const existing = resultByAsin.get(asin); + if (existing) { + results.push(existing); + continue; + } + + const fallbackProducts = buildEnrichedProducts( + [asin], + sellabilityMap, + spApiMap, + keepaEnrichment, + ); + const fallbackProduct = fallbackProducts[0]; + if (!fallbackProduct) continue; + results.push({ + product: fallbackProduct, + verdict: { + asin, + verdict: "SKIP", + confidence: 0, + reasoning: "Missing pre-analysis and catch-up result", + }, + }); + } + + log( + "info", + ` Final selected ASINs resolved: ${results.length}/${selectedAsins.length} (pre-analyzed=${selectedAsins.length - missingFinalAsins.length}, catch-up=${missingFinalAsins.length})`, + ); + + let fba = 0; + let fbm = 0; + let skip = 0; + + const totalBatches = Math.ceil(results.length / LLM_BATCH_SIZE); + + for (let i = 0; i < results.length; i += LLM_BATCH_SIZE) { + const batchResults = results.slice(i, i + LLM_BATCH_SIZE); + const batchNum = Math.floor(i / LLM_BATCH_SIZE) + 1; + log("info", ` Persisting batch ${batchNum}/${totalBatches}...`); + + await insertProductAnalysisResults(db, runId, batchResults); + + for (const result of batchResults) { + if (result.verdict.verdict === "FBA") { + fba++; + } else if (result.verdict.verdict === "FBM") { + fbm++; + } else { + skip++; + } + } + + await updateCategoryRunSummary(db, runId, { + topAsinsChecked: uniqueTopAsins.length, + availableAsins: selectedAsins.length, + fba, + fbm, + skip, + status: "running", + error: "", + }); + + log( + "info", + ` Persisted batch ${batchNum}/${totalBatches} (${batchResults.length} rows, totals FBA/FBM/SKIP=${fba}/${fbm}/${skip})`, + ); + } + + await updateCategoryRunSummary(db, runId, { + topAsinsChecked: uniqueTopAsins.length, + availableAsins: selectedAsins.length, + fba, + fbm, + skip, + 
status: "ok", + error: "", + }); + + return { + categoryId: category.id, + categoryLabel: category.label, + topAsinsChecked: uniqueTopAsins.length, + availableAsins: selectedAsins.length, + fba, + fbm, + skip, + status: "ok", + error: "", + results, + }; +} + +export async function main(): Promise { + const args = parseArgs(); + assertSpApiPrerequisites(); + + await connectCache(); + try { + mkdirSync(args.outputDir, { recursive: true }); + const DB_PATH = + process.env.RESULTS_DB_PATH || path.join(process.cwd(), "results.db"); + initDb(DB_PATH); + const db = getDb(DB_PATH); + + log("info", "Starting per-category mid-range pipeline"); + log("info", `Marketplace: ${config.spApiMarketplaceId}`); + log("info", `SP-API region: ${config.spApiRegion}`); + log("info", `Category limit: ${args.categoryLimit}`); + log( + "info", + `Top ASINs per category after mid-range filter: ${args.perCategoryTop}`, + ); + log("info", `Category candidate pool: ${args.categoryCandidatePool}`); + log( + "info", + `Monthly sold range: ${args.minMonthlySold}-${args.maxMonthlySold}`, + ); + log("info", `Price range: ${args.minPrice}-${args.maxPrice}`); + log( + "info", + `Seller count range: ${args.minSellerCount}-${args.maxSellerCount}`, + ); + log( + "info", + `Amazon buybox share range (only when Amazon sells): ${args.minAmazonBuyboxSharePct}-${args.maxAmazonBuyboxSharePct}`, + ); + log( + "info", + `API cache TTL: ${Math.floor(MID_RANGE_API_CACHE_TTL_SECONDS / 3600)}h`, + ); + log("info", `Blacklist file: ${args.blacklistFile}`); + + const categoryBlacklist = loadCategoryBlacklist(args.blacklistFile); + log("info", `Loaded ${categoryBlacklist.size} blacklisted category IDs.`); + + const categories = await discoverCategories(args.categoryLimit); + const allowedCategories = categories.filter( + (c) => !categoryBlacklist.has(c.id), + ); + const blacklistedCount = categories.length - allowedCategories.length; + log( + "info", + `Discovered ${categories.length} categories (${blacklistedCount} 
blacklisted, ${allowedCategories.length} to process).`, + ); + + const runTimestamp = new Date().toISOString(); + let processedCategories = 0; + let totalInsertedAsins = 0; + const allCategorySummaries: CategoryRunSummary[] = []; + + for (const category of allowedCategories) { + let categorySummary: CategoryRunSummary; + let runId: number | undefined; + try { + runId = await insertCategoryRunSummary( + db, + { + categoryId: category.id, + categoryLabel: category.label, + topAsinsChecked: 0, + availableAsins: 0, + fba: 0, + fbm: 0, + skip: 0, + status: "running", + error: "", + results: [], + }, + runTimestamp, + ); + + categorySummary = await processCategory( + db, + runId, + category, + args.perCategoryTop, + args.categoryCandidatePool, + args.minMonthlySold, + args.maxMonthlySold, + args.minPrice, + args.maxPrice, + args.minSellerCount, + args.maxSellerCount, + args.minAmazonBuyboxSharePct, + args.maxAmazonBuyboxSharePct, + ); + + totalInsertedAsins += categorySummary.results?.length ?? 0; + + processedCategories++; + allCategorySummaries.push({ ...categorySummary, runId }); + } catch (err) { + const message = err instanceof Error ? 
err.message : String(err); + log( + "warn", + `Skipping category ${category.label} (${category.id}) due to error: ${message}`, + ); + categorySummary = { + categoryId: category.id, + categoryLabel: category.label, + topAsinsChecked: 0, + availableAsins: 0, + fba: 0, + fbm: 0, + skip: 0, + status: "failed", + error: message, + results: [], + }; + if (runId) { + await updateCategoryRunSummary(db, runId, { + topAsinsChecked: 0, + availableAsins: 0, + fba: 0, + fbm: 0, + skip: 0, + status: "failed", + error: message, + }); + } + processedCategories++; + allCategorySummaries.push({ ...categorySummary, runId }); + } + } + + log("info", "\nRun summary"); + log("info", `Categories discovered/selected: ${categories.length}`); + log("info", `Categories processed: ${processedCategories}`); + log("info", `Total ASINs inserted into DB: ${totalInsertedAsins}`); + } finally { + await disconnectCache(); + } +} + +if (import.meta.main) { + main().catch((err) => { + log("error", `Mid-range process crashed: ${String(err)}`); + process.exit(1); + }); +}