diff --git a/package.json b/package.json index f994757..efd40dd 100644 --- a/package.json +++ b/package.json @@ -5,6 +5,7 @@ "private": true, "scripts": { "bestsellers": "bun run src/bestsellers-by-category.ts", + "monthly-sold": "bun run src/top-monthly-sold-by-category.ts", "start": "bun run src/index.ts", "start:web": "bun --hot src/server.ts", "build:web": "bun build src/web/index.html --outdir dist", diff --git a/src/top-monthly-sold-by-category.test.ts b/src/top-monthly-sold-by-category.test.ts new file mode 100644 index 0000000..98e3d51 --- /dev/null +++ b/src/top-monthly-sold-by-category.test.ts @@ -0,0 +1,316 @@ +import { test, expect, beforeAll, afterAll, beforeEach, mock } from "bun:test"; +import { Database } from "bun:sqlite"; +import { getDb, initDb, closeDb } from "./database.ts"; +import path from "node:path"; +import { rmSync, mkdirSync } from "node:fs"; + +const fetchSellabilityBatchMock = mock(async (asins: string[]) => { + return new Map( + asins.map((asin) => { + if (asin === "B000000003") { + return [ + asin, + { + canSell: false, + sellabilityStatus: "restricted" as const, + sellabilityReason: "restricted", + }, + ]; + } + + return [ + asin, + { + canSell: true, + sellabilityStatus: "available" as const, + sellabilityReason: "ok", + }, + ]; + }), + ); +}); + +const fetchSpApiPricingAndFeesMock = mock(async () => ({ + fbaFee: 4, + fbmFee: 2, + referralFeePercent: 15, + estimatedSalePrice: 25, + canSell: true, + sellabilityStatus: "available" as const, + sellabilityReason: "ok", +})); + +const analyzeProductsMock = mock(async (products: any[]) => { + return products.map((p) => ({ + asin: p.record.asin, + verdict: "FBA", + confidence: 90, + reasoning: "mocked", + })); +}); + +mock.module("./sp-api.ts", () => ({ + fetchSellabilityBatch: fetchSellabilityBatchMock, + fetchSpApiPricingAndFees: fetchSpApiPricingAndFeesMock, +})); + +mock.module("./llm.ts", () => ({ + analyzeProducts: analyzeProductsMock, +})); + +const modulePromise = import("./top-monthly-sold-by-category.ts"); + +const DB_TEST_PATH = path.join( + process.cwd(), + "test_output", + "test_monthly_sold_analysis.sqlite", +); + +let db: Database; +let processCategory: ( + db: Database, + runId: number, + category: any, + perCategoryTop: number, + categoryCandidatePool: number, + minMonthlySold: number, +) => Promise; +let insertCategoryRunSummary: ( + db: Database, + summary: any, + runTimestamp: string, +) => Promise; +let originalFetch: typeof globalThis.fetch; + +beforeAll(async () => { + const mod = await modulePromise; + processCategory = mod.processCategory; + insertCategoryRunSummary = mod.insertCategoryRunSummary; + + rmSync(path.dirname(DB_TEST_PATH), { recursive: true, force: true }); + mkdirSync(path.dirname(DB_TEST_PATH), { recursive: true }); + initDb(DB_TEST_PATH); + db = getDb(DB_TEST_PATH); + + originalFetch = globalThis.fetch; +}); + +afterAll(() => { + globalThis.fetch = originalFetch; + closeDb(); + rmSync(path.dirname(DB_TEST_PATH), { recursive: true, force: true }); +}); + +beforeEach(() => { + db.run("DELETE FROM product_analysis_results"); + db.run("DELETE FROM category_analysis_runs"); + + globalThis.fetch = mock(async (input: string | URL | Request) => { + const rawUrl = + typeof input === "string" + ? input + : input instanceof URL + ? input.toString() + : input.url; + const url = new URL(rawUrl); + + if (url.pathname === "/bestsellers") { + return new Response( + JSON.stringify({ + bestSellersList: [ + "B000000001", + "B000000002", + "B000000003", + "B000000004", + ], + tokensLeft: 10, + refillRate: 1, + }), + { status: 200 }, + ); + } + + if (url.pathname === "/product") { + return new Response( + JSON.stringify({ + products: [ + { + asin: "B000000001", + title: "Product One", + monthlySold: 600, + stats: { + current: [ + null, + null, + null, + 1000, + null, + null, + null, + null, + null, + null, + null, + 2, + null, + null, + null, + null, + null, + null, + 2599, + ], + avg: [2400, null, null, 1200], + }, + csv: [[1, 2599]], + categoryTree: [{ name: "Category 1" }], + }, + { + asin: "B000000002", + title: "Product Two", + monthlySold: 250, + stats: { + current: [ + null, + null, + null, + 2000, + null, + null, + null, + null, + null, + null, + null, + 3, + null, + null, + null, + null, + null, + null, + 1999, + ], + avg: [1800, null, null, 2200], + }, + csv: [[1, 1999]], + categoryTree: [{ name: "Category 1" }], + }, + { + asin: "B000000003", + title: "Product Three", + monthlySold: 800, + stats: { + current: [ + null, + null, + null, + 1500, + null, + null, + null, + null, + null, + null, + null, + 1, + null, + null, + null, + null, + null, + null, + 2099, + ], + avg: [2000, null, null, 1800], + }, + csv: [[1, 2099]], + categoryTree: [{ name: "Category 1" }], + }, + { + asin: "B000000004", + title: "Product Four", + monthlySold: 400, + stats: { + current: [ + null, + null, + null, + 3000, + null, + null, + null, + null, + null, + null, + null, + 4, + null, + null, + null, + null, + null, + null, + 2899, + ], + avg: [2600, null, null, 2800], + }, + csv: [[1, 2899]], + categoryTree: [{ name: "Category 1" }], + }, + ], + tokensLeft: 10, + refillRate: 1, + }), + { status: 200 }, + ); + } + + return new Response("not found", { status: 404 }); + }) as unknown as typeof globalThis.fetch; +}); + +test("processCategory filters to sellable ASINs with monthly sold >= threshold and keeps top-N", async () => { + const mockCategory = { + id: 1, + label: "Category 1", + parentId: 0, + childCount: 0, + }; + + const runId = await insertCategoryRunSummary( + db, + { + categoryId: mockCategory.id, + categoryLabel: mockCategory.label, + topAsinsChecked: 0, + availableAsins: 0, + fba: 0, + fbm: 0, + skip: 0, + status: "running", + error: "", + results: [], + }, + new Date().toISOString(), + ); + + const summary = await processCategory(db, runId, mockCategory, 2, 4, 300); + + expect(summary.status).toBe("ok"); + expect(summary.topAsinsChecked).toBe(4); + expect(summary.availableAsins).toBe(2); + expect(summary.results?.length).toBe(2); + + const productResults = db + .query( + "SELECT asin, monthly_sold FROM product_analysis_results ORDER BY monthly_sold DESC", + ) + .all() as Array<{ asin: string; monthly_sold: number }>; + + expect(productResults.length).toBe(2); + expect(productResults[0]?.asin).toBe("B000000001"); + expect(productResults[0]?.monthly_sold).toBe(600); + expect(productResults[1]?.asin).toBe("B000000004"); + expect(productResults[1]?.monthly_sold).toBe(400); +}); diff --git a/src/top-monthly-sold-by-category.ts b/src/top-monthly-sold-by-category.ts new file mode 100644 index 0000000..34e3bae --- /dev/null +++ b/src/top-monthly-sold-by-category.ts @@ -0,0 +1,1279 @@ +import { existsSync, mkdirSync, readFileSync } from "node:fs"; +import path from "node:path"; +import { type Database, getDb, initDb } from "./database.ts"; +import { config } from "./config.ts"; +import { analyzeProducts } from "./llm.ts"; +import { fetchSellabilityBatch, fetchSpApiPricingAndFees } from "./sp-api.ts"; +import type { + AnalysisResult, + EnrichedProduct, + KeepaData, + LlmVerdict, + ProductRecord, + SellabilityInfo, + SpApiData, +} from "./types.ts"; + +type CategoryInfo = { + id: number; + label: string; + parentId: number; + childCount: number; +}; + +type ParsedArgs = { + outputDir: string; + categoryLimit: number; + perCategoryTop: number; + categoryCandidatePool: number; + minMonthlySold: number; + blacklistFile: string; +}; + +type CategoryRunSummary = { + categoryId: number; + categoryLabel: string; + topAsinsChecked: number; + availableAsins: number; + fba: number; + fbm: number; + skip: number; + status: "running" | "ok" | "empty" | "failed"; + error: string; + runId?: number; + results?: AnalysisResult[]; +}; + +const KEEPA_BASE = "https://api.keepa.com"; +const DOMAIN_US = 1; +const DEFAULT_CATEGORY_LIMIT = 32; +const DEFAULT_PER_CATEGORY_TOP = 100; +const DEFAULT_CATEGORY_CANDIDATE_POOL = 500; +const DEFAULT_MIN_MONTHLY_SOLD = 300; +const SELLABILITY_BATCH_SIZE = 60; +const LLM_BATCH_SIZE = 10; +const PRICING_CONCURRENCY = 5; +const KEEPA_PRODUCT_CHUNK_SIZE = 100; +const DEFAULT_BLACKLIST_FILE = path.join( + process.cwd(), + "category-blacklist.csv", +); + +let keepaTokensLeft = 1; +let keepaRefillRate = 1; +let keepaLastRequestMs = 0; + +function log( + level: "info" | "warn" | "error", + message: string, + ...args: any[] +) { + const timestamp = new Date().toISOString(); + console.log(`[${timestamp}] [${level.toUpperCase()}] ${message}`, ...args); +} + +function parseArgs(): ParsedArgs { + const args = process.argv.slice(2); + const outputDir = + readFlagValue(args, "--out-dir") ?? path.join(process.cwd(), "output"); + const blacklistFile = + readFlagValue(args, "--blacklist-file") ?? DEFAULT_BLACKLIST_FILE; + + const categoryLimitRaw = readFlagValue(args, "--category-limit"); + const perCategoryTopRaw = readFlagValue(args, "--per-category-top"); + const categoryCandidatePoolRaw = readFlagValue( + args, + "--category-candidate-pool", + ); + const minMonthlySoldRaw = readFlagValue(args, "--min-monthly-sold"); + + const categoryLimit = categoryLimitRaw + ? Number(categoryLimitRaw) + : DEFAULT_CATEGORY_LIMIT; + const perCategoryTop = perCategoryTopRaw + ? Number(perCategoryTopRaw) + : DEFAULT_PER_CATEGORY_TOP; + const categoryCandidatePool = categoryCandidatePoolRaw + ? Number(categoryCandidatePoolRaw) + : DEFAULT_CATEGORY_CANDIDATE_POOL; + const minMonthlySold = minMonthlySoldRaw + ? Number(minMonthlySoldRaw) + : DEFAULT_MIN_MONTHLY_SOLD; + + if (!Number.isInteger(categoryLimit) || categoryLimit <= 0) { + printUsageAndExit("--category-limit must be a positive integer."); + } + + if (!Number.isInteger(perCategoryTop) || perCategoryTop <= 0) { + printUsageAndExit("--per-category-top must be a positive integer."); + } + + if (!Number.isInteger(categoryCandidatePool) || categoryCandidatePool <= 0) { + printUsageAndExit("--category-candidate-pool must be a positive integer."); + } + + if (categoryCandidatePool < perCategoryTop) { + printUsageAndExit( + "--category-candidate-pool must be greater than or equal to --per-category-top.", + ); + } + + if (!Number.isInteger(minMonthlySold) || minMonthlySold < 0) { + printUsageAndExit("--min-monthly-sold must be a non-negative integer."); + } + + return { + outputDir, + categoryLimit, + perCategoryTop, + categoryCandidatePool, + minMonthlySold, + blacklistFile, + }; +} + +function readFlagValue(args: string[], flag: string): string | undefined { + const idx = args.indexOf(flag); + if (idx === -1) return undefined; + return args[idx + 1]; +} + +function printUsageAndExit(message: string): never { + if (message) { + log("error", message); + } + + log( + "error", + [ + "Usage:", + " bun run src/top-monthly-sold-by-category.ts [--category-limit 32] [--per-category-top 100] [--category-candidate-pool 500] [--min-monthly-sold 300] [--out-dir output] [--blacklist-file category-blacklist.csv]", + "", + "Flow:", + " 1) Discover categories and round-robin selection.", + " 2) For each category: fetch a candidate pool, then keep only sellable ASINs.", + " 3) Rank sellable ASINs by Keepa monthlySold and keep top N where monthlySold >= threshold.", + " 4) Enrich selected ASINs with Keepa + SP-API pricing/fees.", + " 5) LLM-analyze and persist per category.", + ].join("\n"), + ); + + process.exit(1); +} + +export async function insertCategoryRunSummary( + db: Database, + summary: CategoryRunSummary, + runTimestamp: string, +): Promise { + const query = ` + INSERT INTO category_analysis_runs ( + category_id, category_label, run_timestamp, + top_asins_checked, available_asins, + fba_count, fbm_count, skip_count, + status, error_message + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?); + `; + const result = db.run(query, [ + summary.categoryId, + summary.categoryLabel, + runTimestamp, + summary.topAsinsChecked, + summary.availableAsins, + summary.fba, + summary.fbm, + summary.skip, + summary.status, + summary.error, + ]); + // Bun's SQLite client returns { changes: number, lastInsertRowid: number | bigint } + return Number(result.lastInsertRowid); +} + +export async function updateCategoryRunSummary( + db: Database, + runId: number, + summary: Pick< + CategoryRunSummary, + | "topAsinsChecked" + | "availableAsins" + | "fba" + | "fbm" + | "skip" + | "status" + | "error" + >, +): Promise { + db.run( + ` + UPDATE category_analysis_runs + SET + top_asins_checked = ?, + available_asins = ?, + fba_count = ?, + fbm_count = ?, + skip_count = ?, + status = ?, + error_message = ? + WHERE id = ? + `, + [ + summary.topAsinsChecked, + summary.availableAsins, + summary.fba, + summary.fbm, + summary.skip, + summary.status, + summary.error, + runId, + ], + ); +} + +export async function insertProductAnalysisResults( + db: Database, + runId: number, + results: AnalysisResult[], +): Promise { + if (results.length === 0) { + return; + } + + const insertStmt = db.prepare(` + INSERT INTO product_analysis_results ( + asin, run_id, name, brand, category, unit_cost, + current_price, avg_price_90d, avg_price_90d_sheet, + selling_price_sheet, sales_rank, sales_rank_avg_90d, + seller_count, monthly_sold, rank_drops_30d, rank_drops_90d, + fba_fee, fbm_fee, referral_percent, can_sell, + sellability_status, sellability_reason, + verdict, confidence, reasoning, fetched_at + ) VALUES ( + ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, + ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, + ?, ?, ?, ?, ?, ? + ) + ON CONFLICT(asin) DO UPDATE SET + run_id = excluded.run_id, + name = excluded.name, + brand = excluded.brand, + category = excluded.category, + unit_cost = excluded.unit_cost, + current_price = excluded.current_price, + avg_price_90d = excluded.avg_price_90d, + avg_price_90d_sheet = excluded.avg_price_90d_sheet, + selling_price_sheet = excluded.selling_price_sheet, + sales_rank = excluded.sales_rank, + sales_rank_avg_90d = excluded.sales_rank_avg_90d, + seller_count = excluded.seller_count, + monthly_sold = excluded.monthly_sold, + rank_drops_30d = excluded.rank_drops_30d, + rank_drops_90d = excluded.rank_drops_90d, + fba_fee = excluded.fba_fee, + fbm_fee = excluded.fbm_fee, + referral_percent = excluded.referral_percent, + can_sell = excluded.can_sell, + sellability_status = excluded.sellability_status, + sellability_reason = excluded.sellability_reason, + verdict = excluded.verdict, + confidence = excluded.confidence, + reasoning = excluded.reasoning, + fetched_at = excluded.fetched_at; + `); + + db.transaction((resultsBatch: AnalysisResult[]) => { + for (const r of resultsBatch) { + const price = + r.product.keepa?.currentPrice ?? + r.product.record.sellingPriceFromSheet ?? + r.product.spApi.estimatedSalePrice; + const rank = r.product.keepa?.salesRank ?? r.product.record.amazonRank; + + insertStmt.run( + r.product.record.asin, + runId, + r.product.record.name, + r.product.record.brand ?? null, + r.product.record.category ?? + r.product.keepa?.categoryTree?.join(" > ") ?? + null, + r.product.record.unitCost ?? null, + price ?? null, + r.product.keepa?.avgPrice90 ?? null, + r.product.record.avgPrice90FromSheet ?? null, + r.product.record.sellingPriceFromSheet ?? null, + rank ?? null, + r.product.keepa?.salesRankAvg90 ?? null, + r.product.keepa?.sellerCount ?? null, + r.product.keepa?.monthlySold ?? null, + r.product.keepa?.salesRankDrops30 ?? null, + r.product.keepa?.salesRankDrops90 ?? null, + r.product.spApi.fbaFee ?? null, + r.product.spApi.fbmFee ?? null, + r.product.spApi.referralFeePercent ?? null, + r.product.spApi.canSell == null + ? "unknown" + : r.product.spApi.canSell + ? "yes" + : "no", + r.product.spApi.sellabilityStatus ?? null, + r.product.spApi.sellabilityReason ?? null, + r.verdict.verdict, + r.verdict.confidence, + r.verdict.reasoning ?? null, + r.product.fetchedAt, + ); + } + })(results); // Execute the transaction with the results batch +} + +function loadCategoryBlacklist(filePath: string): Set { + const blacklist = new Set(); + + if (!existsSync(filePath)) { + log( + "warn", + `Blacklist file not found at ${filePath}; continuing with no excluded categories.`, + ); + return blacklist; + } + + const raw = readFileSync(filePath, "utf8"); + const lines = raw.split(/\r?\n/); + + for (let i = 0; i < lines.length; i++) { + const lineNumber = i + 1; + const line = lines[i] ?? ""; + const trimmed = line.trim(); + if (!trimmed || trimmed.startsWith("#")) continue; + + const [idPart, namePart] = trimmed.split(",", 2); + const idToken = idPart?.trim() ?? ""; + const nameToken = namePart?.trim() ?? ""; + + // Allow header row: id,name + if (idToken.toLowerCase() === "id") { + continue; + } + + if (!idToken) { + log( + "warn", + `Blacklist CSV line ${lineNumber}: missing id, row ignored (${trimmed}).`, + ); + continue; + } + + const id = Number(idToken); + if (!Number.isInteger(id) || id <= 0) { + log( + "warn", + `Blacklist CSV line ${lineNumber}: invalid id '${idToken}', row ignored (${trimmed}).`, + ); + continue; + } + + if (!nameToken) { + log( + "warn", + `Blacklist CSV line ${lineNumber}: missing name for id ${id}; accepted but please add name.`, + ); + } + + if (blacklist.has(id)) { + log( + "warn", + `Blacklist CSV line ${lineNumber}: duplicate id ${id}, keeping first occurrence.`, + ); + continue; + } + + blacklist.add(id); + } + + return blacklist; +} + +function assertSpApiPrerequisites(): void { + const missing: string[] = []; + if (!config.spApiClientId) missing.push("SP_API_CLIENT_ID"); + if (!config.spApiClientSecret) missing.push("SP_API_CLIENT_SECRET"); + if (!config.spApiRefreshToken) missing.push("SP_API_REFRESH_TOKEN"); + if (!config.spApiSellerId) missing.push("SP_API_SELLER_ID"); + + if (missing.length > 0) { + throw new Error(`Missing required SP-API env vars: ${missing.join(", ")}`); + } +} + +function sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +function sanitizeFileSegment(value: string): string { + const compact = value.trim().toLowerCase().replace(/\s+/g, "-"); + const safe = compact.replace(/[^a-z0-9-_]+/g, "-").replace(/-+/g, "-"); + return safe.replace(/^-|-$/g, "") || "category"; +} + +function parseKeepaRateLimitPayload(text: string): { + refillInMs?: number; + tokensLeft?: number; + refillRate?: number; +} { + try { + const parsed = JSON.parse(text); + return { + refillInMs: + typeof parsed?.refillIn === "number" && Number.isFinite(parsed.refillIn) + ? Math.max(0, parsed.refillIn) + : undefined, + tokensLeft: + typeof parsed?.tokensLeft === "number" && + Number.isFinite(parsed.tokensLeft) + ? parsed.tokensLeft + : undefined, + refillRate: + typeof parsed?.refillRate === "number" && + Number.isFinite(parsed.refillRate) + ? parsed.refillRate + : undefined, + }; + } catch { + return {}; + } +} + +function computeBackoffMs(attempt: number, refillInMs?: number): number { + const refillBased = refillInMs != null ? refillInMs + 1500 : 0; + const exponential = Math.min(60_000, 2 ** attempt * 1000); + const base = Math.max(refillBased, exponential); + return base + Math.floor(Math.random() * 750); +} + +async function waitForKeepaToken(): Promise { + if (keepaTokensLeft > 0) return; + + const elapsedMinutes = (Date.now() - keepaLastRequestMs) / 60_000; + const regenerated = Math.floor(elapsedMinutes * keepaRefillRate); + if (regenerated > 0) { + keepaTokensLeft += regenerated; + return; + } + + const waitMs = + Math.ceil((1 / keepaRefillRate) * 60_000) - + (Date.now() - keepaLastRequestMs); + + if (waitMs > 0) { + log( + "info", + `Keepa tokens depleted; waiting ${Math.ceil(waitMs / 1000)}s...`, + ); + await sleep(waitMs); + } + + keepaTokensLeft = 1; +} + +async function keepaGetJson(pathAndQuery: string): Promise { + let rateLimitHits = 0; + + while (true) { + await waitForKeepaToken(); + + const response = await fetch(`${KEEPA_BASE}${pathAndQuery}`); + keepaLastRequestMs = Date.now(); + + if (response.ok) { + const data = (await response.json()) as any; + if (typeof data?.tokensLeft === "number") { + keepaTokensLeft = data.tokensLeft; + } + if (typeof data?.refillRate === "number" && data.refillRate > 0) { + keepaRefillRate = data.refillRate; + } + return data; + } + + const text = await response.text(); + + if (response.status === 429) { + const rate = parseKeepaRateLimitPayload(text); + if (typeof rate.tokensLeft === "number") { + keepaTokensLeft = rate.tokensLeft; + } + if (typeof rate.refillRate === "number" && rate.refillRate > 0) { + keepaRefillRate = rate.refillRate; + } + + rateLimitHits++; + const waitMs = computeBackoffMs(rateLimitHits, rate.refillInMs); + log( + "warn", + `Keepa rate limited (429). Retry ${rateLimitHits} in ${Math.ceil(waitMs / 1000)}s...`, + ); + await sleep(waitMs); + continue; + } + + throw new Error(`Keepa HTTP ${response.status}: ${text}`); + } +} + +function normalizeCategoryList(data: any): CategoryInfo[] { + const deduped = new Map(); + + const addRawCategory = (value: any): void => { + const id = Number( + value?.catId ?? value?.categoryId ?? value?.id ?? value?.nodeId, + ); + if (!Number.isInteger(id) || id <= 0) return; + + const label = String( + value?.name ?? value?.label ?? `Category ${id}`, + ).trim(); + const parentId = Number(value?.parent ?? value?.parentId ?? -1); + const childCount = Array.isArray(value?.children) + ? value.children.length + : Number.isInteger(value?.childCount) + ? Number(value.childCount) + : 0; + + if (id === 0 || label.toLowerCase() === "root" || parentId === -1) { + return; + } + + if (!deduped.has(id)) { + deduped.set(id, { + id, + label: label || `Category ${id}`, + parentId, + childCount: Math.max(0, childCount), + }); + } + }; + + if (Array.isArray(data?.categories)) { + for (const value of data.categories) { + addRawCategory(value); + } + } + + if (data?.categories && typeof data.categories === "object") { + for (const value of Object.values(data.categories)) { + addRawCategory(value); + } + } + + if (Array.isArray(data?.categoryList)) { + for (const value of data.categoryList) { + addRawCategory(value); + } + } + + return [...deduped.values()]; +} + +function prioritizeLikelyBestsellerCategories( + categories: CategoryInfo[], +): CategoryInfo[] { + const leaves: CategoryInfo[] = []; + const nonLeaves: CategoryInfo[] = []; + + for (const category of categories) { + if (category.childCount === 0) { + leaves.push(category); + } else { + nonLeaves.push(category); + } + } + + const withNamedLabels = (list: CategoryInfo[]) => + list.filter((c) => !/^Category\s+\d+$/i.test(c.label)); + + const withFallbackLabels = (list: CategoryInfo[]) => + list.filter((c) => /^Category\s+\d+$/i.test(c.label)); + + return [ + ...withNamedLabels(leaves), + ...withFallbackLabels(leaves), + ...withNamedLabels(nonLeaves), + ...withFallbackLabels(nonLeaves), + ]; +} + +function resolveRootCategory( + category: CategoryInfo, + byId: Map, +): CategoryInfo { + let current = category; + const seen = new Set(); + + while (current.parentId > 0 && !seen.has(current.id)) { + seen.add(current.id); + const parent = byId.get(current.parentId); + if (!parent) break; + current = parent; + } + + return current; +} + +function selectCategoriesAcrossRoots( + categories: CategoryInfo[], + maxCategories: number, +): CategoryInfo[] { + const byId = new Map(categories.map((c) => [c.id, c])); + const grouped = new Map(); + + for (const category of categories) { + const root = resolveRootCategory(category, byId); + const bucket = grouped.get(root.id) ?? []; + bucket.push(category); + grouped.set(root.id, bucket); + } + + const rootIds = [...grouped.keys()]; + const selected: CategoryInfo[] = []; + let depth = 0; + + while (selected.length < maxCategories) { + let progressed = false; + + for (const rootId of rootIds) { + const bucket = grouped.get(rootId) ?? []; + if (depth >= bucket.length) continue; + selected.push(bucket[depth]!); + progressed = true; + if (selected.length >= maxCategories) break; + } + + if (!progressed) break; + depth++; + } + + return selected; +} + +async function discoverCategories( + maxCategories: number, +): Promise { + const data = await keepaGetJson( + `/category?key=${encodeURIComponent(config.keepaApiKey)}&domain=${DOMAIN_US}&category=0`, + ); + + const categories = normalizeCategoryList(data); + if (categories.length === 0) { + throw new Error("Keepa category discovery returned no usable categories."); + } + + const prioritized = prioritizeLikelyBestsellerCategories(categories); + return selectCategoriesAcrossRoots(prioritized, maxCategories); +} + +async function fetchCategoryBestSellerAsins( + category: CategoryInfo, + limit: number, +): Promise { + const query = new URLSearchParams({ + key: config.keepaApiKey, + domain: String(DOMAIN_US), + category: String(category.id), + range: "0", + variations: "0", + sublist: category.parentId > 0 ? "1" : "0", + }); + + const data = await keepaGetJson(`/bestsellers?${query.toString()}`); + + const bestSellersList = data?.bestSellersList; + const candidates = [ + bestSellersList, + bestSellersList?.asinList, + bestSellersList?.asins, + bestSellersList?.bestSellers, + bestSellersList?.bestSellerAsins, + data?.asinList, + data?.asins, + data?.bestsellers, + data?.bestSellers, + data?.bestSellerAsins, + data?.bestsellerList?.asinList, + data?.categories?.[String(category.id)]?.asinList, + ]; + + for (const value of candidates) { + if (Array.isArray(value)) { + return [ + ...new Set(value.map((v) => String(v).trim()).filter(Boolean)), + ].slice(0, limit); + } + } + + return []; +} + +async function fetchSellabilityMap( + asins: string[], +): Promise> { + const sellability = new Map(); + + for (let i = 0; i < asins.length; i += SELLABILITY_BATCH_SIZE) { + const chunk = asins.slice(i, i + SELLABILITY_BATCH_SIZE); + const chunkResults = await fetchSellabilityBatch(chunk); + + for (const asin of chunk) { + const info = chunkResults.get(asin) ?? { + canSell: null, + sellabilityStatus: "unknown" as const, + sellabilityReason: "Sellability check returned no result", + }; + sellability.set(asin, info); + } + + log( + "info", + ` Sellability progress: ${Math.min(i + chunk.length, asins.length)}/${asins.length}`, + ); + } + + return sellability; +} + +async function fetchSpApiMap( + asins: string[], + sellabilityMap: Map, +): Promise> { + const pricingQueue = [...asins]; + const spApiMap = new Map(); + let done = 0; + + async function worker(): Promise { + while (pricingQueue.length > 0) { + const asin = pricingQueue.shift(); + if (!asin) return; + + const sellability = sellabilityMap.get(asin) ?? { + canSell: null, + sellabilityStatus: "unknown", + sellabilityReason: "Sellability missing", + }; + + const spApi = await fetchSpApiPricingAndFees(asin, sellability); + spApiMap.set(asin, spApi); + + done++; + if (done % 10 === 0 || done === asins.length) { + log("info", ` Pricing progress: ${done}/${asins.length}`); + } + } + } + + const workers = Array.from( + { length: Math.min(PRICING_CONCURRENCY, asins.length || 1) }, + () => worker(), + ); + + await Promise.all(workers); + return spApiMap; +} + +function pickKeepaNumber(...values: unknown[]): number | null { + for (const value of values) { + if (typeof value !== "number" || !Number.isFinite(value)) continue; + if (value < 0) continue; + return value; + } + return null; +} + +function extractCurrentPrice(csv: number[][] | undefined): number | null { + if (!Array.isArray(csv)) return null; + + for (const series of [csv[0], csv[1]]) { + if (Array.isArray(series) && series.length >= 2) { + const lastPrice = series[series.length - 1]; + if (typeof lastPrice === "number" && lastPrice > 0) { + return Math.round((lastPrice / 100) * 100) / 100; + } + } + } + + return null; +} + +function parseKeepaProduct(product: Record): KeepaData { + const stats = product.stats; + const csv = product.csv; + const salesRankDrops30 = pickKeepaNumber( + product.salesRankDrops30, + stats?.salesRankDrops30, + ); + const salesRankDrops90 = + pickKeepaNumber(product.salesRankDrops90, stats?.salesRankDrops90) ?? + (salesRankDrops30 != null ? salesRankDrops30 * 3 : null); + const monthlySold = + pickKeepaNumber(product.monthlySold, stats?.monthlySold) ?? + salesRankDrops30; + + return { + currentPrice: extractCurrentPrice(csv), + avgPrice90: stats?.avg?.[0] != null ? stats.avg[0] / 100 : null, + minPrice90: stats?.min?.[0] != null ? stats.min[0] / 100 : null, + maxPrice90: stats?.max?.[0] != null ? stats.max[0] / 100 : null, + salesRank: stats?.current?.[3] ?? null, + salesRankAvg90: stats?.avg?.[3] ?? null, + salesRankDrops30, + salesRankDrops90, + sellerCount: stats?.current?.[11] ?? null, + buyBoxSeller: product.buyBoxSellerId ?? null, + buyBoxPrice: stats?.current?.[18] != null ? stats.current[18] / 100 : null, + monthlySold, + categoryTree: + product.categoryTree?.map((c: { name: string }) => c.name) ?? [], + }; +} + +async function fetchKeepaEnrichmentMap( + asins: string[], +): Promise> { + const out = new Map(); + + for (let i = 0; i < asins.length; i += KEEPA_PRODUCT_CHUNK_SIZE) { + const chunk = asins.slice(i, i + KEEPA_PRODUCT_CHUNK_SIZE); + const asinParam = encodeURIComponent(chunk.join(",")); + const data = await keepaGetJson( + `/product?key=${encodeURIComponent(config.keepaApiKey)}&domain=${DOMAIN_US}&asin=${asinParam}&stats=90`, + ); + + const products = Array.isArray(data?.products) ? data.products : []; + for (const product of products) { + const asin = String(product?.asin ?? "").trim(); + if (!asin) continue; + out.set(asin, { + keepa: parseKeepaProduct(product), + title: String(product?.title ?? "").trim(), + }); + } + + log( + "info", + ` Keepa enrichment progress: ${Math.min(i + chunk.length, asins.length)}/${asins.length}`, + ); + } + + return out; +} + +function selectTopMonthlySoldAsins( + asins: string[], + keepaEnrichmentMap: Map, + perCategoryTop: number, + minMonthlySold: number, +): string[] { + return [...asins] + .map((asin) => ({ + asin, + monthlySold: keepaEnrichmentMap.get(asin)?.keepa.monthlySold, + })) + .filter( + (item): item is { asin: string; monthlySold: number } => + typeof item.monthlySold === "number" && + item.monthlySold >= minMonthlySold, + ) + .sort((a, b) => b.monthlySold - a.monthlySold) + .slice(0, perCategoryTop) + .map((item) => item.asin); +} + +function buildEnrichedProducts( + asins: string[], + sellabilityMap: Map, + spApiMap: Map, + keepaEnrichmentMap: Map, +): EnrichedProduct[] { + return asins.map((asin) => { + const sellability = sellabilityMap.get(asin) ?? { + canSell: null, + sellabilityStatus: "unknown" as const, + sellabilityReason: "Sellability missing", + }; + + const spApi = spApiMap.get(asin) ?? { + fbaFee: 0, + fbmFee: 0, + referralFeePercent: 15, + estimatedSalePrice: 0, + canSell: sellability.canSell, + sellabilityStatus: sellability.sellabilityStatus, + sellabilityReason: sellability.sellabilityReason, + }; + + const enrichedKeepa = keepaEnrichmentMap.get(asin); + const keepa = enrichedKeepa?.keepa ?? null; + const title = enrichedKeepa?.title ?? asin; + + const record: ProductRecord = { + asin, + name: title, + unitCost: 0, + category: undefined, + brand: undefined, + supplier: undefined, + }; + + if (keepa?.currentPrice && spApi.estimatedSalePrice === 0) { + spApi.estimatedSalePrice = keepa.currentPrice; + } + + return { + record, + keepa, + spApi, + fetchedAt: new Date().toISOString(), + }; + }); +} + +export async function processCategory( + db: Database, + runId: number, + category: CategoryInfo, + perCategoryTop: number, + categoryCandidatePool: number, + minMonthlySold: number, +): Promise { + log("info", `\nCategory ${category.label} (${category.id})`); + + const topAsins = await fetchCategoryBestSellerAsins( + category, + categoryCandidatePool, + ); + if (topAsins.length === 0) { + log("info", " Keepa returned no ASINs for this category."); + await updateCategoryRunSummary(db, runId, { + topAsinsChecked: 0, + availableAsins: 0, + fba: 0, + fbm: 0, + skip: 0, + status: "empty", + error: "No ASINs returned by Keepa", + }); + return { + categoryId: category.id, + categoryLabel: category.label, + topAsinsChecked: 0, + availableAsins: 0, + fba: 0, + fbm: 0, + skip: 0, + status: "empty", + error: "No ASINs returned by Keepa", + results: [], + }; + } + + const uniqueTopAsins = Array.from(new Set(topAsins)); + if (uniqueTopAsins.length !== topAsins.length) { + log( + "warn", + ` Removed ${topAsins.length - uniqueTopAsins.length} duplicate ASINs before analysis.`, + ); + } + + log("info", ` Candidate ASINs fetched: ${uniqueTopAsins.length}`); + const sellabilityMap = await fetchSellabilityMap(uniqueTopAsins); + + const availableAsins = uniqueTopAsins.filter((asin) => { + const info = sellabilityMap.get(asin); + return info?.canSell === true && info.sellabilityStatus === "available"; + }); + + log( + "info", + ` Sellable ASINs: ${availableAsins.length}/${uniqueTopAsins.length}`, + ); + if (availableAsins.length === 0) { + await updateCategoryRunSummary(db, runId, { + topAsinsChecked: uniqueTopAsins.length, + availableAsins: 0, + fba: 0, + fbm: 0, + skip: 0, + status: "empty", + error: "No sellable ASINs", + }); + return { + categoryId: category.id, + categoryLabel: category.label, + topAsinsChecked: uniqueTopAsins.length, + availableAsins: 0, + fba: 0, + fbm: 0, + skip: 0, + status: "empty", + error: "No sellable ASINs", + results: [], + }; + } + + const keepaEnrichment = await fetchKeepaEnrichmentMap(availableAsins); + const selectedAsins = selectTopMonthlySoldAsins( + availableAsins, + keepaEnrichment, + perCategoryTop, + minMonthlySold, + ); + + log( + "info", + ` Selected by monthly sold >= ${minMonthlySold}: ${selectedAsins.length}/${availableAsins.length}`, + ); + + if (selectedAsins.length === 0) { + await updateCategoryRunSummary(db, runId, { + topAsinsChecked: uniqueTopAsins.length, + availableAsins: 0, + fba: 0, + fbm: 0, + skip: 0, + status: "empty", + error: `No sellable ASINs met monthly sold >= ${minMonthlySold}`, + }); + return { + categoryId: category.id, + categoryLabel: category.label, + topAsinsChecked: uniqueTopAsins.length, + availableAsins: 0, + fba: 0, + fbm: 0, + skip: 0, + status: "empty", + error: `No sellable ASINs met monthly sold >= ${minMonthlySold}`, + results: [], + }; + } + + const spApiMap = await fetchSpApiMap(selectedAsins, sellabilityMap); + const enrichedProducts = buildEnrichedProducts( + selectedAsins, + sellabilityMap, + spApiMap, + keepaEnrichment, + ); + + const results: AnalysisResult[] = []; + let fba = 0; + let fbm = 0; + let skip = 0; + const totalBatches = Math.ceil(enrichedProducts.length / LLM_BATCH_SIZE); + + for (let i = 0; i < enrichedProducts.length; i += LLM_BATCH_SIZE) { + const batch = enrichedProducts.slice(i, i + LLM_BATCH_SIZE); + const batchNum = Math.floor(i / LLM_BATCH_SIZE) + 1; + log("info", ` LLM batch ${batchNum}/${totalBatches}...`); + + let batchVerdicts: LlmVerdict[]; + try { + batchVerdicts = await analyzeProducts(batch); + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + log("warn", ` LLM batch failed: ${message}`); + batchVerdicts = batch.map((p) => ({ + asin: p.record.asin, + verdict: "SKIP", + confidence: 0, + reasoning: "LLM analysis failed", + })); + } + + const verdictByAsin = new Map(batchVerdicts.map((v) => [v.asin, v])); + const batchResults: AnalysisResult[] = batch.map((product) => ({ + product, + verdict: verdictByAsin.get(product.record.asin) ?? { + asin: product.record.asin, + verdict: "SKIP", + confidence: 0, + reasoning: "LLM returned no verdict", + }, + })); + + await insertProductAnalysisResults(db, runId, batchResults); + + for (const result of batchResults) { + results.push(result); + if (result.verdict.verdict === "FBA") { + fba++; + } else if (result.verdict.verdict === "FBM") { + fbm++; + } else { + skip++; + } + } + + await updateCategoryRunSummary(db, runId, { + topAsinsChecked: uniqueTopAsins.length, + availableAsins: selectedAsins.length, + fba, + fbm, + skip, + status: "running", + error: "", + }); + + log( + "info", + ` Persisted batch ${batchNum}/${totalBatches} (${batchResults.length} rows, totals FBA/FBM/SKIP=${fba}/${fbm}/${skip})`, + ); + + if (i + LLM_BATCH_SIZE < enrichedProducts.length) { + await sleep(1500); + } + } + + await updateCategoryRunSummary(db, runId, { + topAsinsChecked: uniqueTopAsins.length, + availableAsins: selectedAsins.length, + fba, + fbm, + skip, + status: "ok", + error: "", + }); + + return { + categoryId: category.id, + categoryLabel: category.label, + topAsinsChecked: uniqueTopAsins.length, + availableAsins: selectedAsins.length, + fba, + fbm, + skip, + status: "ok", + error: "", + results, + }; +} + +export async function main(): Promise { + const args = parseArgs(); + assertSpApiPrerequisites(); + + mkdirSync(args.outputDir, { recursive: true }); + const DB_PATH = + process.env.RESULTS_DB_PATH || path.join(process.cwd(), "results.db"); + initDb(DB_PATH); + const db = getDb(DB_PATH); + + log("info", "Starting per-category monthly-sold pipeline"); + log("info", `Marketplace: ${config.spApiMarketplaceId}`); + log("info", `SP-API region: ${config.spApiRegion}`); + log("info", `Category limit: ${args.categoryLimit}`); + log( + "info", + `Top ASINs per category after monthly sort: ${args.perCategoryTop}`, + ); + log("info", `Category candidate pool: ${args.categoryCandidatePool}`); + log("info", `Minimum monthly sold: ${args.minMonthlySold}`); + log("info", `Blacklist file: ${args.blacklistFile}`); + + const categoryBlacklist = loadCategoryBlacklist(args.blacklistFile); + log("info", `Loaded ${categoryBlacklist.size} blacklisted category IDs.`); + + const categories = await discoverCategories(args.categoryLimit); + const allowedCategories = categories.filter( + (c) => !categoryBlacklist.has(c.id), + ); + const blacklistedCount = categories.length - allowedCategories.length; + log( + "info", + `Discovered ${categories.length} categories (${blacklistedCount} blacklisted, ${allowedCategories.length} to process).`, + ); + + const runTimestamp = new Date().toISOString(); + let processedCategories = 0; + let totalInsertedAsins = 0; + const allCategorySummaries: CategoryRunSummary[] = []; + + for (const category of allowedCategories) { + let categorySummary: CategoryRunSummary; + let runId: number | undefined; + try { + runId = await insertCategoryRunSummary( + db, + { + categoryId: category.id, + categoryLabel: category.label, + topAsinsChecked: 0, + availableAsins: 0, + fba: 0, + fbm: 0, + skip: 0, + status: "running", + error: "", + results: [], + }, + runTimestamp, + ); + + categorySummary = await processCategory( + db, + runId, + category, + args.perCategoryTop, + args.categoryCandidatePool, + args.minMonthlySold, + ); + + totalInsertedAsins += categorySummary.results?.length ?? 0; + + processedCategories++; + allCategorySummaries.push({ ...categorySummary, runId }); + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + log( + "warn", + `Skipping category ${category.label} (${category.id}) due to error: ${message}`, + ); + categorySummary = { + categoryId: category.id, + categoryLabel: category.label, + topAsinsChecked: 0, + availableAsins: 0, + fba: 0, + fbm: 0, + skip: 0, + status: "failed", + error: message, + results: [], + }; + if (runId) { + await updateCategoryRunSummary(db, runId, { + topAsinsChecked: 0, + availableAsins: 0, + fba: 0, + fbm: 0, + skip: 0, + status: "failed", + error: message, + }); + } + processedCategories++; + allCategorySummaries.push({ ...categorySummary, runId }); + } + } + + log("info", "\nRun summary"); + log("info", `Categories discovered/selected: ${categories.length}`); + log("info", `Categories processed: ${processedCategories}`); + log("info", `Total ASINs inserted into DB: ${totalInsertedAsins}`); +} + +if (import.meta.main) { + main().catch((err) => { + log("error", `Monthly-sold process crashed: ${String(err)}`); + process.exit(1); + }); +}