diff --git a/src/analysis-pipeline.ts b/src/analysis-pipeline.ts index 5337f94..483aa7a 100644 --- a/src/analysis-pipeline.ts +++ b/src/analysis-pipeline.ts @@ -14,11 +14,14 @@ import type { export const DEFAULT_LLM_BATCH_SIZE = 5; export const DEFAULT_PRICING_CONCURRENCY = 5; +export type SellabilityFilter = "available" | "all"; + export type AnalysisPipelineOptions = { llmBatchSize?: number; pricingConcurrency?: number; llmBatchDelayMs?: number; llmRetryDelayMs?: number; + sellability?: SellabilityFilter; }; export function chunkArray(items: T[], chunkSize: number): T[][] { @@ -56,6 +59,7 @@ export async function processProductChunk( ); const llmBatchDelayMs = Math.max(0, options.llmBatchDelayMs ?? 5_000); const llmRetryDelayMs = Math.max(0, options.llmRetryDelayMs ?? 10_000); + const sellabilityFilter = options.sellability ?? "available"; console.log(`\nChecking cache for ${products.length} products...`); const cached = new Map(); @@ -65,7 +69,10 @@ export async function processProductChunk( for (const p of products) { const hit = await getCache(p.asin); if (hit) { - if (hit.spApi.sellabilityStatus === "available") { + if ( + sellabilityFilter === "all" || + hit.spApi.sellabilityStatus === "available" + ) { console.log(` [cache hit] ${p.asin}`); cached.set(p.asin, hit); } else { @@ -103,7 +110,10 @@ export async function processProductChunk( }; sellabilityMap.set(p.asin, info); - if (info.sellabilityStatus === "available") { + if ( + sellabilityFilter === "all" || + info.sellabilityStatus === "available" + ) { availableProducts.push(p); console.log( ` [available] ${p.asin} - status=${info.sellabilityStatus}`, @@ -116,9 +126,15 @@ export async function processProductChunk( } } - console.log( - `\nSellability gate: ${availableProducts.length} available, ${unavailableProducts.length} excluded`, - ); + if (sellabilityFilter === "all") { + console.log( + `\nSellability gate disabled: including all ${availableProducts.length} products`, + ); + } else { + console.log( + `\nSellability gate: ${availableProducts.length} available, ${unavailableProducts.length} excluded`, + ); + } } let keepaResults = new Map(); @@ -224,13 +240,17 @@ export async function processProductChunk( let verdicts; try { - verdicts = await analyzeProducts(batch); + verdicts = await analyzeProducts(batch, { + ignoreSellability: sellabilityFilter === "all", + }); } catch { if (llmRetryDelayMs > 0) { await wait(llmRetryDelayMs); } try { - verdicts = await analyzeProducts(batch); + verdicts = await analyzeProducts(batch, { + ignoreSellability: sellabilityFilter === "all", + }); } catch { verdicts = null; } diff --git a/src/index.ts b/src/index.ts index 87ea880..419df3a 100644 --- a/src/index.ts +++ b/src/index.ts @@ -2,27 +2,57 @@ import { readProducts } from "./reader.ts"; import { connectCache, disconnectCache } from "./cache.ts"; import { printResults, writeResultsToDb } from "./writer.ts"; import { initDb, closeDb } from "./database.ts"; -import { chunkArray, processProductChunk } from "./analysis-pipeline.ts"; +import { + chunkArray, + processProductChunk, + type SellabilityFilter, +} from "./analysis-pipeline.ts"; import path from "node:path"; import type { AnalysisResult } from "./types.ts"; const DB_PATH = "./results.db"; const INPUT_BATCH_SIZE = 50; -function parseArgs(): { inputFile: string; outputFile?: string } { +function parseSellabilityArg(args: string[]): SellabilityFilter { + const sellabilityArg = args.find((a) => a.startsWith("--sellability=")); + const sellabilityValueFromEquals = sellabilityArg?.split("=")[1]; + const sellabilityIdx = args.indexOf("--sellability"); + const sellabilityValueFromNext = + sellabilityIdx !== -1 ? args[sellabilityIdx + 1] : undefined; + const rawSellability = sellabilityValueFromEquals ?? sellabilityValueFromNext; + + if (!rawSellability) return "available"; + + const normalized = rawSellability.toLowerCase(); + if (normalized === "available" || normalized === "all") { + return normalized; + } + + console.error( + `Invalid --sellability value: \"${rawSellability}\". Use \"available\" or \"all\".`, + ); + process.exit(1); +} + +function parseArgs(): { + inputFile: string; + outputFile?: string; + sellability: SellabilityFilter; +} { const args = process.argv.slice(2); const inputFile = args.find((a) => !a.startsWith("--")); const outIdx = args.indexOf("--out"); const outputFile = outIdx !== -1 ? args[outIdx + 1] : undefined; + const sellability = parseSellabilityArg(args); if (!inputFile) { console.error( - "Usage: bun run src/index.ts [--out results.csv]", + "Usage: bun run src/index.ts [--out results.csv] [--sellability available|all]", ); process.exit(1); } - return { inputFile, outputFile }; + return { inputFile, outputFile, sellability }; } function resolveBaseOutputPath(inputFile: string, outputFile?: string): string { @@ -33,7 +63,9 @@ function resolveBaseOutputPath(inputFile: string, outputFile?: string): string { } async function main() { - const { inputFile, outputFile } = parseArgs(); + const { inputFile, outputFile, sellability } = parseArgs(); + + console.log(`Sellability filter: ${sellability}`); console.log("Connecting to Redis..."); await connectCache(); @@ -66,7 +98,7 @@ async function main() { console.log( `\n=== Input chunk ${chunkIndex + 1}/${productChunks.length} (${chunk.length} products) ===`, ); - const chunkResults = await processProductChunk(chunk); + const chunkResults = await processProductChunk(chunk, { sellability }); allResults.push(...chunkResults); } diff --git a/src/llm.ts b/src/llm.ts index 44f168b..ba69dcd 100644 --- a/src/llm.ts +++ b/src/llm.ts @@ -1,7 +1,7 @@ import { config } from "./config.ts"; import type { EnrichedProduct, LlmVerdict } from "./types.ts"; -const SYSTEM_PROMPT = `You are an expert Amazon product analyst specializing in FBA and FBM fulfillment strategy. +const SYSTEM_PROMPT_STRICT = `You are an expert Amazon product analyst specializing in FBA and FBM fulfillment strategy. Given product data, evaluate each product's viability for selling on Amazon. Consider: @@ -29,11 +29,48 @@ Return ONLY a raw JSON array (no markdown, no code fences, no explanation before Keep each reasoning under 100 characters to stay within output limits and mention key blocker if skipped (e.g., restricted, low demand, thin margin).`; +const SYSTEM_PROMPT_ASSUME_LISTABLE = `You are an expert Amazon product analyst specializing in FBA and FBM fulfillment strategy. + +Given product data, evaluate each product's viability for selling on Amazon. Consider: + +1. **Sales Velocity**: monthlySold and salesRankDrops30 are the most important signals. A product that doesn't sell is worthless regardless of margin. salesRankDrops30 = approximate units sold in 30 days. monthlySold is Keepa's estimate. +2. **Margin Analysis**: Sale price minus unit cost minus fees (FBA or FBM). Aim for >30% ROI minimum. The spreadsheet may include FBA NET and gross profit estimates — cross-check against Keepa pricing data. +3. **Sales Rank (BSR)**: Lower rank = higher demand. Rank <50,000 is good, <1,000 is excellent. +4. **Sales Rank Trend**: Compare current rank vs 90d average. Lower current = improving demand. +5. **Competition**: Number of sellers and Buy Box dynamics. Fewer sellers = easier entry. +6. **Price Stability**: Large price swings (high max vs low min over 90d) = volatile/risky. +7. **FBA vs FBM**: FBA preferred for fast-selling, small/light items. FBM for oversized, slow-moving, or high-margin items where fee savings matter. +8. **MOQ & Capital**: High MOQ with thin margins is risky. +9. **Supply Availability**: Total quantity available from supplier — low stock means limited runway. + +Decision policy: +- Ignore seller eligibility restrictions/status in this run. +- Assume all products are listable by this seller account. +- Prioritize profitable + high-velocity products. +- Use "SKIP" when data quality is poor or risk is high. + +Return ONLY a raw JSON array (no markdown, no code fences, no explanation before or after). One verdict per product: +[{ "asin": "B...", "verdict": "FBA" | "FBM" | "SKIP", "confidence": 0-100, "reasoning": "..." }] + +Keep each reasoning under 100 characters to stay within output limits and mention key blocker if skipped (e.g., low demand, thin margin).`; + +type AnalyzeProductsOptions = { + ignoreSellability?: boolean; +}; + +function getSystemPrompt(options: AnalyzeProductsOptions): string { + if (options.ignoreSellability) { + return SYSTEM_PROMPT_ASSUME_LISTABLE; + } + return SYSTEM_PROMPT_STRICT; +} + export async function analyzeProducts( products: EnrichedProduct[], + options: AnalyzeProductsOptions = {}, ): Promise { try { - return await analyzeProductsInternal(products); + return await analyzeProductsInternal(products, options); } catch (err) { const msg = String(err); if (products.length > 1 && msg.includes("Context size has been exceeded")) { @@ -44,7 +81,7 @@ export async function analyzeProducts( const fallback: LlmVerdict[] = []; for (const product of products) { try { - const single = await analyzeProductsInternal([product]); + const single = await analyzeProductsInternal([product], options); fallback.push( single[0] ?? { asin: product.record.asin, @@ -70,8 +107,12 @@ export async function analyzeProducts( async function analyzeProductsInternal( products: EnrichedProduct[], + options: AnalyzeProductsOptions, ): Promise { - const productSummaries = products.map(summarizeForLlm); + const productSummaries = products.map((p) => + summarizeForLlm(p, options.ignoreSellability === true), + ); + const systemPrompt = getSystemPrompt(options); const res = await fetch(`${config.llmUrl}/chat/completions`, { method: "POST", @@ -82,7 +123,7 @@ async function analyzeProductsInternal( body: JSON.stringify({ model: config.llmModel, messages: [ - { role: "system", content: SYSTEM_PROMPT }, + { role: "system", content: systemPrompt }, { role: "user", content: JSON.stringify(productSummaries, null, 2) }, ], temperature: 0.3, @@ -102,7 +143,7 @@ async function analyzeProductsInternal( return parseVerdicts(content, products); } -function summarizeForLlm(p: EnrichedProduct) { +function summarizeForLlm(p: EnrichedProduct, ignoreSellability: boolean) { const salePrice = p.keepa?.currentPrice ?? p.record.sellingPriceFromSheet ?? @@ -169,9 +210,11 @@ function summarizeForLlm(p: EnrichedProduct) { referralFee != null ? Math.round(referralFee * 100) / 100 : null, }, sellerEligibility: { - canSell: p.spApi.canSell, - status: p.spApi.sellabilityStatus, - reason: clampText(p.spApi.sellabilityReason, 120), + canSell: ignoreSellability ? true : p.spApi.canSell, + status: ignoreSellability ? "available" : p.spApi.sellabilityStatus, + reason: ignoreSellability + ? "Assumed listable by sellability=all" + : clampText(p.spApi.sellabilityReason, 120), }, estimatedProfit: fbaProfit != null && fbmProfit != null diff --git a/src/mid-range-sellers-by-category.test.ts b/src/mid-range-sellers-by-category.test.ts index 32ae6a1..6e2e1ac 100644 --- a/src/mid-range-sellers-by-category.test.ts +++ b/src/mid-range-sellers-by-category.test.ts @@ -320,7 +320,7 @@ beforeEach(() => { }) as unknown as typeof globalThis.fetch; }); -test("processCategory keeps mid-range matches even when sellability is restricted", async () => { +test("processCategory only analyzes sellable mid-range matches", async () => { const mockCategory = { id: 1, label: "Category 1", @@ -363,8 +363,8 @@ test("processCategory keeps mid-range matches even when sellability is restricte expect(summary.status).toBe("ok"); expect(summary.topAsinsChecked).toBe(5); - expect(summary.availableAsins).toBe(2); - expect(summary.results?.length).toBe(2); + expect(summary.availableAsins).toBe(1); + expect(summary.results?.length).toBe(1); const productResults = db .query( @@ -377,15 +377,8 @@ test("processCategory keeps mid-range matches even when sellability is restricte sellability_status: string; }>; - expect(productResults.length).toBe(2); - expect(productResults.map((row) => row.asin)).toEqual([ - "B000000003", - "B000000001", - ]); - - const restricted = productResults.find((row) => row.asin === "B000000003"); - expect(restricted?.can_sell).toBe("no"); - expect(restricted?.sellability_status).toBe("restricted"); + expect(productResults.length).toBe(1); + expect(productResults.map((row) => row.asin)).toEqual(["B000000001"]); const sellable = productResults.find((row) => row.asin === "B000000001"); expect(sellable?.can_sell).toBe("yes"); diff --git a/src/mid-range-sellers-by-category.ts b/src/mid-range-sellers-by-category.ts index a1cbc51..90b5f61 100644 --- a/src/mid-range-sellers-by-category.ts +++ b/src/mid-range-sellers-by-category.ts @@ -1,5 +1,7 @@ import { existsSync, mkdirSync, readFileSync } from "node:fs"; import path from "node:path"; +import { createInterface } from "node:readline/promises"; +import { stdin as input, stdout as output } from "node:process"; import { type Database, getDb, initDb } from "./database.ts"; import { config } from "./config.ts"; import { @@ -28,10 +30,15 @@ type CategoryInfo = { }; type ParsedArgs = { + listCategories: boolean; + selectCategories: boolean; + categoryIds: number[]; + sellabilityGate: "strict" | "soft" | "off"; outputDir: string; categoryLimit: number; perCategoryTop: number; categoryCandidatePool: number; + candidateBatchSize: number; minMonthlySold: number; maxMonthlySold: number; minPrice: number; @@ -40,9 +47,19 @@ type ParsedArgs = { maxSellerCount: number; minAmazonBuyboxSharePct: number; maxAmazonBuyboxSharePct: number; + maxAsinsAnalyzed: number | null; + maxKeepaProductsFetched: number | null; blacklistFile: string; }; +type RuntimeBudget = { + maxAsinsAnalyzed: number | null; + maxKeepaProductsFetched: number | null; + analyzedAsins: number; + keepaProductsFetched: number; + stopReason: string; +}; + type CategoryRunSummary = { categoryId: number; categoryLabel: string; @@ -64,6 +81,7 @@ const KEEPA_MINUTES_OFFSET = 21_564_000; const DEFAULT_CATEGORY_LIMIT = 32; const DEFAULT_PER_CATEGORY_TOP = 100; const DEFAULT_CATEGORY_CANDIDATE_POOL = 500; +const DEFAULT_CANDIDATE_BATCH_SIZE = 60; const DEFAULT_MIN_MONTHLY_SOLD = 100; const DEFAULT_MAX_MONTHLY_SOLD = 1000; const DEFAULT_MIN_PRICE = 15; @@ -98,6 +116,10 @@ function log( function parseArgs(): ParsedArgs { const args = process.argv.slice(2); + const listCategories = hasFlag(args, "--list-categories"); + const selectCategories = hasFlag(args, "--select-categories"); + const categoryIdsRaw = readFlagValue(args, "--category-ids"); + const sellabilityGateRaw = readFlagValue(args, "--sellability-gate"); const outputDir = readFlagValue(args, "--out-dir") ?? path.join(process.cwd(), "output"); const blacklistFile = @@ -109,6 +131,7 @@ function parseArgs(): ParsedArgs { args, "--category-candidate-pool", ); + const candidateBatchSizeRaw = readFlagValue(args, "--candidate-batch-size"); const minMonthlySoldRaw = readFlagValue(args, "--min-monthly-sold"); const maxMonthlySoldRaw = readFlagValue(args, "--max-monthly-sold"); const minPriceRaw = readFlagValue(args, "--min-price"); @@ -123,6 +146,11 @@ function parseArgs(): ParsedArgs { args, "--max-amazon-buybox-share-pct", ); + const maxAsinsAnalyzedRaw = readFlagValue(args, "--max-asins-analyzed"); + const maxKeepaProductsFetchedRaw = readFlagValue( + args, + "--max-keepa-products-fetched", + ); const categoryLimit = categoryLimitRaw ? Number(categoryLimitRaw) @@ -133,6 +161,9 @@ function parseArgs(): ParsedArgs { const categoryCandidatePool = categoryCandidatePoolRaw ? Number(categoryCandidatePoolRaw) : DEFAULT_CATEGORY_CANDIDATE_POOL; + const candidateBatchSize = candidateBatchSizeRaw + ? Number(candidateBatchSizeRaw) + : DEFAULT_CANDIDATE_BATCH_SIZE; const minMonthlySold = minMonthlySoldRaw ? Number(minMonthlySoldRaw) : DEFAULT_MIN_MONTHLY_SOLD; @@ -153,6 +184,22 @@ function parseArgs(): ParsedArgs { const maxAmazonBuyboxSharePct = maxAmazonBuyboxSharePctRaw ? Number(maxAmazonBuyboxSharePctRaw) : DEFAULT_MAX_AMAZON_BUYBOX_SHARE_PCT; + const maxAsinsAnalyzed = maxAsinsAnalyzedRaw + ? Number(maxAsinsAnalyzedRaw) + : null; + const maxKeepaProductsFetched = maxKeepaProductsFetchedRaw + ? Number(maxKeepaProductsFetchedRaw) + : null; + const categoryIds = parseCategoryIds(categoryIdsRaw); + const sellabilityGate = (sellabilityGateRaw ?? "soft").toLowerCase(); + + if ( + sellabilityGate !== "strict" && + sellabilityGate !== "soft" && + sellabilityGate !== "off" + ) { + printUsageAndExit("--sellability-gate must be one of: strict, soft, off."); + } if (!Number.isInteger(categoryLimit) || categoryLimit <= 0) { printUsageAndExit("--category-limit must be a positive integer."); @@ -172,6 +219,10 @@ function parseArgs(): ParsedArgs { ); } + if (!Number.isInteger(candidateBatchSize) || candidateBatchSize <= 0) { + printUsageAndExit("--candidate-batch-size must be a positive integer."); + } + if (!Number.isInteger(minMonthlySold) || minMonthlySold < 0) { printUsageAndExit("--min-monthly-sold must be a non-negative integer."); } @@ -240,11 +291,32 @@ function parseArgs(): ParsedArgs { ); } + if ( + maxAsinsAnalyzed != null && + (!Number.isInteger(maxAsinsAnalyzed) || maxAsinsAnalyzed <= 0) + ) { + printUsageAndExit("--max-asins-analyzed must be a positive integer."); + } + + if ( + maxKeepaProductsFetched != null && + (!Number.isInteger(maxKeepaProductsFetched) || maxKeepaProductsFetched <= 0) + ) { + printUsageAndExit( + "--max-keepa-products-fetched must be a positive integer.", + ); + } + return { + listCategories, + selectCategories, + categoryIds, + sellabilityGate, outputDir, categoryLimit, perCategoryTop, categoryCandidatePool, + candidateBatchSize, minMonthlySold, maxMonthlySold, minPrice, @@ -253,10 +325,36 @@ function parseArgs(): ParsedArgs { maxSellerCount, minAmazonBuyboxSharePct, maxAmazonBuyboxSharePct, + maxAsinsAnalyzed, + maxKeepaProductsFetched, blacklistFile, }; } +function hasFlag(args: string[], flag: string): boolean { + return args.includes(flag); +} + +function parseCategoryIds(raw: string | undefined): number[] { + if (!raw) { + return []; + } + + const parsed = raw + .split(",") + .map((part) => part.trim()) + .filter(Boolean) + .map((part) => Number(part)); + + if (parsed.some((value) => !Number.isInteger(value) || value <= 0)) { + printUsageAndExit( + "--category-ids must be a comma-separated list of positive integers.", + ); + } + + return Array.from(new Set(parsed)); +} + function readFlagValue(args: string[], flag: string): string | undefined { const idx = args.indexOf(flag); if (idx === -1) return undefined; @@ -272,20 +370,106 @@ function printUsageAndExit(message: string): never { "error", [ "Usage:", - " bun run src/mid-range-sellers-by-category.ts [--category-limit 32] [--per-category-top 100] [--category-candidate-pool 500] [--min-monthly-sold 100] [--max-monthly-sold 1000] [--min-price 15] [--max-price 200] [--min-seller-count 3] [--max-seller-count 20] [--min-amazon-buybox-share-pct 15] [--max-amazon-buybox-share-pct 85] [--out-dir output] [--blacklist-file category-blacklist.csv]", + " bun run src/mid-range-sellers-by-category.ts [--category-limit 32] [--list-categories] [--select-categories] [--category-ids 281053,172282] [--sellability-gate soft] [--per-category-top 100] [--category-candidate-pool 500] [--candidate-batch-size 60] [--min-monthly-sold 100] [--max-monthly-sold 1000] [--min-price 15] [--max-price 200] [--min-seller-count 3] [--max-seller-count 20] [--min-amazon-buybox-share-pct 15] [--max-amazon-buybox-share-pct 85] [--max-asins-analyzed 250] [--max-keepa-products-fetched 500] [--out-dir output] [--blacklist-file category-blacklist.csv]", + "", + "Selection:", + " --list-categories Discover and print runnable categories, then exit.", + " --select-categories Print categories and interactively choose by index or ID.", + " --category-ids Comma-separated category IDs to run.", + " --sellability-gate strict | soft | off (default: soft).", "", "Flow:", " 1) Discover categories and round-robin selection.", - " 2) For each category: fetch a candidate pool and compute sellability metadata.", - " 3) Select mid-range ASINs by monthlySold, price, sellerCount, and Amazon buy box share rules.", - " 4) Enrich selected ASINs with Keepa + SP-API pricing/fees.", - " 5) LLM-analyze and persist selected ASINs regardless of sellability status.", + " 2) For each category, process candidate ASINs in streaming batches.", + " 3) Strict gate: only ASINs with sellability=available proceed to Keepa.", + " 4) Keepa-enrich sellable ASINs, apply mid-range filters, fetch pricing/fees, then LLM-analyze.", + " 5) Persist each analyzed batch immediately and update run counters incrementally.", ].join("\n"), ); process.exit(1); } +function printCategorySelectionTable( + allowedCategories: CategoryInfo[], + blacklistedCount: number, +): void { + log( + "info", + `Category menu (${allowedCategories.length} runnable, ${blacklistedCount} blacklisted):`, + ); + for (let i = 0; i < allowedCategories.length; i++) { + const category = allowedCategories[i]!; + log( + "info", + `${String(i + 1).padStart(3, " ")}. id=${category.id} label=${category.label}`, + ); + } +} + +async function promptCategoryIds( + allowedCategories: CategoryInfo[], +): Promise> { + const rl = createInterface({ input, output }); + try { + const answer = ( + await rl.question( + "Enter category indexes or IDs (comma-separated), or 'all': ", + ) + ).trim(); + + if (!answer || answer.toLowerCase() === "all") { + return new Set(allowedCategories.map((category) => category.id)); + } + + const indexByPosition = new Map(); + for (let i = 0; i < allowedCategories.length; i++) { + indexByPosition.set(i + 1, allowedCategories[i]!.id); + } + const allowedIdSet = new Set( + allowedCategories.map((category) => category.id), + ); + + const selected = new Set(); + const tokens = answer + .split(",") + .map((token) => token.trim()) + .filter(Boolean); + + for (const token of tokens) { + const parsed = Number(token); + if (!Number.isInteger(parsed) || parsed <= 0) { + throw new Error( + `Invalid token '${token}'. Use positive integers only.`, + ); + } + + if (allowedIdSet.has(parsed)) { + selected.add(parsed); + continue; + } + + const idFromIndex = indexByPosition.get(parsed); + if (idFromIndex != null) { + selected.add(idFromIndex); + continue; + } + + throw new Error( + `Value '${token}' is neither a visible index nor a runnable category ID.`, + ); + } + + if (selected.size === 0) { + throw new Error("No categories selected."); + } + + return selected; + } finally { + rl.close(); + } +} + export async function insertCategoryRunSummary( db: Database, summary: CategoryRunSummary, @@ -1250,6 +1434,39 @@ function buildEnrichedProducts( }); } +function remainingBudget(max: number | null, used: number): number { + if (max == null) { + return Number.MAX_SAFE_INTEGER; + } + return Math.max(0, max - used); +} + +function stopIfBudgetHit(budget: RuntimeBudget, reason: string): boolean { + if (!budget.stopReason) { + budget.stopReason = reason; + } + return true; +} + +function shouldKeepCandidateBySellability( + info: SellabilityInfo | undefined, + mode: "strict" | "soft" | "off", +): boolean { + if (mode === "off") { + return true; + } + + if (!info) { + return mode === "soft"; + } + + if (mode === "strict") { + return info.canSell === true && info.sellabilityStatus === "available"; + } + + return info.canSell === true || info.sellabilityStatus === "unknown"; +} + export async function processCategory( db: Database, runId: number, @@ -1264,9 +1481,20 @@ export async function processCategory( maxSellerCount: number, minAmazonBuyboxSharePct: number, maxAmazonBuyboxSharePct: number, + sellabilityGate: "strict" | "soft" | "off", + runtimeBudget?: RuntimeBudget, + candidateBatchSize = DEFAULT_CANDIDATE_BATCH_SIZE, ): Promise { log("info", `\nCategory ${category.label} (${category.id})`); + const budget: RuntimeBudget = runtimeBudget ?? { + maxAsinsAnalyzed: null, + maxKeepaProductsFetched: null, + analyzedAsins: 0, + keepaProductsFetched: 0, + stopReason: "", + }; + const topAsins = await fetchCategoryBestSellerAsins( category, categoryCandidatePool, @@ -1306,144 +1534,129 @@ export async function processCategory( log("info", ` Candidate ASINs fetched: ${uniqueTopAsins.length}`); - const sellabilityMap = new Map(); - const keepaEnrichment = new Map< - string, - { keepa: KeepaData; title: string } - >(); - const cachedSpApiMap = new Map(); - const preAnalyzedByAsin = new Map(); - const scheduledAsins = new Set(); - const preanalysisTasks: Promise[] = []; - const uncachedAsins: string[] = []; + let fba = 0; + let fbm = 0; + let skip = 0; - for (const asin of uniqueTopAsins) { - const cached = await getApiCache(asin); - if (!cached) { - uncachedAsins.push(asin); - continue; + const results: AnalysisResult[] = []; + let checkedAsins = 0; + let sellableAsins = 0; + let keepaEnrichedAsins = 0; + + for ( + let offset = 0; + offset < uniqueTopAsins.length && results.length < perCategoryTop; + offset += candidateBatchSize + ) { + if (budget.stopReason) { + break; } - if (!cached.keepa) { - uncachedAsins.push(asin); - continue; - } - - keepaEnrichment.set(asin, { - keepa: cached.keepa, - title: cached.title, - }); - cachedSpApiMap.set(asin, cached.spApi); - sellabilityMap.set(asin, { - canSell: cached.spApi.canSell, - sellabilityStatus: cached.spApi.sellabilityStatus, - sellabilityReason: cached.spApi.sellabilityReason, - }); - } - - async function schedulePreanalysisForAsins(asins: string[]): Promise { - const toSchedule = asins.filter( - (asin) => !scheduledAsins.has(asin) && keepaEnrichment.has(asin), + const candidateAsins = uniqueTopAsins.slice( + offset, + offset + candidateBatchSize, + ); + checkedAsins += candidateAsins.length; + log( + "info", + ` Candidate batch ${Math.floor(offset / candidateBatchSize) + 1}: ${candidateAsins.length} ASINs`, ); - if (toSchedule.length === 0) { - return; + const sellabilityMap = new Map(); + const keepaEnrichment = new Map< + string, + { keepa: KeepaData; title: string } + >(); + const spApiMap = new Map(); + const pendingSellabilityAsins: string[] = []; + + for (const asin of candidateAsins) { + const cached = await getApiCache(asin); + if (!cached) { + pendingSellabilityAsins.push(asin); + continue; + } + + sellabilityMap.set(asin, { + canSell: cached.spApi.canSell, + sellabilityStatus: cached.spApi.sellabilityStatus, + sellabilityReason: cached.spApi.sellabilityReason, + }); + spApiMap.set(asin, cached.spApi); + if (cached.keepa) { + keepaEnrichment.set(asin, { + keepa: cached.keepa, + title: cached.title, + }); + } } - for (const asin of toSchedule) { - scheduledAsins.add(asin); + if (pendingSellabilityAsins.length > 0) { + const fetchedSellability = await fetchSellabilityMap( + pendingSellabilityAsins, + ); + for (const [asin, info] of fetchedSellability.entries()) { + sellabilityMap.set(asin, info); + } } - const task = (async () => { - const spApiMap = new Map(); - const uncachedSpApiAsins: string[] = []; - - for (const asin of toSchedule) { - const cached = cachedSpApiMap.get(asin); - if (cached) { - spApiMap.set(asin, cached); - continue; - } - uncachedSpApiAsins.push(asin); - } - - if (uncachedSpApiAsins.length > 0) { - const fetchedSpApiMap = await fetchSpApiMap( - uncachedSpApiAsins, - sellabilityMap, - ); - for (const [asin, spApi] of fetchedSpApiMap.entries()) { - spApiMap.set(asin, spApi); - cachedSpApiMap.set(asin, spApi); - } - } - - const enrichedProducts = buildEnrichedProducts( - toSchedule, - sellabilityMap, - spApiMap, - keepaEnrichment, - ); - - for (const product of enrichedProducts) { - await setApiCache( - product.record.asin, - { - title: product.record.name, - keepa: product.keepa, - spApi: product.spApi, - fetchedAt: product.fetchedAt, - }, - MID_RANGE_API_CACHE_TTL_SECONDS, - ); - } - - const preTotalBatches = Math.ceil( - enrichedProducts.length / LLM_BATCH_SIZE, - ); - for (let i = 0; i < enrichedProducts.length; i += LLM_BATCH_SIZE) { - const batch = enrichedProducts.slice(i, i + LLM_BATCH_SIZE); - const batchNum = Math.floor(i / LLM_BATCH_SIZE) + 1; - log( - "info", - ` Pre-analysis batch ${batchNum}/${preTotalBatches} (${batch.length} ASINs)...`, - ); - - let batchVerdicts: LlmVerdict[]; - try { - batchVerdicts = await analyzeProducts(batch); - } catch (err) { - const message = err instanceof Error ? err.message : String(err); - log("warn", ` Pre-analysis batch failed: ${message}`); - continue; - } - - const verdictByAsin = new Map(batchVerdicts.map((v) => [v.asin, v])); - for (const product of batch) { - const verdict = verdictByAsin.get(product.record.asin); - if (!verdict) { - continue; - } - - preAnalyzedByAsin.set(product.record.asin, { - product, - verdict, - }); - } - } - })().catch((err) => { - const message = err instanceof Error ? err.message : String(err); - log("warn", ` Background pre-analysis failed: ${message}`); + const currentlySellable = candidateAsins.filter((asin) => { + const info = sellabilityMap.get(asin); + return shouldKeepCandidateBySellability(info, sellabilityGate); }); + sellableAsins += currentlySellable.length; - preanalysisTasks.push(task); - } + if (currentlySellable.length === 0) { + log( + "info", + ` Batch has no candidates after sellability gate (${sellabilityGate}); skipping Keepa/LLM.`, + ); + continue; + } - await schedulePreanalysisForAsins( - selectMidRangeAsins( - uniqueTopAsins, + const missingKeepaAsins = currentlySellable.filter( + (asin) => !keepaEnrichment.has(asin), + ); + + if (missingKeepaAsins.length > 0) { + const keepaRemaining = remainingBudget( + budget.maxKeepaProductsFetched, + budget.keepaProductsFetched, + ); + if (keepaRemaining === 0) { + stopIfBudgetHit( + budget, + "Reached max Keepa products fetched cap before enrichment.", + ); + break; + } + + const keepaRequestAsins = missingKeepaAsins.slice(0, keepaRemaining); + if (keepaRequestAsins.length < missingKeepaAsins.length) { + stopIfBudgetHit( + budget, + "Reached max Keepa products fetched cap during enrichment.", + ); + } + + const fetchedKeepa = await fetchKeepaEnrichmentMap(keepaRequestAsins); + budget.keepaProductsFetched += keepaRequestAsins.length; + keepaEnrichedAsins += fetchedKeepa.size; + + for (const [asin, value] of fetchedKeepa.entries()) { + keepaEnrichment.set(asin, value); + } + } + + const remainingSlots = perCategoryTop - results.length; + if (remainingSlots <= 0) { + break; + } + + const selectedAsins = selectMidRangeAsins( + currentlySellable, keepaEnrichment, - perCategoryTop, + remainingSlots, minMonthlySold, maxMonthlySold, minPrice, @@ -1452,176 +1665,84 @@ export async function processCategory( maxSellerCount, minAmazonBuyboxSharePct, maxAmazonBuyboxSharePct, - ), - ); - - if (uncachedAsins.length > 0) { - const fetchedSellability = await fetchSellabilityMap(uncachedAsins); - for (const [asin, info] of fetchedSellability.entries()) { - sellabilityMap.set(asin, info); - } - - const fetchedKeepa = await fetchKeepaEnrichmentMap( - uncachedAsins, - (chunkMap) => { - for (const [asin, value] of chunkMap.entries()) { - keepaEnrichment.set(asin, value); - } - - const provisionalSelectedAsins = selectMidRangeAsins( - uniqueTopAsins, - keepaEnrichment, - perCategoryTop, - minMonthlySold, - maxMonthlySold, - minPrice, - maxPrice, - minSellerCount, - maxSellerCount, - minAmazonBuyboxSharePct, - maxAmazonBuyboxSharePct, - ); - - void schedulePreanalysisForAsins(provisionalSelectedAsins); - }, ); - for (const [asin, value] of fetchedKeepa.entries()) { - keepaEnrichment.set(asin, value); - } - } - log( - "info", - ` API cache hits: ${uniqueTopAsins.length - uncachedAsins.length}/${uniqueTopAsins.length}`, - ); - - const sellableCount = uniqueTopAsins.filter((asin) => { - const info = sellabilityMap.get(asin); - return info?.canSell === true && info.sellabilityStatus === "available"; - }).length; - log( - "info", - ` Sellability snapshot: sellable=${sellableCount} non-sellable-or-unknown=${uniqueTopAsins.length - sellableCount}`, - ); - - const selectedAsins = selectMidRangeAsins( - uniqueTopAsins, - keepaEnrichment, - perCategoryTop, - minMonthlySold, - maxMonthlySold, - minPrice, - maxPrice, - minSellerCount, - maxSellerCount, - minAmazonBuyboxSharePct, - maxAmazonBuyboxSharePct, - ); - - log( - "info", - ` Selected mid-range ASINs: ${selectedAsins.length}/${uniqueTopAsins.length} (monthlySold=${minMonthlySold}-${maxMonthlySold}, price=${minPrice}-${maxPrice}, sellerCount=${minSellerCount}-${maxSellerCount}, amazonBuyboxShare=${minAmazonBuyboxSharePct}-${maxAmazonBuyboxSharePct} when Amazon sells)`, - ); - - if (selectedAsins.length === 0) { - await updateCategoryRunSummary(db, runId, { - topAsinsChecked: uniqueTopAsins.length, - availableAsins: 0, - fba: 0, - fbm: 0, - skip: 0, - status: "empty", - error: "No ASINs matched the configured mid-range criteria", - }); - return { - categoryId: category.id, - categoryLabel: category.label, - topAsinsChecked: uniqueTopAsins.length, - availableAsins: 0, - fba: 0, - fbm: 0, - skip: 0, - status: "empty", - error: "No ASINs matched the configured mid-range criteria", - results: [], - }; - } - - const spApiMap = new Map(); - await Promise.allSettled(preanalysisTasks); - - const resultByAsin = new Map(); - for (const asin of selectedAsins) { - const pre = preAnalyzedByAsin.get(asin); - if (pre) { - resultByAsin.set(asin, pre); - } - } - - const missingFinalAsins = selectedAsins.filter( - (asin) => !resultByAsin.has(asin), - ); - if (missingFinalAsins.length > 0) { log( "info", - ` Catch-up analysis required for ${missingFinalAsins.length}/${selectedAsins.length} selected ASINs...`, + ` Batch selected mid-range ASINs: ${selectedAsins.length}/${currentlySellable.length}`, ); - } - const selectedUncachedSpApiAsins: string[] = []; - for (const asin of missingFinalAsins) { - const cached = cachedSpApiMap.get(asin); - if (cached) { - spApiMap.set(asin, cached); - } else { - selectedUncachedSpApiAsins.push(asin); + if (selectedAsins.length === 0) { + continue; } - } - if (selectedUncachedSpApiAsins.length > 0) { - const fetchedSpApiMap = await fetchSpApiMap( - selectedUncachedSpApiAsins, + const selectedWithoutSpApi = selectedAsins.filter( + (asin) => !spApiMap.has(asin), + ); + if (selectedWithoutSpApi.length > 0) { + const fetchedSpApi = await fetchSpApiMap( + selectedWithoutSpApi, + sellabilityMap, + ); + for (const [asin, value] of fetchedSpApi.entries()) { + spApiMap.set(asin, value); + } + } + + const enrichedProducts = buildEnrichedProducts( + selectedAsins, sellabilityMap, + spApiMap, + keepaEnrichment, ); - for (const [asin, spApi] of fetchedSpApiMap.entries()) { - spApiMap.set(asin, spApi); - cachedSpApiMap.set(asin, spApi); + + for (const product of enrichedProducts) { + await setApiCache( + product.record.asin, + { + title: product.record.name, + keepa: product.keepa, + spApi: product.spApi, + fetchedAt: product.fetchedAt, + }, + MID_RANGE_API_CACHE_TTL_SECONDS, + ); } - } - const catchUpProducts = buildEnrichedProducts( - missingFinalAsins, - sellabilityMap, - spApiMap, - keepaEnrichment, - ); + for (let i = 0; i < enrichedProducts.length; i += LLM_BATCH_SIZE) { + if (budget.stopReason) { + break; + } - for (const product of catchUpProducts) { - await setApiCache( - product.record.asin, - { - title: product.record.name, - keepa: product.keepa, - spApi: product.spApi, - fetchedAt: product.fetchedAt, - }, - MID_RANGE_API_CACHE_TTL_SECONDS, - ); - } + const analyzeRemaining = remainingBudget( + budget.maxAsinsAnalyzed, + budget.analyzedAsins, + ); + if (analyzeRemaining === 0) { + stopIfBudgetHit(budget, "Reached max ASINs analyzed cap."); + break; + } - if (catchUpProducts.length > 0) { - const catchUpBatches = Math.ceil(catchUpProducts.length / LLM_BATCH_SIZE); - for (let i = 0; i < catchUpProducts.length; i += LLM_BATCH_SIZE) { - const batch = catchUpProducts.slice(i, i + LLM_BATCH_SIZE); - const batchNum = Math.floor(i / LLM_BATCH_SIZE) + 1; - log("info", ` Catch-up LLM batch ${batchNum}/${catchUpBatches}...`); + const rawBatch = enrichedProducts.slice(i, i + LLM_BATCH_SIZE); + const batch = rawBatch.slice(0, analyzeRemaining); + if (batch.length === 0) { + break; + } + if (batch.length < rawBatch.length) { + stopIfBudgetHit( + budget, + "Reached max ASINs analyzed cap during LLM analysis.", + ); + } + + log("info", ` Analyze/persist batch (${batch.length} ASINs)...`); let batchVerdicts: LlmVerdict[]; try { batchVerdicts = await analyzeProducts(batch); } catch (err) { const message = err instanceof Error ? err.message : String(err); - log("warn", ` Catch-up LLM batch failed: ${message}`); + log("warn", ` LLM batch failed: ${message}`); batchVerdicts = batch.map((p) => ({ asin: p.record.asin, verdict: "SKIP", @@ -1631,117 +1752,168 @@ export async function processCategory( } const verdictByAsin = new Map(batchVerdicts.map((v) => [v.asin, v])); - for (const product of batch) { - resultByAsin.set(product.record.asin, { - product, - verdict: verdictByAsin.get(product.record.asin) ?? { - asin: product.record.asin, - verdict: "SKIP", - confidence: 0, - reasoning: "LLM returned no verdict", - }, - }); + const batchResults: AnalysisResult[] = batch.map((product) => ({ + product, + verdict: verdictByAsin.get(product.record.asin) ?? { + asin: product.record.asin, + verdict: "SKIP", + confidence: 0, + reasoning: "LLM returned no verdict", + }, + })); + + await insertProductAnalysisResults(db, runId, batchResults); + + for (const result of batchResults) { + if (result.verdict.verdict === "FBA") { + fba++; + } else if (result.verdict.verdict === "FBM") { + fbm++; + } else { + skip++; + } } + + budget.analyzedAsins += batchResults.length; + results.push(...batchResults); + + await updateCategoryRunSummary(db, runId, { + topAsinsChecked: checkedAsins, + availableAsins: results.length, + fba, + fbm, + skip, + status: "running", + error: "", + }); + + log( + "info", + ` Persisted ${batchResults.length} rows (category totals FBA/FBM/SKIP=${fba}/${fbm}/${skip})`, + ); } } - const results: AnalysisResult[] = []; - for (const asin of selectedAsins) { - const existing = resultByAsin.get(asin); - if (existing) { - results.push(existing); - continue; - } - - const fallbackProducts = buildEnrichedProducts( - [asin], - sellabilityMap, - spApiMap, - keepaEnrichment, - ); - const fallbackProduct = fallbackProducts[0]; - if (!fallbackProduct) continue; - results.push({ - product: fallbackProduct, - verdict: { - asin, - verdict: "SKIP", - confidence: 0, - reasoning: "Missing pre-analysis and catch-up result", - }, + if (results.length === 0) { + const emptyReason = + budget.stopReason || + "No sellable ASINs matched the configured mid-range criteria"; + await updateCategoryRunSummary(db, runId, { + topAsinsChecked: checkedAsins, + availableAsins: 0, + fba, + fbm, + skip, + status: "empty", + error: emptyReason, }); + return { + categoryId: category.id, + categoryLabel: category.label, + topAsinsChecked: checkedAsins, + availableAsins: 0, + fba, + fbm, + skip, + status: "empty", + error: emptyReason, + results, + }; } log( "info", - ` Final selected ASINs resolved: ${results.length}/${selectedAsins.length} (pre-analyzed=${selectedAsins.length - missingFinalAsins.length}, catch-up=${missingFinalAsins.length})`, + ` Category stream totals: checked=${checkedAsins}, sellable=${sellableAsins}, keepaEnriched=${keepaEnrichedAsins}, analyzed=${results.length}`, ); - let fba = 0; - let fbm = 0; - let skip = 0; - - const totalBatches = Math.ceil(results.length / LLM_BATCH_SIZE); - - for (let i = 0; i < results.length; i += LLM_BATCH_SIZE) { - const batchResults = results.slice(i, i + LLM_BATCH_SIZE); - const batchNum = Math.floor(i / LLM_BATCH_SIZE) + 1; - log("info", ` Persisting batch ${batchNum}/${totalBatches}...`); - - await insertProductAnalysisResults(db, runId, batchResults); - - for (const result of batchResults) { - if (result.verdict.verdict === "FBA") { - fba++; - } else if (result.verdict.verdict === "FBM") { - fbm++; - } else { - skip++; - } - } - - await updateCategoryRunSummary(db, runId, { - topAsinsChecked: uniqueTopAsins.length, - availableAsins: selectedAsins.length, - fba, - fbm, - skip, - status: "running", - error: "", - }); - - log( - "info", - ` Persisted batch ${batchNum}/${totalBatches} (${batchResults.length} rows, totals FBA/FBM/SKIP=${fba}/${fbm}/${skip})`, - ); - } - await updateCategoryRunSummary(db, runId, { - topAsinsChecked: uniqueTopAsins.length, - availableAsins: selectedAsins.length, + topAsinsChecked: checkedAsins, + availableAsins: results.length, fba, fbm, skip, status: "ok", - error: "", + error: budget.stopReason, }); return { categoryId: category.id, categoryLabel: category.label, - topAsinsChecked: uniqueTopAsins.length, - availableAsins: selectedAsins.length, + topAsinsChecked: checkedAsins, + availableAsins: results.length, fba, fbm, skip, status: "ok", - error: "", + error: budget.stopReason, results, }; } export async function main(): Promise { const args = parseArgs(); + const categoryBlacklist = loadCategoryBlacklist(args.blacklistFile); + log("info", `Loaded ${categoryBlacklist.size} blacklisted category IDs.`); + + const categories = await discoverCategories(args.categoryLimit); + const initiallyAllowedCategories = categories.filter( + (c) => !categoryBlacklist.has(c.id), + ); + const blacklistedCount = + categories.length - initiallyAllowedCategories.length; + log( + "info", + `Discovered ${categories.length} categories (${blacklistedCount} blacklisted, ${initiallyAllowedCategories.length} runnable).`, + ); + + printCategorySelectionTable(initiallyAllowedCategories, blacklistedCount); + + if ( + args.listCategories && + !args.selectCategories && + args.categoryIds.length === 0 + ) { + log("info", "Listed categories only; exiting without running analysis."); + return; + } + + let selectedCategoryIds = new Set(args.categoryIds); + if (args.selectCategories) { + const interactiveSelection = await promptCategoryIds( + initiallyAllowedCategories, + ); + selectedCategoryIds = interactiveSelection; + } + + let allowedCategories = initiallyAllowedCategories; + if (selectedCategoryIds.size > 0) { + allowedCategories = initiallyAllowedCategories.filter((category) => + selectedCategoryIds.has(category.id), + ); + if (allowedCategories.length === 0) { + throw new Error( + "Selected category IDs did not match any runnable discovered categories.", + ); + } + const missingIds = [...selectedCategoryIds].filter( + (id) => !allowedCategories.some((category) => category.id === id), + ); + if (missingIds.length > 0) { + log( + "warn", + `Ignoring unavailable selected category IDs: ${missingIds.join(",")}`, + ); + } + } + + if ( + args.listCategories && + args.selectCategories === false && + args.categoryIds.length > 0 + ) { + log("info", "Proceeding with categories selected via --category-ids."); + } + assertSpApiPrerequisites(); await connectCache(); @@ -1761,6 +1933,8 @@ export async function main(): Promise { `Top ASINs per category after mid-range filter: ${args.perCategoryTop}`, ); log("info", `Category candidate pool: ${args.categoryCandidatePool}`); + log("info", `Candidate batch size: ${args.candidateBatchSize}`); + log("info", `Sellability gate mode: ${args.sellabilityGate}`); log( "info", `Monthly sold range: ${args.minMonthlySold}-${args.maxMonthlySold}`, @@ -1779,26 +1953,31 @@ export async function main(): Promise { `API cache TTL: ${Math.floor(MID_RANGE_API_CACHE_TTL_SECONDS / 3600)}h`, ); log("info", `Blacklist file: ${args.blacklistFile}`); - - const categoryBlacklist = loadCategoryBlacklist(args.blacklistFile); - log("info", `Loaded ${categoryBlacklist.size} blacklisted category IDs.`); - - const categories = await discoverCategories(args.categoryLimit); - const allowedCategories = categories.filter( - (c) => !categoryBlacklist.has(c.id), - ); - const blacklistedCount = categories.length - allowedCategories.length; + log("info", `Max ASINs analyzed cap: ${args.maxAsinsAnalyzed ?? "none"}`); log( "info", - `Discovered ${categories.length} categories (${blacklistedCount} blacklisted, ${allowedCategories.length} to process).`, + `Max Keepa products fetched cap: ${args.maxKeepaProductsFetched ?? "none"}`, ); + log("info", `Categories selected to process: ${allowedCategories.length}`); const runTimestamp = new Date().toISOString(); let processedCategories = 0; let totalInsertedAsins = 0; const allCategorySummaries: CategoryRunSummary[] = []; + const runtimeBudget: RuntimeBudget = { + maxAsinsAnalyzed: args.maxAsinsAnalyzed, + maxKeepaProductsFetched: args.maxKeepaProductsFetched, + analyzedAsins: 0, + keepaProductsFetched: 0, + stopReason: "", + }; for (const category of allowedCategories) { + if (runtimeBudget.stopReason) { + log("info", `Stopping run early: ${runtimeBudget.stopReason}`); + break; + } + let categorySummary: CategoryRunSummary; let runId: number | undefined; try { @@ -1833,6 +2012,9 @@ export async function main(): Promise { args.maxSellerCount, args.minAmazonBuyboxSharePct, args.maxAmazonBuyboxSharePct, + args.sellabilityGate, + runtimeBudget, + args.candidateBatchSize, ); totalInsertedAsins += categorySummary.results?.length ?? 0; @@ -1877,6 +2059,13 @@ export async function main(): Promise { log("info", `Categories discovered/selected: ${categories.length}`); log("info", `Categories processed: ${processedCategories}`); log("info", `Total ASINs inserted into DB: ${totalInsertedAsins}`); + log( + "info", + `Budget usage: analyzed=${runtimeBudget.analyzedAsins}, keepaFetched=${runtimeBudget.keepaProductsFetched}`, + ); + if (runtimeBudget.stopReason) { + log("warn", `Run stopped by budget cap: ${runtimeBudget.stopReason}`); + } } finally { await disconnectCache(); }