diff --git a/README.md b/README.md index f5f5afe..46c3298 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,12 @@ bun run src/index.ts leads.xlsx bun run src/index.ts leads.csv --out results.xlsx ``` +Large-file behavior: + +- If the input has more than 50 products, processing is done in chunks of 50. +- Each chunk is analyzed and written to a numbered output file, for example: `results_part_001.xlsx`, `results_part_002.xlsx`, ... +- If `--out` is omitted for large files, the base output name defaults to `_results.xlsx` and chunk files are still written with numbered suffixes. + Quick SP-API connectivity tests: ```bash @@ -90,8 +96,9 @@ Numeric parsing accepts plain numbers as well as formatted values like `$12.50`, 3. **Sellability gate** — check all uncached ASINs against SP-API `getListingsRestrictions` (concurrency: 5 workers); immediately skip ASINs with status `not_available` and `canSell=false` (no Keepa/fees wasted) 4. **Keepa fetch** — batch the sellable (uncached) ASINs in a single API call (up to 100 per request) 5. **Enrich** — fetch SP-API pricing + FBA/FBM fees for sellable ASINs; combine with Keepa data and spreadsheet data -6. **LLM analysis** — send batches of 5 sellable products to LM Studio for FBA/FBM/SKIP verdict; skipped ASINs get auto-SKIP verdict (confidence 100) and bypass LLM entirely -7. **Output** — print results table to console (includes all ASINs), optionally write CSV/XLSX +6. **LLM analysis** — send batches of 5 available products to LM Studio for FBA/FBM/SKIP verdict +7. **Chunk orchestration** — if input size is greater than 50, run phases 2-6 for each 50-item chunk sequentially +8. **Output** — print results table to console (includes all ASINs); for chunked runs, always write seriated chunk files (`*_part_001`, `*_part_002`, ...); for non-chunked runs, write a single file only when `--out` is provided ## Output columns @@ -119,11 +126,10 @@ ASIN, Name, Brand, Category, Unit Cost, Current Price, Avg Price 90d, Sales Rank ## Notes -- **Sellability-first optimization**: SP-API `getListingsRestrictions` is checked first to filter out unsellable items before consuming Keepa tokens or running full SP-API pricing/fees queries. This saves API calls and reduces runtime for large lead lists. +- **Available-only processing**: SP-API `getListingsRestrictions` is checked first and only ASINs with `sellabilityStatus=available` are enriched, analyzed, and included in outputs. Restricted, not_available, and unknown items are excluded. - **SP-API concurrency**: `fetchSellabilityBatch` limits concurrent requests to 5 workers to avoid 429 throttling. Pricing+fees fetches also use 5 concurrent workers. - **No batch endpoint**: Amazon SP-API does not provide batch endpoints for `getListingsRestrictions` or `getMyFeesEstimate*`. Concurrency limiting with the library's built-in `auto_request_throttled` safety net prevents overwhelming the API. - **Keepa rate limiting**: The client reads `tokensLeft` and `refillRate` from each API response and waits automatically when tokens are exhausted. With a Pro subscription (1 token/min), all 100 ASINs in a batch cost 1 token. - **Redis is optional**: If Redis is unavailable the tool runs without caching — every run re-fetches from Keepa. - **SP-API**: `src/sp-api.ts` provides `fetchSellability`, `fetchSellabilityBatch`, and `fetchSpApiPricingAndFees` functions. If SP-API credentials are missing or a call fails, the tool falls back to conservative fee defaults and keeps processing. -- **Skipped ASINs**: Products with `not_available` sellability status and `canSell=false` appear in output with verdict `SKIP`, confidence 100, and reasoning from the sellability check. They do not consume LLM inference. - **Sandbox vs production**: When `SP_API_USE_SANDBOX=true`, production ASIN calls can be denied. Use sandbox-compatible test data or set it to `false` for live marketplace connectivity. diff --git a/src/index.ts b/src/index.ts index 5f7d25b..e60ab2d 100644 --- a/src/index.ts +++ b/src/index.ts @@ -4,15 +4,18 @@ import { fetchSellabilityBatch, fetchSpApiPricingAndFees } from "./sp-api.ts"; import { connectCache, getCache, setCache, disconnectCache } from "./cache.ts"; import { analyzeProducts } from "./llm.ts"; import { printResults, writeResultsCsv } from "./writer.ts"; +import path from "node:path"; import type { EnrichedProduct, AnalysisResult, KeepaData, ProductRecord, SellabilityInfo, + SpApiData, } from "./types.ts"; const LLM_BATCH_SIZE = 5; +const INPUT_BATCH_SIZE = 50; function parseArgs(): { inputFile: string; outputFile?: string } { const args = process.argv.slice(2); @@ -29,41 +32,67 @@ function parseArgs(): { inputFile: string; outputFile?: string } { return { inputFile, outputFile }; } -async function main() { - const { inputFile, outputFile } = parseArgs(); - - console.log("Connecting to Redis..."); - await connectCache(); - - // Phase 1: Read input file - console.log(`\nReading ${inputFile}...`); - const products = readProducts(inputFile); - - if (products.length === 0) { - console.error("No valid products found in input file."); - process.exit(1); +function chunkArray(items: T[], chunkSize: number): T[][] { + const chunks: T[][] = []; + for (let i = 0; i < items.length; i += chunkSize) { + chunks.push(items.slice(i, i + chunkSize)); } + return chunks; +} - // Phase 2: Check cache for all ASINs +function resolveBaseOutputPath(inputFile: string, outputFile?: string): string { + if (outputFile) return outputFile; + + const parsedInput = path.parse(inputFile); + return path.join(parsedInput.dir, `${parsedInput.name}_results.xlsx`); +} + +function buildChunkOutputPath( + baseOutputPath: string, + chunkIndex: number, +): string { + const parsed = path.parse(baseOutputPath); + const extension = parsed.ext || ".xlsx"; + const chunkSuffix = String(chunkIndex + 1).padStart(3, "0"); + return path.join( + parsed.dir, + `${parsed.name}_part_${chunkSuffix}${extension}`, + ); +} + +async function processProductChunk( + products: ProductRecord[], +): Promise { + // Phase 2: Check cache for all ASINs in chunk console.log(`\nChecking cache for ${products.length} products...`); const cached = new Map(); + const excludedCachedAsins = new Set(); const uncachedProducts: ProductRecord[] = []; for (const p of products) { const hit = await getCache(p.asin); if (hit) { - console.log(` [cache hit] ${p.asin}`); - cached.set(p.asin, hit); + if (hit.spApi.sellabilityStatus === "available") { + console.log(` [cache hit] ${p.asin}`); + cached.set(p.asin, hit); + } else { + excludedCachedAsins.add(p.asin); + console.log( + ` [exclude cached] ${p.asin} — status=${hit.spApi.sellabilityStatus}`, + ); + } } else { uncachedProducts.push(p); } } - console.log(`${cached.size} cached, ${uncachedProducts.length} to fetch`); + console.log( + `${cached.size} cached available, ${excludedCachedAsins.size} cached excluded, ${uncachedProducts.length} to fetch`, + ); - // Phase 3: Sellability gate — check all uncached ASINs before anything else + // Phase 3: Sellability gate — check uncached ASINs before anything else const sellabilityMap = new Map(); - const sellableProducts: ProductRecord[] = []; - const skippedProducts: ProductRecord[] = []; + const availableProducts: ProductRecord[] = []; + const unavailableProducts: ProductRecord[] = []; if (uncachedProducts.length > 0) { console.log( @@ -81,50 +110,46 @@ async function main() { }; sellabilityMap.set(p.asin, info); - // Keep: available, restricted (can request approval), unknown (proceed cautiously) - // Skip: not_available with canSell explicitly false - if ( - info.sellabilityStatus === "not_available" && - info.canSell === false - ) { - skippedProducts.push(p); + // Keep only ASINs that are explicitly available. + if (info.sellabilityStatus === "available") { + availableProducts.push(p); console.log( - ` [skip] ${p.asin} — ${info.sellabilityReason ?? "not available"}`, + ` [available] ${p.asin} — status=${info.sellabilityStatus}`, ); } else { - sellableProducts.push(p); + unavailableProducts.push(p); console.log( - ` [sellable] ${p.asin} — status=${info.sellabilityStatus}`, + ` [exclude] ${p.asin} — status=${info.sellabilityStatus}, reason=${info.sellabilityReason ?? "n/a"}`, ); } } console.log( - `\nSellability gate: ${sellableProducts.length} sellable, ${skippedProducts.length} skipped`, + `\nSellability gate: ${availableProducts.length} available, ${unavailableProducts.length} excluded`, ); } - // Phase 4: Keepa batch fetch — only for sellable (uncached) ASINs + // Phase 4: Keepa batch fetch — only for available (uncached) ASINs let keepaResults = new Map(); - if (sellableProducts.length > 0) { - console.log(`\nFetching ${sellableProducts.length} ASINs from Keepa...`); + if (availableProducts.length > 0) { + console.log(`\nFetching ${availableProducts.length} ASINs from Keepa...`); try { keepaResults = await fetchKeepaDataBatch( - sellableProducts.map((p) => p.asin), + availableProducts.map((p) => p.asin), ); } catch (err) { console.warn(`Keepa batch fetch failed: ${err}`); } } - // Phase 5: SP-API pricing + fees — only for sellable ASINs + // Phase 5: SP-API pricing + fees — only for available ASINs console.log( - `\nFetching pricing & fees for ${sellableProducts.length} ASINs...`, + `\nFetching pricing & fees for ${availableProducts.length} ASINs...`, ); - const spApiResults = new Map(); + const spApiResults = new Map(); // Concurrency-limited pricing+fees fetches - const pricingQueue = [...sellableProducts]; + const pricingQueue = [...availableProducts]; let pricingDone = 0; async function fetchNextPricing(): Promise { @@ -140,16 +165,16 @@ async function main() { spApiResults.set(p.asin, spApi); pricingDone++; - if (pricingDone % 10 === 0 || pricingDone === sellableProducts.length) { + if (pricingDone % 10 === 0 || pricingDone === availableProducts.length) { console.log( - ` [pricing] ${pricingDone}/${sellableProducts.length} fetched`, + ` [pricing] ${pricingDone}/${availableProducts.length} fetched`, ); } } } const pricingWorkers = Array.from( - { length: Math.min(5, sellableProducts.length || 1) }, + { length: Math.min(5, availableProducts.length || 1) }, () => fetchNextPricing(), ); await Promise.all(pricingWorkers); @@ -157,9 +182,13 @@ async function main() { // Phase 6: Build enriched products console.log(`\nEnriching products...`); const enriched: EnrichedProduct[] = []; - const autoSkipResults: AnalysisResult[] = []; + const availableAsins = new Set(availableProducts.map((ap) => ap.asin)); for (const p of products) { + if (excludedCachedAsins.has(p.asin)) { + continue; + } + // Cached products — already enriched const cachedProduct = cached.get(p.asin); if (cachedProduct) { @@ -167,38 +196,12 @@ async function main() { continue; } - // Skipped products — not sellable, auto-SKIP - if (skippedProducts.some((sp) => sp.asin === p.asin)) { - const sellability = sellabilityMap.get(p.asin)!; - const product: EnrichedProduct = { - record: p, - keepa: null, - spApi: { - fbaFee: 0, - fbmFee: 0, - referralFeePercent: 15, - estimatedSalePrice: 0, - ...sellability, - }, - fetchedAt: new Date().toISOString(), - }; - autoSkipResults.push({ - product, - verdict: { - asin: p.asin, - verdict: "SKIP", - confidence: 100, - reasoning: - `Not sellable: ${sellability.sellabilityReason ?? sellability.sellabilityStatus}`.slice( - 0, - 100, - ), - }, - }); + // Exclude products that are not explicitly available. + if (!availableAsins.has(p.asin)) { continue; } - // Sellable products — full enrichment + // Available products — full enrichment const keepa = keepaResults.get(p.asin) ?? null; const spApi = spApiResults.get(p.asin) ?? { fbaFee: 5.0, @@ -229,7 +232,7 @@ async function main() { } } - // Phase 7: LLM analysis in batches — only for enriched (sellable + cached) products + // Phase 7: LLM analysis in batches — only for enriched available products console.log( `\nAnalyzing ${enriched.length} products via LLM (batch size: ${LLM_BATCH_SIZE})...\n`, ); @@ -274,16 +277,66 @@ async function main() { } } - // Merge: LLM-analyzed results + auto-skipped results - const allResults = [...results, ...autoSkipResults]; + return results; +} - printResults(allResults); +async function main() { + const { inputFile, outputFile } = parseArgs(); - if (outputFile) { - writeResultsCsv(allResults, outputFile); + console.log("Connecting to Redis..."); + await connectCache(); + + try { + // Phase 1: Read input file + console.log(`\nReading ${inputFile}...`); + const products = readProducts(inputFile); + + if (products.length === 0) { + console.error("No valid products found in input file."); + process.exit(1); + } + + const productChunks = chunkArray(products, INPUT_BATCH_SIZE); + const hasMultipleChunks = productChunks.length > 1; + const shouldWriteChunkFiles = hasMultipleChunks; + const resolvedBaseOutputPath = resolveBaseOutputPath(inputFile, outputFile); + const allResults: AnalysisResult[] = []; + + if (hasMultipleChunks) { + console.log( + `\nLarge input detected (${products.length} products). Processing in chunks of ${INPUT_BATCH_SIZE}.`, + ); + console.log( + `Chunk outputs will be written as numbered files using base: ${resolvedBaseOutputPath}`, + ); + } + + for (let chunkIndex = 0; chunkIndex < productChunks.length; chunkIndex++) { + const chunk = productChunks[chunkIndex]!; + console.log( + `\n=== Input chunk ${chunkIndex + 1}/${productChunks.length} (${chunk.length} products) ===`, + ); + + const chunkResults = await processProductChunk(chunk); + allResults.push(...chunkResults); + + if (shouldWriteChunkFiles) { + const chunkOutputPath = buildChunkOutputPath( + resolvedBaseOutputPath, + chunkIndex, + ); + writeResultsCsv(chunkResults, chunkOutputPath); + } + } + + printResults(allResults); + + if (!hasMultipleChunks && outputFile) { + writeResultsCsv(allResults, outputFile); + } + } finally { + await disconnectCache(); } - - await disconnectCache(); } main().catch((err) => {