diff --git a/README.md b/README.md
index f5f5afe..46c3298 100644
--- a/README.md
+++ b/README.md
@@ -31,6 +31,12 @@ bun run src/index.ts leads.xlsx
bun run src/index.ts leads.csv --out results.xlsx
```
+Large-file behavior:
+
+- If the input has more than 50 products, processing is done in chunks of 50.
+- Each chunk is analyzed and written to a numbered output file, for example: `results_part_001.xlsx`, `results_part_002.xlsx`, ...
+- If `--out` is omitted for large files, the base output name defaults to `_results.xlsx` and chunk files are still written with numbered suffixes.
+
Quick SP-API connectivity tests:
```bash
@@ -90,8 +96,9 @@ Numeric parsing accepts plain numbers as well as formatted values like `$12.50`,
3. **Sellability gate** — check all uncached ASINs against SP-API `getListingsRestrictions` (concurrency: 5 workers); immediately skip ASINs with status `not_available` and `canSell=false` (no Keepa/fees wasted)
4. **Keepa fetch** — batch the sellable (uncached) ASINs in a single API call (up to 100 per request)
5. **Enrich** — fetch SP-API pricing + FBA/FBM fees for sellable ASINs; combine with Keepa data and spreadsheet data
-6. **LLM analysis** — send batches of 5 sellable products to LM Studio for FBA/FBM/SKIP verdict; skipped ASINs get auto-SKIP verdict (confidence 100) and bypass LLM entirely
-7. **Output** — print results table to console (includes all ASINs), optionally write CSV/XLSX
+6. **LLM analysis** — send batches of 5 available products to LM Studio for FBA/FBM/SKIP verdict
+7. **Chunk orchestration** — if input size is greater than 50, run phases 2-6 for each 50-item chunk sequentially
+8. **Output** — print results table to console (includes all ASINs); for chunked runs, always write seriated chunk files (`*_part_001`, `*_part_002`, ...); for non-chunked runs, write a single file only when `--out` is provided
## Output columns
@@ -119,11 +126,10 @@ ASIN, Name, Brand, Category, Unit Cost, Current Price, Avg Price 90d, Sales Rank
## Notes
-- **Sellability-first optimization**: SP-API `getListingsRestrictions` is checked first to filter out unsellable items before consuming Keepa tokens or running full SP-API pricing/fees queries. This saves API calls and reduces runtime for large lead lists.
+- **Available-only processing**: SP-API `getListingsRestrictions` is checked first and only ASINs with `sellabilityStatus=available` are enriched, analyzed, and included in outputs. Restricted, not_available, and unknown items are excluded.
- **SP-API concurrency**: `fetchSellabilityBatch` limits concurrent requests to 5 workers to avoid 429 throttling. Pricing+fees fetches also use 5 concurrent workers.
- **No batch endpoint**: Amazon SP-API does not provide batch endpoints for `getListingsRestrictions` or `getMyFeesEstimate*`. Concurrency limiting with the library's built-in `auto_request_throttled` safety net prevents overwhelming the API.
- **Keepa rate limiting**: The client reads `tokensLeft` and `refillRate` from each API response and waits automatically when tokens are exhausted. With a Pro subscription (1 token/min), all 100 ASINs in a batch cost 1 token.
- **Redis is optional**: If Redis is unavailable the tool runs without caching — every run re-fetches from Keepa.
- **SP-API**: `src/sp-api.ts` provides `fetchSellability`, `fetchSellabilityBatch`, and `fetchSpApiPricingAndFees` functions. If SP-API credentials are missing or a call fails, the tool falls back to conservative fee defaults and keeps processing.
-- **Skipped ASINs**: Products with `not_available` sellability status and `canSell=false` appear in output with verdict `SKIP`, confidence 100, and reasoning from the sellability check. They do not consume LLM inference.
- **Sandbox vs production**: When `SP_API_USE_SANDBOX=true`, production ASIN calls can be denied. Use sandbox-compatible test data or set it to `false` for live marketplace connectivity.
diff --git a/src/index.ts b/src/index.ts
index 5f7d25b..e60ab2d 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -4,15 +4,18 @@ import { fetchSellabilityBatch, fetchSpApiPricingAndFees } from "./sp-api.ts";
import { connectCache, getCache, setCache, disconnectCache } from "./cache.ts";
import { analyzeProducts } from "./llm.ts";
import { printResults, writeResultsCsv } from "./writer.ts";
+import path from "node:path";
import type {
EnrichedProduct,
AnalysisResult,
KeepaData,
ProductRecord,
SellabilityInfo,
+ SpApiData,
} from "./types.ts";
const LLM_BATCH_SIZE = 5;
+const INPUT_BATCH_SIZE = 50;
function parseArgs(): { inputFile: string; outputFile?: string } {
const args = process.argv.slice(2);
@@ -29,41 +32,67 @@ function parseArgs(): { inputFile: string; outputFile?: string } {
return { inputFile, outputFile };
}
-async function main() {
- const { inputFile, outputFile } = parseArgs();
-
- console.log("Connecting to Redis...");
- await connectCache();
-
- // Phase 1: Read input file
- console.log(`\nReading ${inputFile}...`);
- const products = readProducts(inputFile);
-
- if (products.length === 0) {
- console.error("No valid products found in input file.");
- process.exit(1);
+function chunkArray(items: T[], chunkSize: number): T[][] {
+ const chunks: T[][] = [];
+ for (let i = 0; i < items.length; i += chunkSize) {
+ chunks.push(items.slice(i, i + chunkSize));
}
+ return chunks;
+}
- // Phase 2: Check cache for all ASINs
+function resolveBaseOutputPath(inputFile: string, outputFile?: string): string {
+ if (outputFile) return outputFile;
+
+ const parsedInput = path.parse(inputFile);
+ return path.join(parsedInput.dir, `${parsedInput.name}_results.xlsx`);
+}
+
+function buildChunkOutputPath(
+ baseOutputPath: string,
+ chunkIndex: number,
+): string {
+ const parsed = path.parse(baseOutputPath);
+ const extension = parsed.ext || ".xlsx";
+ const chunkSuffix = String(chunkIndex + 1).padStart(3, "0");
+ return path.join(
+ parsed.dir,
+ `${parsed.name}_part_${chunkSuffix}${extension}`,
+ );
+}
+
+async function processProductChunk(
+ products: ProductRecord[],
+): Promise {
+ // Phase 2: Check cache for all ASINs in chunk
console.log(`\nChecking cache for ${products.length} products...`);
const cached = new Map();
+ const excludedCachedAsins = new Set();
const uncachedProducts: ProductRecord[] = [];
for (const p of products) {
const hit = await getCache(p.asin);
if (hit) {
- console.log(` [cache hit] ${p.asin}`);
- cached.set(p.asin, hit);
+ if (hit.spApi.sellabilityStatus === "available") {
+ console.log(` [cache hit] ${p.asin}`);
+ cached.set(p.asin, hit);
+ } else {
+ excludedCachedAsins.add(p.asin);
+ console.log(
+ ` [exclude cached] ${p.asin} — status=${hit.spApi.sellabilityStatus}`,
+ );
+ }
} else {
uncachedProducts.push(p);
}
}
- console.log(`${cached.size} cached, ${uncachedProducts.length} to fetch`);
+ console.log(
+ `${cached.size} cached available, ${excludedCachedAsins.size} cached excluded, ${uncachedProducts.length} to fetch`,
+ );
- // Phase 3: Sellability gate — check all uncached ASINs before anything else
+ // Phase 3: Sellability gate — check uncached ASINs before anything else
const sellabilityMap = new Map();
- const sellableProducts: ProductRecord[] = [];
- const skippedProducts: ProductRecord[] = [];
+ const availableProducts: ProductRecord[] = [];
+ const unavailableProducts: ProductRecord[] = [];
if (uncachedProducts.length > 0) {
console.log(
@@ -81,50 +110,46 @@ async function main() {
};
sellabilityMap.set(p.asin, info);
- // Keep: available, restricted (can request approval), unknown (proceed cautiously)
- // Skip: not_available with canSell explicitly false
- if (
- info.sellabilityStatus === "not_available" &&
- info.canSell === false
- ) {
- skippedProducts.push(p);
+ // Keep only ASINs that are explicitly available.
+ if (info.sellabilityStatus === "available") {
+ availableProducts.push(p);
console.log(
- ` [skip] ${p.asin} — ${info.sellabilityReason ?? "not available"}`,
+ ` [available] ${p.asin} — status=${info.sellabilityStatus}`,
);
} else {
- sellableProducts.push(p);
+ unavailableProducts.push(p);
console.log(
- ` [sellable] ${p.asin} — status=${info.sellabilityStatus}`,
+ ` [exclude] ${p.asin} — status=${info.sellabilityStatus}, reason=${info.sellabilityReason ?? "n/a"}`,
);
}
}
console.log(
- `\nSellability gate: ${sellableProducts.length} sellable, ${skippedProducts.length} skipped`,
+ `\nSellability gate: ${availableProducts.length} available, ${unavailableProducts.length} excluded`,
);
}
- // Phase 4: Keepa batch fetch — only for sellable (uncached) ASINs
+ // Phase 4: Keepa batch fetch — only for available (uncached) ASINs
let keepaResults = new Map();
- if (sellableProducts.length > 0) {
- console.log(`\nFetching ${sellableProducts.length} ASINs from Keepa...`);
+ if (availableProducts.length > 0) {
+ console.log(`\nFetching ${availableProducts.length} ASINs from Keepa...`);
try {
keepaResults = await fetchKeepaDataBatch(
- sellableProducts.map((p) => p.asin),
+ availableProducts.map((p) => p.asin),
);
} catch (err) {
console.warn(`Keepa batch fetch failed: ${err}`);
}
}
- // Phase 5: SP-API pricing + fees — only for sellable ASINs
+ // Phase 5: SP-API pricing + fees — only for available ASINs
console.log(
- `\nFetching pricing & fees for ${sellableProducts.length} ASINs...`,
+ `\nFetching pricing & fees for ${availableProducts.length} ASINs...`,
);
- const spApiResults = new Map();
+ const spApiResults = new Map();
// Concurrency-limited pricing+fees fetches
- const pricingQueue = [...sellableProducts];
+ const pricingQueue = [...availableProducts];
let pricingDone = 0;
async function fetchNextPricing(): Promise {
@@ -140,16 +165,16 @@ async function main() {
spApiResults.set(p.asin, spApi);
pricingDone++;
- if (pricingDone % 10 === 0 || pricingDone === sellableProducts.length) {
+ if (pricingDone % 10 === 0 || pricingDone === availableProducts.length) {
console.log(
- ` [pricing] ${pricingDone}/${sellableProducts.length} fetched`,
+ ` [pricing] ${pricingDone}/${availableProducts.length} fetched`,
);
}
}
}
const pricingWorkers = Array.from(
- { length: Math.min(5, sellableProducts.length || 1) },
+ { length: Math.min(5, availableProducts.length || 1) },
() => fetchNextPricing(),
);
await Promise.all(pricingWorkers);
@@ -157,9 +182,13 @@ async function main() {
// Phase 6: Build enriched products
console.log(`\nEnriching products...`);
const enriched: EnrichedProduct[] = [];
- const autoSkipResults: AnalysisResult[] = [];
+ const availableAsins = new Set(availableProducts.map((ap) => ap.asin));
for (const p of products) {
+ if (excludedCachedAsins.has(p.asin)) {
+ continue;
+ }
+
// Cached products — already enriched
const cachedProduct = cached.get(p.asin);
if (cachedProduct) {
@@ -167,38 +196,12 @@ async function main() {
continue;
}
- // Skipped products — not sellable, auto-SKIP
- if (skippedProducts.some((sp) => sp.asin === p.asin)) {
- const sellability = sellabilityMap.get(p.asin)!;
- const product: EnrichedProduct = {
- record: p,
- keepa: null,
- spApi: {
- fbaFee: 0,
- fbmFee: 0,
- referralFeePercent: 15,
- estimatedSalePrice: 0,
- ...sellability,
- },
- fetchedAt: new Date().toISOString(),
- };
- autoSkipResults.push({
- product,
- verdict: {
- asin: p.asin,
- verdict: "SKIP",
- confidence: 100,
- reasoning:
- `Not sellable: ${sellability.sellabilityReason ?? sellability.sellabilityStatus}`.slice(
- 0,
- 100,
- ),
- },
- });
+ // Exclude products that are not explicitly available.
+ if (!availableAsins.has(p.asin)) {
continue;
}
- // Sellable products — full enrichment
+ // Available products — full enrichment
const keepa = keepaResults.get(p.asin) ?? null;
const spApi = spApiResults.get(p.asin) ?? {
fbaFee: 5.0,
@@ -229,7 +232,7 @@ async function main() {
}
}
- // Phase 7: LLM analysis in batches — only for enriched (sellable + cached) products
+ // Phase 7: LLM analysis in batches — only for enriched available products
console.log(
`\nAnalyzing ${enriched.length} products via LLM (batch size: ${LLM_BATCH_SIZE})...\n`,
);
@@ -274,16 +277,66 @@ async function main() {
}
}
- // Merge: LLM-analyzed results + auto-skipped results
- const allResults = [...results, ...autoSkipResults];
+ return results;
+}
- printResults(allResults);
+async function main() {
+ const { inputFile, outputFile } = parseArgs();
- if (outputFile) {
- writeResultsCsv(allResults, outputFile);
+ console.log("Connecting to Redis...");
+ await connectCache();
+
+ try {
+ // Phase 1: Read input file
+ console.log(`\nReading ${inputFile}...`);
+ const products = readProducts(inputFile);
+
+ if (products.length === 0) {
+ console.error("No valid products found in input file.");
+ process.exit(1);
+ }
+
+ const productChunks = chunkArray(products, INPUT_BATCH_SIZE);
+ const hasMultipleChunks = productChunks.length > 1;
+ const shouldWriteChunkFiles = hasMultipleChunks;
+ const resolvedBaseOutputPath = resolveBaseOutputPath(inputFile, outputFile);
+ const allResults: AnalysisResult[] = [];
+
+ if (hasMultipleChunks) {
+ console.log(
+ `\nLarge input detected (${products.length} products). Processing in chunks of ${INPUT_BATCH_SIZE}.`,
+ );
+ console.log(
+ `Chunk outputs will be written as numbered files using base: ${resolvedBaseOutputPath}`,
+ );
+ }
+
+ for (let chunkIndex = 0; chunkIndex < productChunks.length; chunkIndex++) {
+ const chunk = productChunks[chunkIndex]!;
+ console.log(
+ `\n=== Input chunk ${chunkIndex + 1}/${productChunks.length} (${chunk.length} products) ===`,
+ );
+
+ const chunkResults = await processProductChunk(chunk);
+ allResults.push(...chunkResults);
+
+ if (shouldWriteChunkFiles) {
+ const chunkOutputPath = buildChunkOutputPath(
+ resolvedBaseOutputPath,
+ chunkIndex,
+ );
+ writeResultsCsv(chunkResults, chunkOutputPath);
+ }
+ }
+
+ printResults(allResults);
+
+ if (!hasMultipleChunks && outputFile) {
+ writeResultsCsv(allResults, outputFile);
+ }
+ } finally {
+ await disconnectCache();
}
-
- await disconnectCache();
}
main().catch((err) => {