feat: enhance README and index.ts for large file processing and output management

This commit is contained in:
Victor Noguera
2026-04-11 16:57:27 -04:00
parent 0162e54007
commit 4386560964
2 changed files with 143 additions and 84 deletions

View File

@@ -4,15 +4,18 @@ import { fetchSellabilityBatch, fetchSpApiPricingAndFees } from "./sp-api.ts";
import { connectCache, getCache, setCache, disconnectCache } from "./cache.ts";
import { analyzeProducts } from "./llm.ts";
import { printResults, writeResultsCsv } from "./writer.ts";
import path from "node:path";
import type {
EnrichedProduct,
AnalysisResult,
KeepaData,
ProductRecord,
SellabilityInfo,
SpApiData,
} from "./types.ts";
const LLM_BATCH_SIZE = 5;
const INPUT_BATCH_SIZE = 50;
function parseArgs(): { inputFile: string; outputFile?: string } {
const args = process.argv.slice(2);
@@ -29,41 +32,67 @@ function parseArgs(): { inputFile: string; outputFile?: string } {
return { inputFile, outputFile };
}
async function main() {
const { inputFile, outputFile } = parseArgs();
console.log("Connecting to Redis...");
await connectCache();
// Phase 1: Read input file
console.log(`\nReading ${inputFile}...`);
const products = readProducts(inputFile);
if (products.length === 0) {
console.error("No valid products found in input file.");
process.exit(1);
function chunkArray<T>(items: T[], chunkSize: number): T[][] {
const chunks: T[][] = [];
for (let i = 0; i < items.length; i += chunkSize) {
chunks.push(items.slice(i, i + chunkSize));
}
return chunks;
}
// Phase 2: Check cache for all ASINs
function resolveBaseOutputPath(inputFile: string, outputFile?: string): string {
if (outputFile) return outputFile;
const parsedInput = path.parse(inputFile);
return path.join(parsedInput.dir, `${parsedInput.name}_results.xlsx`);
}
function buildChunkOutputPath(
baseOutputPath: string,
chunkIndex: number,
): string {
const parsed = path.parse(baseOutputPath);
const extension = parsed.ext || ".xlsx";
const chunkSuffix = String(chunkIndex + 1).padStart(3, "0");
return path.join(
parsed.dir,
`${parsed.name}_part_${chunkSuffix}${extension}`,
);
}
async function processProductChunk(
products: ProductRecord[],
): Promise<AnalysisResult[]> {
// Phase 2: Check cache for all ASINs in chunk
console.log(`\nChecking cache for ${products.length} products...`);
const cached = new Map<string, EnrichedProduct>();
const excludedCachedAsins = new Set<string>();
const uncachedProducts: ProductRecord[] = [];
for (const p of products) {
const hit = await getCache(p.asin);
if (hit) {
console.log(` [cache hit] ${p.asin}`);
cached.set(p.asin, hit);
if (hit.spApi.sellabilityStatus === "available") {
console.log(` [cache hit] ${p.asin}`);
cached.set(p.asin, hit);
} else {
excludedCachedAsins.add(p.asin);
console.log(
` [exclude cached] ${p.asin} — status=${hit.spApi.sellabilityStatus}`,
);
}
} else {
uncachedProducts.push(p);
}
}
console.log(`${cached.size} cached, ${uncachedProducts.length} to fetch`);
console.log(
`${cached.size} cached available, ${excludedCachedAsins.size} cached excluded, ${uncachedProducts.length} to fetch`,
);
// Phase 3: Sellability gate — check all uncached ASINs before anything else
// Phase 3: Sellability gate — check uncached ASINs before anything else
const sellabilityMap = new Map<string, SellabilityInfo>();
const sellableProducts: ProductRecord[] = [];
const skippedProducts: ProductRecord[] = [];
const availableProducts: ProductRecord[] = [];
const unavailableProducts: ProductRecord[] = [];
if (uncachedProducts.length > 0) {
console.log(
@@ -81,50 +110,46 @@ async function main() {
};
sellabilityMap.set(p.asin, info);
// Keep: available, restricted (can request approval), unknown (proceed cautiously)
// Skip: not_available with canSell explicitly false
if (
info.sellabilityStatus === "not_available" &&
info.canSell === false
) {
skippedProducts.push(p);
// Keep only ASINs that are explicitly available.
if (info.sellabilityStatus === "available") {
availableProducts.push(p);
console.log(
` [skip] ${p.asin}${info.sellabilityReason ?? "not available"}`,
` [available] ${p.asin}status=${info.sellabilityStatus}`,
);
} else {
sellableProducts.push(p);
unavailableProducts.push(p);
console.log(
` [sellable] ${p.asin} — status=${info.sellabilityStatus}`,
` [exclude] ${p.asin} — status=${info.sellabilityStatus}, reason=${info.sellabilityReason ?? "n/a"}`,
);
}
}
console.log(
`\nSellability gate: ${sellableProducts.length} sellable, ${skippedProducts.length} skipped`,
`\nSellability gate: ${availableProducts.length} available, ${unavailableProducts.length} excluded`,
);
}
// Phase 4: Keepa batch fetch — only for sellable (uncached) ASINs
// Phase 4: Keepa batch fetch — only for available (uncached) ASINs
let keepaResults = new Map<string, KeepaData>();
if (sellableProducts.length > 0) {
console.log(`\nFetching ${sellableProducts.length} ASINs from Keepa...`);
if (availableProducts.length > 0) {
console.log(`\nFetching ${availableProducts.length} ASINs from Keepa...`);
try {
keepaResults = await fetchKeepaDataBatch(
sellableProducts.map((p) => p.asin),
availableProducts.map((p) => p.asin),
);
} catch (err) {
console.warn(`Keepa batch fetch failed: ${err}`);
}
}
// Phase 5: SP-API pricing + fees — only for sellable ASINs
// Phase 5: SP-API pricing + fees — only for available ASINs
console.log(
`\nFetching pricing & fees for ${sellableProducts.length} ASINs...`,
`\nFetching pricing & fees for ${availableProducts.length} ASINs...`,
);
const spApiResults = new Map<string, import("./types.ts").SpApiData>();
const spApiResults = new Map<string, SpApiData>();
// Concurrency-limited pricing+fees fetches
const pricingQueue = [...sellableProducts];
const pricingQueue = [...availableProducts];
let pricingDone = 0;
async function fetchNextPricing(): Promise<void> {
@@ -140,16 +165,16 @@ async function main() {
spApiResults.set(p.asin, spApi);
pricingDone++;
if (pricingDone % 10 === 0 || pricingDone === sellableProducts.length) {
if (pricingDone % 10 === 0 || pricingDone === availableProducts.length) {
console.log(
` [pricing] ${pricingDone}/${sellableProducts.length} fetched`,
` [pricing] ${pricingDone}/${availableProducts.length} fetched`,
);
}
}
}
const pricingWorkers = Array.from(
{ length: Math.min(5, sellableProducts.length || 1) },
{ length: Math.min(5, availableProducts.length || 1) },
() => fetchNextPricing(),
);
await Promise.all(pricingWorkers);
@@ -157,9 +182,13 @@ async function main() {
// Phase 6: Build enriched products
console.log(`\nEnriching products...`);
const enriched: EnrichedProduct[] = [];
const autoSkipResults: AnalysisResult[] = [];
const availableAsins = new Set(availableProducts.map((ap) => ap.asin));
for (const p of products) {
if (excludedCachedAsins.has(p.asin)) {
continue;
}
// Cached products — already enriched
const cachedProduct = cached.get(p.asin);
if (cachedProduct) {
@@ -167,38 +196,12 @@ async function main() {
continue;
}
// Skipped products — not sellable, auto-SKIP
if (skippedProducts.some((sp) => sp.asin === p.asin)) {
const sellability = sellabilityMap.get(p.asin)!;
const product: EnrichedProduct = {
record: p,
keepa: null,
spApi: {
fbaFee: 0,
fbmFee: 0,
referralFeePercent: 15,
estimatedSalePrice: 0,
...sellability,
},
fetchedAt: new Date().toISOString(),
};
autoSkipResults.push({
product,
verdict: {
asin: p.asin,
verdict: "SKIP",
confidence: 100,
reasoning:
`Not sellable: ${sellability.sellabilityReason ?? sellability.sellabilityStatus}`.slice(
0,
100,
),
},
});
// Exclude products that are not explicitly available.
if (!availableAsins.has(p.asin)) {
continue;
}
// Sellable products — full enrichment
// Available products — full enrichment
const keepa = keepaResults.get(p.asin) ?? null;
const spApi = spApiResults.get(p.asin) ?? {
fbaFee: 5.0,
@@ -229,7 +232,7 @@ async function main() {
}
}
// Phase 7: LLM analysis in batches — only for enriched (sellable + cached) products
// Phase 7: LLM analysis in batches — only for enriched available products
console.log(
`\nAnalyzing ${enriched.length} products via LLM (batch size: ${LLM_BATCH_SIZE})...\n`,
);
@@ -274,16 +277,66 @@ async function main() {
}
}
// Merge: LLM-analyzed results + auto-skipped results
const allResults = [...results, ...autoSkipResults];
return results;
}
printResults(allResults);
async function main() {
const { inputFile, outputFile } = parseArgs();
if (outputFile) {
writeResultsCsv(allResults, outputFile);
console.log("Connecting to Redis...");
await connectCache();
try {
// Phase 1: Read input file
console.log(`\nReading ${inputFile}...`);
const products = readProducts(inputFile);
if (products.length === 0) {
console.error("No valid products found in input file.");
process.exit(1);
}
const productChunks = chunkArray(products, INPUT_BATCH_SIZE);
const hasMultipleChunks = productChunks.length > 1;
const shouldWriteChunkFiles = hasMultipleChunks;
const resolvedBaseOutputPath = resolveBaseOutputPath(inputFile, outputFile);
const allResults: AnalysisResult[] = [];
if (hasMultipleChunks) {
console.log(
`\nLarge input detected (${products.length} products). Processing in chunks of ${INPUT_BATCH_SIZE}.`,
);
console.log(
`Chunk outputs will be written as numbered files using base: ${resolvedBaseOutputPath}`,
);
}
for (let chunkIndex = 0; chunkIndex < productChunks.length; chunkIndex++) {
const chunk = productChunks[chunkIndex]!;
console.log(
`\n=== Input chunk ${chunkIndex + 1}/${productChunks.length} (${chunk.length} products) ===`,
);
const chunkResults = await processProductChunk(chunk);
allResults.push(...chunkResults);
if (shouldWriteChunkFiles) {
const chunkOutputPath = buildChunkOutputPath(
resolvedBaseOutputPath,
chunkIndex,
);
writeResultsCsv(chunkResults, chunkOutputPath);
}
}
printResults(allResults);
if (!hasMultipleChunks && outputFile) {
writeResultsCsv(allResults, outputFile);
}
} finally {
await disconnectCache();
}
await disconnectCache();
}
main().catch((err) => {