import path from "node:path";
import { requireAsin } from "../asin.ts";
import { fetchKeepaDataBatch, lookupKeepaUpcs } from "../integrations/keepa.ts";
import {
  fetchSellabilityBatch,
  fetchSpApiPricingAndFees,
  lookupSpApiUpcs,
} from "../integrations/sp-api.ts";
import {
  processUpcFileInBatches,
  type UpcInputRow,
} from "./upc-file-reader.ts";
import {
  appendSupplierResultsToRun,
  completeRunInDb,
  failRunInDb,
  refreshRunCountsInDb,
  startRunInDb,
  type RunCounts,
} from "../writer.ts";
import { connectCache, disconnectCache } from "../integrations/cache.ts";
import { scoreSupplierProduct, resolveSupplierSalePrice } from "./supplier-scoring.ts";
import {
  writeSupplierWorkbook,
  type SupplierExportSummary,
} from "./supplier-export.ts";
import type {
  KeepaUpcLookupDetail,
  KeepaUpcLookupStatus,
  ProductRecord,
  SupplierAnalysisResult,
  SupplierScore,
  UpcLookupDetail,
} from "../types.ts";

/** Rows handed to the batch handler per reader batch when the caller gives no size. */
const DEFAULT_INPUT_BATCH_SIZE = 200;
/** UPCs sent per lookup request when the caller gives no size. */
const DEFAULT_UPC_LOOKUP_BATCH_SIZE = 100;
/** Number of concurrent workers used by `fetchFeesForProducts`. */
const DEFAULT_PRICING_CONCURRENCY = 5;

// Result types are derived from the integration functions so they cannot drift
// from what those functions actually return.
type KeepaResultMap = Awaited<ReturnType<typeof fetchKeepaDataBatch>>;
type SellabilityMap = Awaited<ReturnType<typeof fetchSellabilityBatch>>;
type SpApiPricingResult = Awaited<ReturnType<typeof fetchSpApiPricingAndFees>>;

/** Options accepted by {@link runUpcFileAnalysis}. */
export type UpcFileAnalysisOptions = {
  inputFile: string;
  outputFile?: string;
  inputBatchSize?: number;
  upcLookupBatchSize?: number;
  maxRows?: number;
  /** When true (the default) the run connects and disconnects the cache itself. */
  manageResources?: boolean;
  dbPath?: string;
};

/** Summary returned by {@link runUpcFileAnalysis}. */
export type UpcFileAnalysisSummary = {
  runId: number;
  inputFile: string;
  outputFile?: string;
  processedRows: number;
  matchedRows: number;
  /** Per-status row counts; despite the name this includes the `found` status. */
  unresolvedByStatus: Record<KeepaUpcLookupStatus, number>;
  runCounts: RunCounts;
  reader: {
    mode: "xlsx_stream" | "xlsx_fallback" | "xls_fallback";
    totalRowsSeen: number;
    emittedRows: number;
    skippedMissingUpc: number;
    skippedInvalidUpc: number;
  };
};

/** Prints the command-line usage text to stdout. */
function printUsage(): void {
  console.log("Usage:");
  console.log(
    " bun run src/upc-file-analysis.ts --input input/<file> [--out output/results.xlsx] [--input-batch-size 200] [--upc-lookup-batch-size 100] [--max-rows 1000]",
  );
}

/**
 * Parses a flag value as a base-10 integer that must be at least 1.
 *
 * @param value - Raw argument text; `undefined` when the flag was the last token.
 * @param flagName - Flag name used in the error message.
 * @throws Error when the value does not parse to an integer >= 1.
 */
function parsePositiveInt(value: string | undefined, flagName: string): number {
  const parsed = Number.parseInt(String(value), 10);
  if (!Number.isFinite(parsed) || parsed < 1) {
    throw new Error(`Invalid value for ${flagName}: ${value}`);
  }
  return parsed;
}

/**
 * Turns CLI arguments into analysis options.
 *
 * `--help`/`-h` prints usage and exits the process. A single bare positional
 * argument is accepted as the input file when `--input` was not given first.
 *
 * @throws Error on unknown flags, missing values, extra positionals, or no input file.
 */
function parseArgs(argv: string[]): UpcFileAnalysisOptions {
  let inputFile: string | undefined;
  let outputFile: string | undefined;
  let inputBatchSize: number | undefined;
  let upcLookupBatchSize: number | undefined;
  let maxRows: number | undefined;

  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i];
    if (arg === undefined) continue;

    switch (arg) {
      case "--help":
      case "-h":
        printUsage();
        process.exit(0);
      // eslint-disable-next-line no-fallthrough -- process.exit never returns
      case "--input": {
        const next = argv[i + 1];
        if (!next) throw new Error("Missing value after --input");
        inputFile = next;
        i++;
        break;
      }
      case "--out": {
        const next = argv[i + 1];
        if (!next) throw new Error("Missing value after --out");
        outputFile = next;
        i++;
        break;
      }
      case "--input-batch-size":
        inputBatchSize = parsePositiveInt(argv[i + 1], "--input-batch-size");
        i++;
        break;
      case "--upc-lookup-batch-size":
        upcLookupBatchSize = parsePositiveInt(argv[i + 1], "--upc-lookup-batch-size");
        i++;
        break;
      case "--max-rows":
        maxRows = parsePositiveInt(argv[i + 1], "--max-rows");
        i++;
        break;
      default:
        if (arg.startsWith("--")) {
          throw new Error(`Unknown flag: ${arg}`);
        }
        if (inputFile) {
          throw new Error(`Unexpected positional argument: ${arg}`);
        }
        inputFile = arg;
    }
  }

  if (!inputFile) {
    throw new Error("Missing --input <file>");
  }

  return { inputFile, outputFile, inputBatchSize, upcLookupBatchSize, maxRows };
}

/** Builds `output/<input base name>_upc_results.xlsx` for the given input path. */
function resolveDefaultOutputPath(inputFile: string): string {
  const parsedInput = path.parse(inputFile);
  return path.join("output", `${parsedInput.name}_upc_results.xlsx`);
}

/** Returns a counter with every lookup status initialised to zero. */
function createStatusCounter(): Record<KeepaUpcLookupStatus, number> {
  return {
    found: 0,
    invalid_upc: 0,
    not_found: 0,
    multiple_asins: 0,
    request_failed: 0,
  };
}

/**
 * Splits `items` into consecutive slices of at most `chunkSize` elements.
 *
 * @throws RangeError when `chunkSize` is not a finite number >= 1; a step of
 *   zero or less would otherwise never advance the loop.
 */
function chunkArray<T>(items: T[], chunkSize: number): T[][] {
  if (!Number.isFinite(chunkSize) || chunkSize < 1) {
    throw new RangeError(`chunkSize must be >= 1, received ${chunkSize}`);
  }
  const chunks: T[][] = [];
  for (let i = 0; i < items.length; i += chunkSize) {
    chunks.push(items.slice(i, i + chunkSize));
  }
  return chunks;
}

/** Builds the all-empty SKIP score attached to rows that could not be analysed. */
function skippedScore(reason: string): SupplierScore {
  return {
    salePrice: null,
    fbaFee: null,
    profit: null,
    margin: null,
    roi: null,
    demandScore: 0,
    competitionPenalty: 1,
    score: 0,
    verdict: "SKIP",
    reason,
  };
}

/** Builds a `request_failed` lookup detail for a UPC no provider returned an entry for. */
function missingLookupDetail(upc: string, reason: string): UpcLookupDetail {
  return {
    requestedUpc: upc,
    normalizedUpc: upc,
    status: "request_failed",
    asin: null,
    candidateAsins: [],
    keepaData: null,
    provider: "sp_api",
    reason,
  };
}

/**
 * Resolves the UPCs of `rows` to lookup details, querying only UPCs that are
 * not already in `runCache`.
 *
 * Each chunk is sent to SP-API first; UPCs that come back `not_found` or
 * `request_failed` are retried through Keepa, and the Keepa answer wins unless
 * it is itself `request_failed`. Every outcome is written to `runCache`.
 *
 * @param rows - Input rows whose `upc` values should be resolved.
 * @param lookupBatchSize - Maximum number of UPCs per lookup request.
 * @param runCache - Cache shared across batches of one run; mutated in place.
 * @returns Lookup details keyed by UPC for the distinct UPCs in `rows`.
 */
async function lookupUpcsWithChunking(
  rows: UpcInputRow[],
  lookupBatchSize: number,
  runCache: Map<string, UpcLookupDetail>,
): Promise<Map<string, UpcLookupDetail>> {
  const uniqueUpcs = Array.from(new Set(rows.map((row) => row.upc)));
  const missingUpcs = uniqueUpcs.filter((upc) => !runCache.has(upc));
  const chunks = chunkArray(missingUpcs, lookupBatchSize);

  const cacheHits = uniqueUpcs.length - missingUpcs.length;
  if (cacheHits > 0) {
    console.log(
      ` Reusing cached UPC lookup results for ${cacheHits}/${uniqueUpcs.length} UPCs in this batch.`,
    );
  }

  for (const [index, chunk] of chunks.entries()) {
    console.log(
      ` SP-API UPC lookup chunk ${index + 1}/${chunks.length} (${chunk.length} UPCs)...`,
    );
    const spDetails = await lookupSpApiUpcs(chunk);

    const fallbackUpcs = Array.from(spDetails.values())
      .filter(
        (detail) =>
          detail.status === "not_found" || detail.status === "request_failed",
      )
      .map((detail) => detail.normalizedUpc);
    const fallbackDetails =
      fallbackUpcs.length > 0
        ? await lookupKeepaUpcs(fallbackUpcs)
        : new Map<string, KeepaUpcLookupDetail>();

    for (const upc of chunk) {
      const fallbackDetail = fallbackDetails.get(upc);
      if (fallbackDetail && fallbackDetail.status !== "request_failed") {
        runCache.set(upc, { ...fallbackDetail, provider: "keepa" });
        continue;
      }

      // SP-API may return no entry for a UPC; spreading `undefined` would cache
      // a detail with no status, so record an explicit failure instead.
      const spDetail = spDetails.get(upc);
      runCache.set(
        upc,
        spDetail
          ? { ...spDetail, provider: "sp_api" }
          : missingLookupDetail(upc, "SP-API UPC lookup returned no result"),
      );
    }
  }

  const details = new Map<string, UpcLookupDetail>();
  for (const upc of uniqueUpcs) {
    const detail = runCache.get(upc);
    if (detail) details.set(upc, detail);
  }
  return details;
}

/**
 * Builds the product record for a row whose UPC resolved to an ASIN.
 * The category falls back to the first entry of the lookup's Keepa category tree.
 */
function toProductRecord(
  row: UpcInputRow,
  detail: UpcLookupDetail,
): ProductRecord {
  const keepaCategory = detail.keepaData?.categoryTree?.[0];
  return {
    asin: requireAsin(detail.asin),
    name: row.name ?? detail.asin ?? row.upc,
    unitCost: row.unitCost ?? 0,
    brand: row.brand,
    category: row.category ?? keepaCategory,
  };
}

/** Builds the supplier-side record (no ASIN) stored on every result row. */
function toSupplierInputRecord(row: UpcInputRow) {
  return {
    name: row.name ?? row.upc,
    unitCost: row.unitCost ?? 0,
    brand: row.brand,
    category: row.category,
  };
}

/**
 * Fetches SP-API pricing and fees for each product using a small pool of
 * workers that drain a shared queue.
 *
 * @param products - Products to price.
 * @param keepaResults - Keepa data keyed by ASIN, used to resolve the sale price.
 * @param sellabilityMap - Sellability results keyed by ASIN; a missing entry is
 *   passed on as an "unknown" status.
 * @returns Pricing/fee results keyed by ASIN.
 */
async function fetchFeesForProducts(
  products: ProductRecord[],
  keepaResults: KeepaResultMap,
  sellabilityMap: SellabilityMap,
): Promise<Map<string, SpApiPricingResult>> {
  const spApiResults = new Map<string, SpApiPricingResult>();
  const pending = [...products];
  let completed = 0;

  const worker = async (): Promise<void> => {
    for (
      let product = pending.shift();
      product !== undefined;
      product = pending.shift()
    ) {
      const sellability = sellabilityMap.get(product.asin) ?? {
        canSell: null,
        sellabilityStatus: "unknown" as const,
        sellabilityReason: "Sellability check returned no result",
      };
      const price = resolveSupplierSalePrice(
        keepaResults.get(product.asin) ?? null,
        null,
      );
      const spApi = await fetchSpApiPricingAndFees(product.asin, sellability, price);
      spApiResults.set(product.asin, spApi);

      completed++;
      if (completed % 10 === 0 || completed === products.length) {
        console.log(` [fees] ${completed}/${products.length} fetched`);
      }
    }
  };

  const workerCount = Math.min(DEFAULT_PRICING_CONCURRENCY, products.length || 1);
  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return spApiResults;
}

/** Aggregates row counts and verdict counts for the workbook summary. */
function summarizeSupplierResults(
  results: SupplierAnalysisResult[],
  unresolvedByStatus: Record<KeepaUpcLookupStatus, number>,
): SupplierExportSummary {
  let resolvedRows = 0;
  let eligibleRows = 0;
  const verdictCounts = { BUY: 0, WATCH: 0, SKIP: 0 };

  for (const result of results) {
    if (result.lookup.status === "found") resolvedRows++;
    if (result.spApi?.sellabilityStatus === "available") eligibleRows++;

    const verdict = result.score.verdict;
    if (verdict === "BUY" || verdict === "WATCH" || verdict === "SKIP") {
      verdictCounts[verdict]++;
    }
  }

  return {
    processedRows: results.length,
    resolvedRows,
    eligibleRows,
    verdictCounts,
    unresolvedByStatus,
  };
}

/** Prints up to 25 non-SKIP results, highest score first, as a console table. */
function printTopLeads(results: SupplierAnalysisResult[]): void {
  const ranked = results
    .filter((result) => result.score.verdict !== "SKIP")
    .sort((a, b) => b.score.score - a.score.score)
    .slice(0, 25)
    .map((result) => ({
      UPC: result.upc,
      ASIN: result.lookup.asin ?? "",
      Name: result.record.name.slice(0, 40),
      Cost: result.record.unitCost,
      Price: result.score.salePrice ?? "",
      Profit: result.score.profit ?? "",
      ROI: result.score.roi == null ? "" : `${Math.round(result.score.roi * 100)}%`,
      Score: result.score.score,
      Verdict: result.score.verdict,
      Reason: result.score.reason,
    }));

  console.log("\n=== Top Supplier Leads ===\n");
  console.table(ranked);
}

/**
 * Runs the full UPC file analysis: reads the input in batches, resolves UPCs
 * to ASINs, fetches Keepa data, sellability and fees for resolved rows, scores
 * them, appends the results to a run record, and writes the ranked workbook.
 *
 * @param options - Input/output paths, batch sizes and resource handling.
 * @returns Run id, row counts, per-status counts and reader statistics.
 * @throws Rethrows any failure after attempting to mark the run as failed.
 */
export async function runUpcFileAnalysis(
  options: UpcFileAnalysisOptions,
): Promise<UpcFileAnalysisSummary> {
  const inputBatchSize = Math.max(
    1,
    options.inputBatchSize ?? DEFAULT_INPUT_BATCH_SIZE,
  );
  const lookupBatchSize = Math.max(
    1,
    options.upcLookupBatchSize ?? DEFAULT_UPC_LOOKUP_BATCH_SIZE,
  );
  const outputFile =
    options.outputFile ?? resolveDefaultOutputPath(options.inputFile);
  const manageResources = options.manageResources ?? true;

  if (manageResources) {
    console.log("Connecting to Redis...");
    await connectCache();
  }

  const unresolvedByStatus = createStatusCounter();
  const allResults: SupplierAnalysisResult[] = [];
  const upcLookupCache = new Map<string, UpcLookupDetail>();
  let processedRows = 0;
  let matchedRows = 0;
  let startedRunId: number | undefined;

  try {
    // Started inside the try so a failure here still reaches the finally block
    // and releases the cache connection.
    const runId = await startRunInDb(
      options.inputFile,
      outputFile,
      undefined,
      "supplier_upc",
    );
    startedRunId = runId;

    const readerSummary = await processUpcFileInBatches(
      options.inputFile,
      async ({ batchNumber, rows }) => {
        console.log(
          `\n=== UPC input batch ${batchNumber} (${rows.length} rows) ===`,
        );
        processedRows += rows.length;

        const detailMap = await lookupUpcsWithChunking(
          rows,
          lookupBatchSize,
          upcLookupCache,
        );

        const matchedEntries: Array<{
          row: UpcInputRow;
          detail: UpcLookupDetail;
          product: ProductRecord;
        }> = [];
        const batchResults: SupplierAnalysisResult[] = [];

        for (const row of rows) {
          const detail =
            detailMap.get(row.upc) ??
            missingLookupDetail(row.upc, "UPC lookup returned no result");
          unresolvedByStatus[detail.status] += 1;

          if (detail.status === "found" && detail.asin) {
            matchedRows += 1;
            matchedEntries.push({
              row,
              detail,
              product: toProductRecord(row, detail),
            });
            continue;
          }

          // Anything that is not a usable match gets a SKIP row, including a
          // "found" status with no ASIN, so no input row vanishes from the output.
          batchResults.push({
            upc: row.upc,
            rowNumber: row.rowNumber,
            record: toSupplierInputRecord(row),
            product: null,
            lookup: detail,
            keepa: null,
            spApi: null,
            score: skippedScore(detail.reason ?? "UPC unresolved"),
            fetchedAt: new Date().toISOString(),
          });
        }

        const matchedProducts = matchedEntries.map((entry) => entry.product);
        console.log(
          `Batch ${batchNumber}: ${matchedProducts.length}/${rows.length} rows resolved to single ASINs`,
        );

        if (matchedProducts.length > 0) {
          const asins = matchedProducts.map((product) => product.asin);

          console.log(`Fetching ${matchedProducts.length} ASINs from Keepa...`);
          const keepaResults = await fetchKeepaDataBatch(asins);

          console.log(`Checking sellability for ${matchedProducts.length} ASINs...`);
          const sellabilityMap = await fetchSellabilityBatch(asins);

          console.log(`Fetching fees for ${matchedProducts.length} ASINs...`);
          const spApiResults = await fetchFeesForProducts(
            matchedProducts,
            keepaResults,
            sellabilityMap,
          );

          for (const entry of matchedEntries) {
            const keepa =
              keepaResults.get(entry.product.asin) ??
              entry.detail.keepaData ??
              null;
            const spApi = spApiResults.get(entry.product.asin) ?? null;
            batchResults.push({
              upc: entry.detail.normalizedUpc,
              rowNumber: entry.row.rowNumber,
              record: toSupplierInputRecord(entry.row),
              product: entry.product,
              lookup: entry.detail,
              keepa,
              spApi,
              score: scoreSupplierProduct(entry.product, keepa, spApi),
              fetchedAt: new Date().toISOString(),
            });
          }
        }

        await appendSupplierResultsToRun(runId, batchResults);
        allResults.push(...batchResults);
      },
      {
        batchSize: inputBatchSize,
        maxRows: options.maxRows,
      },
    );

    const runCounts = await refreshRunCountsInDb(runId);
    const exportSummary = summarizeSupplierResults(allResults, unresolvedByStatus);
    await writeSupplierWorkbook(outputFile, allResults, exportSummary);
    await completeRunInDb(runId);

    if (allResults.length > 0) {
      printTopLeads(allResults);
    } else {
      console.log("No supplier rows were analyzed.");
    }

    console.log(`Ranked workbook written: ${outputFile}`);

    return {
      runId,
      inputFile: options.inputFile,
      outputFile,
      processedRows,
      matchedRows,
      unresolvedByStatus,
      runCounts,
      reader: {
        mode: readerSummary.mode,
        totalRowsSeen: readerSummary.totalRowsSeen,
        emittedRows: readerSummary.emittedRows,
        skippedMissingUpc: readerSummary.skippedMissingUpc,
        skippedInvalidUpc: readerSummary.skippedInvalidUpc,
      },
    };
  } catch (error) {
    if (startedRunId !== undefined) {
      try {
        await failRunInDb(startedRunId, error);
      } catch (markError) {
        // Keep the original failure as the one that propagates.
        const detail =
          markError instanceof Error ? markError.message : String(markError);
        console.error(`Could not mark run ${startedRunId} as failed: ${detail}`);
      }
    }
    throw error;
  } finally {
    if (manageResources) {
      await disconnectCache();
    }
  }
}

/** CLI entry point: parses arguments, runs the analysis and prints the summary as JSON. */
async function main(): Promise<void> {
  const parsed = parseArgs(process.argv.slice(2));
  const summary = await runUpcFileAnalysis(parsed);
  console.log("\n=== UPC file analysis summary ===");
  console.log(JSON.stringify(summary, null, 2));
}

if (import.meta.main) {
  main().catch((err) => {
    const message = err instanceof Error ? err.message : String(err);
    console.error(`UPC file analysis failed: ${message}`);
    process.exit(1);
  });
}