Refactor SP-API test script and improve type definitions
- Updated `sp-test.ts` to enhance argument parsing and error handling for sellability checks. - Refactored `types.ts` to maintain consistent formatting and improve readability. - Improved `writer.ts` for better result handling and CSV writing, ensuring clarity in output. - Adjusted `tsconfig.json` formatting for consistency and readability.
This commit is contained in:
690
src/index.ts
690
src/index.ts
@@ -1,345 +1,345 @@
|
||||
import { readProducts } from "./reader.ts";
|
||||
import { fetchKeepaDataBatch } from "./keepa.ts";
|
||||
import { fetchSellabilityBatch, fetchSpApiPricingAndFees } from "./sp-api.ts";
|
||||
import { connectCache, getCache, setCache, disconnectCache } from "./cache.ts";
|
||||
import { analyzeProducts } from "./llm.ts";
|
||||
import { printResults, writeResultsCsv } from "./writer.ts";
|
||||
import path from "node:path";
|
||||
import type {
|
||||
EnrichedProduct,
|
||||
AnalysisResult,
|
||||
KeepaData,
|
||||
ProductRecord,
|
||||
SellabilityInfo,
|
||||
SpApiData,
|
||||
} from "./types.ts";
|
||||
|
||||
const LLM_BATCH_SIZE = 5;
|
||||
const INPUT_BATCH_SIZE = 50;
|
||||
|
||||
function parseArgs(): { inputFile: string; outputFile?: string } {
|
||||
const args = process.argv.slice(2);
|
||||
const inputFile = args.find((a) => !a.startsWith("--"));
|
||||
const outIdx = args.indexOf("--out");
|
||||
const outputFile = outIdx !== -1 ? args[outIdx + 1] : undefined;
|
||||
|
||||
if (!inputFile) {
|
||||
console.error(
|
||||
"Usage: bun run src/index.ts <input.csv|xlsx> [--out results.csv]",
|
||||
);
|
||||
process.exit(1);
|
||||
}
|
||||
return { inputFile, outputFile };
|
||||
}
|
||||
|
||||
function chunkArray<T>(items: T[], chunkSize: number): T[][] {
|
||||
const chunks: T[][] = [];
|
||||
for (let i = 0; i < items.length; i += chunkSize) {
|
||||
chunks.push(items.slice(i, i + chunkSize));
|
||||
}
|
||||
return chunks;
|
||||
}
|
||||
|
||||
function resolveBaseOutputPath(inputFile: string, outputFile?: string): string {
|
||||
if (outputFile) return outputFile;
|
||||
|
||||
const parsedInput = path.parse(inputFile);
|
||||
return path.join(parsedInput.dir, `${parsedInput.name}_results.xlsx`);
|
||||
}
|
||||
|
||||
function buildChunkOutputPath(
|
||||
baseOutputPath: string,
|
||||
chunkIndex: number,
|
||||
): string {
|
||||
const parsed = path.parse(baseOutputPath);
|
||||
const extension = parsed.ext || ".xlsx";
|
||||
const chunkSuffix = String(chunkIndex + 1).padStart(3, "0");
|
||||
return path.join(
|
||||
parsed.dir,
|
||||
`${parsed.name}_part_${chunkSuffix}${extension}`,
|
||||
);
|
||||
}
|
||||
|
||||
async function processProductChunk(
|
||||
products: ProductRecord[],
|
||||
): Promise<AnalysisResult[]> {
|
||||
// Phase 2: Check cache for all ASINs in chunk
|
||||
console.log(`\nChecking cache for ${products.length} products...`);
|
||||
const cached = new Map<string, EnrichedProduct>();
|
||||
const excludedCachedAsins = new Set<string>();
|
||||
const uncachedProducts: ProductRecord[] = [];
|
||||
|
||||
for (const p of products) {
|
||||
const hit = await getCache(p.asin);
|
||||
if (hit) {
|
||||
if (hit.spApi.sellabilityStatus === "available") {
|
||||
console.log(` [cache hit] ${p.asin}`);
|
||||
cached.set(p.asin, hit);
|
||||
} else {
|
||||
excludedCachedAsins.add(p.asin);
|
||||
console.log(
|
||||
` [exclude cached] ${p.asin} — status=${hit.spApi.sellabilityStatus}`,
|
||||
);
|
||||
}
|
||||
} else {
|
||||
uncachedProducts.push(p);
|
||||
}
|
||||
}
|
||||
console.log(
|
||||
`${cached.size} cached available, ${excludedCachedAsins.size} cached excluded, ${uncachedProducts.length} to fetch`,
|
||||
);
|
||||
|
||||
// Phase 3: Sellability gate — check uncached ASINs before anything else
|
||||
const sellabilityMap = new Map<string, SellabilityInfo>();
|
||||
const availableProducts: ProductRecord[] = [];
|
||||
const unavailableProducts: ProductRecord[] = [];
|
||||
|
||||
if (uncachedProducts.length > 0) {
|
||||
console.log(
|
||||
`\nChecking sellability for ${uncachedProducts.length} ASINs...`,
|
||||
);
|
||||
const sellResults = await fetchSellabilityBatch(
|
||||
uncachedProducts.map((p) => p.asin),
|
||||
);
|
||||
|
||||
for (const p of uncachedProducts) {
|
||||
const info = sellResults.get(p.asin) ?? {
|
||||
canSell: null,
|
||||
sellabilityStatus: "unknown" as const,
|
||||
sellabilityReason: "Sellability check returned no result",
|
||||
};
|
||||
sellabilityMap.set(p.asin, info);
|
||||
|
||||
// Keep only ASINs that are explicitly available.
|
||||
if (info.sellabilityStatus === "available") {
|
||||
availableProducts.push(p);
|
||||
console.log(
|
||||
` [available] ${p.asin} — status=${info.sellabilityStatus}`,
|
||||
);
|
||||
} else {
|
||||
unavailableProducts.push(p);
|
||||
console.log(
|
||||
` [exclude] ${p.asin} — status=${info.sellabilityStatus}, reason=${info.sellabilityReason ?? "n/a"}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
console.log(
|
||||
`\nSellability gate: ${availableProducts.length} available, ${unavailableProducts.length} excluded`,
|
||||
);
|
||||
}
|
||||
|
||||
// Phase 4: Keepa batch fetch — only for available (uncached) ASINs
|
||||
let keepaResults = new Map<string, KeepaData>();
|
||||
if (availableProducts.length > 0) {
|
||||
console.log(`\nFetching ${availableProducts.length} ASINs from Keepa...`);
|
||||
try {
|
||||
keepaResults = await fetchKeepaDataBatch(
|
||||
availableProducts.map((p) => p.asin),
|
||||
);
|
||||
} catch (err) {
|
||||
console.warn(`Keepa batch fetch failed: ${err}`);
|
||||
}
|
||||
}
|
||||
|
||||
// Phase 5: SP-API pricing + fees — only for available ASINs
|
||||
console.log(
|
||||
`\nFetching pricing & fees for ${availableProducts.length} ASINs...`,
|
||||
);
|
||||
const spApiResults = new Map<string, SpApiData>();
|
||||
|
||||
// Concurrency-limited pricing+fees fetches
|
||||
const pricingQueue = [...availableProducts];
|
||||
let pricingDone = 0;
|
||||
|
||||
async function fetchNextPricing(): Promise<void> {
|
||||
while (pricingQueue.length > 0) {
|
||||
const p = pricingQueue.shift()!;
|
||||
const sellability = sellabilityMap.get(p.asin)!;
|
||||
const spApi = await fetchSpApiPricingAndFees(p.asin, sellability);
|
||||
|
||||
const keepa = keepaResults.get(p.asin);
|
||||
if (keepa?.currentPrice && spApi.estimatedSalePrice === 0) {
|
||||
spApi.estimatedSalePrice = keepa.currentPrice;
|
||||
}
|
||||
|
||||
spApiResults.set(p.asin, spApi);
|
||||
pricingDone++;
|
||||
if (pricingDone % 10 === 0 || pricingDone === availableProducts.length) {
|
||||
console.log(
|
||||
` [pricing] ${pricingDone}/${availableProducts.length} fetched`,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const pricingWorkers = Array.from(
|
||||
{ length: Math.min(5, availableProducts.length || 1) },
|
||||
() => fetchNextPricing(),
|
||||
);
|
||||
await Promise.all(pricingWorkers);
|
||||
|
||||
// Phase 6: Build enriched products
|
||||
console.log(`\nEnriching products...`);
|
||||
const enriched: EnrichedProduct[] = [];
|
||||
const availableAsins = new Set(availableProducts.map((ap) => ap.asin));
|
||||
|
||||
for (const p of products) {
|
||||
if (excludedCachedAsins.has(p.asin)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Cached products — already enriched
|
||||
const cachedProduct = cached.get(p.asin);
|
||||
if (cachedProduct) {
|
||||
enriched.push(cachedProduct);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Exclude products that are not explicitly available.
|
||||
if (!availableAsins.has(p.asin)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Available products — full enrichment
|
||||
const keepa = keepaResults.get(p.asin) ?? null;
|
||||
const spApi = spApiResults.get(p.asin) ?? {
|
||||
fbaFee: 5.0,
|
||||
fbmFee: 1.5,
|
||||
referralFeePercent: 15,
|
||||
estimatedSalePrice: 0,
|
||||
canSell: null,
|
||||
sellabilityStatus: "unknown" as const,
|
||||
sellabilityReason: "SP-API data missing",
|
||||
};
|
||||
|
||||
const product: EnrichedProduct = {
|
||||
record: p,
|
||||
keepa,
|
||||
spApi,
|
||||
fetchedAt: new Date().toISOString(),
|
||||
};
|
||||
|
||||
await setCache(p.asin, product);
|
||||
enriched.push(product);
|
||||
|
||||
if (keepa) {
|
||||
console.log(
|
||||
` [enriched] ${p.asin} — price: $${keepa.currentPrice ?? "N/A"}, rank: ${keepa.salesRank ?? "N/A"}`,
|
||||
);
|
||||
} else {
|
||||
console.log(` [no keepa] ${p.asin} — using spreadsheet data only`);
|
||||
}
|
||||
}
|
||||
|
||||
// Phase 7: LLM analysis in batches — only for enriched available products
|
||||
console.log(
|
||||
`\nAnalyzing ${enriched.length} products via LLM (batch size: ${LLM_BATCH_SIZE})...\n`,
|
||||
);
|
||||
|
||||
const results: AnalysisResult[] = [];
|
||||
for (let i = 0; i < enriched.length; i += LLM_BATCH_SIZE) {
|
||||
const batch = enriched.slice(i, i + LLM_BATCH_SIZE);
|
||||
const batchNum = Math.floor(i / LLM_BATCH_SIZE) + 1;
|
||||
const totalBatches = Math.ceil(enriched.length / LLM_BATCH_SIZE);
|
||||
console.log(` LLM batch ${batchNum}/${totalBatches}...`);
|
||||
|
||||
// Wait between batches to avoid overwhelming LM Studio
|
||||
if (i > 0) {
|
||||
console.log(` Waiting 5s before next batch...`);
|
||||
await new Promise((r) => setTimeout(r, 5000));
|
||||
}
|
||||
|
||||
let verdicts;
|
||||
try {
|
||||
verdicts = await analyzeProducts(batch);
|
||||
} catch {
|
||||
console.warn(` LLM batch error, retrying after 10s...`);
|
||||
await new Promise((r) => setTimeout(r, 10_000));
|
||||
try {
|
||||
verdicts = await analyzeProducts(batch);
|
||||
} catch (retryErr) {
|
||||
console.error(` LLM analysis failed: ${retryErr}`);
|
||||
verdicts = null;
|
||||
}
|
||||
}
|
||||
|
||||
for (let j = 0; j < batch.length; j++) {
|
||||
results.push({
|
||||
product: batch[j]!,
|
||||
verdict: verdicts?.[j] ?? {
|
||||
asin: batch[j]!.record.asin,
|
||||
verdict: "SKIP",
|
||||
confidence: 0,
|
||||
reasoning: "LLM analysis failed",
|
||||
},
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
async function main() {
|
||||
const { inputFile, outputFile } = parseArgs();
|
||||
|
||||
console.log("Connecting to Redis...");
|
||||
await connectCache();
|
||||
|
||||
try {
|
||||
// Phase 1: Read input file
|
||||
console.log(`\nReading ${inputFile}...`);
|
||||
const products = readProducts(inputFile);
|
||||
|
||||
if (products.length === 0) {
|
||||
console.error("No valid products found in input file.");
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
const productChunks = chunkArray(products, INPUT_BATCH_SIZE);
|
||||
const hasMultipleChunks = productChunks.length > 1;
|
||||
const shouldWriteChunkFiles = hasMultipleChunks;
|
||||
const resolvedBaseOutputPath = resolveBaseOutputPath(inputFile, outputFile);
|
||||
const allResults: AnalysisResult[] = [];
|
||||
|
||||
if (hasMultipleChunks) {
|
||||
console.log(
|
||||
`\nLarge input detected (${products.length} products). Processing in chunks of ${INPUT_BATCH_SIZE}.`,
|
||||
);
|
||||
console.log(
|
||||
`Chunk outputs will be written as numbered files using base: ${resolvedBaseOutputPath}`,
|
||||
);
|
||||
}
|
||||
|
||||
for (let chunkIndex = 0; chunkIndex < productChunks.length; chunkIndex++) {
|
||||
const chunk = productChunks[chunkIndex]!;
|
||||
console.log(
|
||||
`\n=== Input chunk ${chunkIndex + 1}/${productChunks.length} (${chunk.length} products) ===`,
|
||||
);
|
||||
|
||||
const chunkResults = await processProductChunk(chunk);
|
||||
allResults.push(...chunkResults);
|
||||
|
||||
if (shouldWriteChunkFiles) {
|
||||
const chunkOutputPath = buildChunkOutputPath(
|
||||
resolvedBaseOutputPath,
|
||||
chunkIndex,
|
||||
);
|
||||
writeResultsCsv(chunkResults, chunkOutputPath);
|
||||
}
|
||||
}
|
||||
|
||||
printResults(allResults);
|
||||
|
||||
if (!hasMultipleChunks && outputFile) {
|
||||
writeResultsCsv(allResults, outputFile);
|
||||
}
|
||||
} finally {
|
||||
await disconnectCache();
|
||||
}
|
||||
}
|
||||
|
||||
main().catch((err) => {
|
||||
console.error("Fatal error:", err);
|
||||
process.exit(1);
|
||||
});
|
||||
import { readProducts } from "./reader.ts";
|
||||
import { fetchKeepaDataBatch } from "./keepa.ts";
|
||||
import { fetchSellabilityBatch, fetchSpApiPricingAndFees } from "./sp-api.ts";
|
||||
import { connectCache, getCache, setCache, disconnectCache } from "./cache.ts";
|
||||
import { analyzeProducts } from "./llm.ts";
|
||||
import { printResults, writeResultsCsv } from "./writer.ts";
|
||||
import path from "node:path";
|
||||
import type {
|
||||
EnrichedProduct,
|
||||
AnalysisResult,
|
||||
KeepaData,
|
||||
ProductRecord,
|
||||
SellabilityInfo,
|
||||
SpApiData,
|
||||
} from "./types.ts";
|
||||
|
||||
const LLM_BATCH_SIZE = 5;
|
||||
const INPUT_BATCH_SIZE = 50;
|
||||
|
||||
function parseArgs(): { inputFile: string; outputFile?: string } {
|
||||
const args = process.argv.slice(2);
|
||||
const inputFile = args.find((a) => !a.startsWith("--"));
|
||||
const outIdx = args.indexOf("--out");
|
||||
const outputFile = outIdx !== -1 ? args[outIdx + 1] : undefined;
|
||||
|
||||
if (!inputFile) {
|
||||
console.error(
|
||||
"Usage: bun run src/index.ts <input.csv|xlsx> [--out results.csv]",
|
||||
);
|
||||
process.exit(1);
|
||||
}
|
||||
return { inputFile, outputFile };
|
||||
}
|
||||
|
||||
function chunkArray<T>(items: T[], chunkSize: number): T[][] {
|
||||
const chunks: T[][] = [];
|
||||
for (let i = 0; i < items.length; i += chunkSize) {
|
||||
chunks.push(items.slice(i, i + chunkSize));
|
||||
}
|
||||
return chunks;
|
||||
}
|
||||
|
||||
function resolveBaseOutputPath(inputFile: string, outputFile?: string): string {
|
||||
if (outputFile) return outputFile;
|
||||
|
||||
const parsedInput = path.parse(inputFile);
|
||||
return path.join(parsedInput.dir, `${parsedInput.name}_results.xlsx`);
|
||||
}
|
||||
|
||||
function buildChunkOutputPath(
|
||||
baseOutputPath: string,
|
||||
chunkIndex: number,
|
||||
): string {
|
||||
const parsed = path.parse(baseOutputPath);
|
||||
const extension = parsed.ext || ".xlsx";
|
||||
const chunkSuffix = String(chunkIndex + 1).padStart(3, "0");
|
||||
return path.join(
|
||||
parsed.dir,
|
||||
`${parsed.name}_part_${chunkSuffix}${extension}`,
|
||||
);
|
||||
}
|
||||
|
||||
async function processProductChunk(
|
||||
products: ProductRecord[],
|
||||
): Promise<AnalysisResult[]> {
|
||||
// Phase 2: Check cache for all ASINs in chunk
|
||||
console.log(`\nChecking cache for ${products.length} products...`);
|
||||
const cached = new Map<string, EnrichedProduct>();
|
||||
const excludedCachedAsins = new Set<string>();
|
||||
const uncachedProducts: ProductRecord[] = [];
|
||||
|
||||
for (const p of products) {
|
||||
const hit = await getCache(p.asin);
|
||||
if (hit) {
|
||||
if (hit.spApi.sellabilityStatus === "available") {
|
||||
console.log(` [cache hit] ${p.asin}`);
|
||||
cached.set(p.asin, hit);
|
||||
} else {
|
||||
excludedCachedAsins.add(p.asin);
|
||||
console.log(
|
||||
` [exclude cached] ${p.asin} — status=${hit.spApi.sellabilityStatus}`,
|
||||
);
|
||||
}
|
||||
} else {
|
||||
uncachedProducts.push(p);
|
||||
}
|
||||
}
|
||||
console.log(
|
||||
`${cached.size} cached available, ${excludedCachedAsins.size} cached excluded, ${uncachedProducts.length} to fetch`,
|
||||
);
|
||||
|
||||
// Phase 3: Sellability gate — check uncached ASINs before anything else
|
||||
const sellabilityMap = new Map<string, SellabilityInfo>();
|
||||
const availableProducts: ProductRecord[] = [];
|
||||
const unavailableProducts: ProductRecord[] = [];
|
||||
|
||||
if (uncachedProducts.length > 0) {
|
||||
console.log(
|
||||
`\nChecking sellability for ${uncachedProducts.length} ASINs...`,
|
||||
);
|
||||
const sellResults = await fetchSellabilityBatch(
|
||||
uncachedProducts.map((p) => p.asin),
|
||||
);
|
||||
|
||||
for (const p of uncachedProducts) {
|
||||
const info = sellResults.get(p.asin) ?? {
|
||||
canSell: null,
|
||||
sellabilityStatus: "unknown" as const,
|
||||
sellabilityReason: "Sellability check returned no result",
|
||||
};
|
||||
sellabilityMap.set(p.asin, info);
|
||||
|
||||
// Keep only ASINs that are explicitly available.
|
||||
if (info.sellabilityStatus === "available") {
|
||||
availableProducts.push(p);
|
||||
console.log(
|
||||
` [available] ${p.asin} — status=${info.sellabilityStatus}`,
|
||||
);
|
||||
} else {
|
||||
unavailableProducts.push(p);
|
||||
console.log(
|
||||
` [exclude] ${p.asin} — status=${info.sellabilityStatus}, reason=${info.sellabilityReason ?? "n/a"}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
console.log(
|
||||
`\nSellability gate: ${availableProducts.length} available, ${unavailableProducts.length} excluded`,
|
||||
);
|
||||
}
|
||||
|
||||
// Phase 4: Keepa batch fetch — only for available (uncached) ASINs
|
||||
let keepaResults = new Map<string, KeepaData>();
|
||||
if (availableProducts.length > 0) {
|
||||
console.log(`\nFetching ${availableProducts.length} ASINs from Keepa...`);
|
||||
try {
|
||||
keepaResults = await fetchKeepaDataBatch(
|
||||
availableProducts.map((p) => p.asin),
|
||||
);
|
||||
} catch (err) {
|
||||
console.warn(`Keepa batch fetch failed: ${err}`);
|
||||
}
|
||||
}
|
||||
|
||||
// Phase 5: SP-API pricing + fees — only for available ASINs
|
||||
console.log(
|
||||
`\nFetching pricing & fees for ${availableProducts.length} ASINs...`,
|
||||
);
|
||||
const spApiResults = new Map<string, SpApiData>();
|
||||
|
||||
// Concurrency-limited pricing+fees fetches
|
||||
const pricingQueue = [...availableProducts];
|
||||
let pricingDone = 0;
|
||||
|
||||
async function fetchNextPricing(): Promise<void> {
|
||||
while (pricingQueue.length > 0) {
|
||||
const p = pricingQueue.shift()!;
|
||||
const sellability = sellabilityMap.get(p.asin)!;
|
||||
const spApi = await fetchSpApiPricingAndFees(p.asin, sellability);
|
||||
|
||||
const keepa = keepaResults.get(p.asin);
|
||||
if (keepa?.currentPrice && spApi.estimatedSalePrice === 0) {
|
||||
spApi.estimatedSalePrice = keepa.currentPrice;
|
||||
}
|
||||
|
||||
spApiResults.set(p.asin, spApi);
|
||||
pricingDone++;
|
||||
if (pricingDone % 10 === 0 || pricingDone === availableProducts.length) {
|
||||
console.log(
|
||||
` [pricing] ${pricingDone}/${availableProducts.length} fetched`,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const pricingWorkers = Array.from(
|
||||
{ length: Math.min(5, availableProducts.length || 1) },
|
||||
() => fetchNextPricing(),
|
||||
);
|
||||
await Promise.all(pricingWorkers);
|
||||
|
||||
// Phase 6: Build enriched products
|
||||
console.log(`\nEnriching products...`);
|
||||
const enriched: EnrichedProduct[] = [];
|
||||
const availableAsins = new Set(availableProducts.map((ap) => ap.asin));
|
||||
|
||||
for (const p of products) {
|
||||
if (excludedCachedAsins.has(p.asin)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Cached products — already enriched
|
||||
const cachedProduct = cached.get(p.asin);
|
||||
if (cachedProduct) {
|
||||
enriched.push(cachedProduct);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Exclude products that are not explicitly available.
|
||||
if (!availableAsins.has(p.asin)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Available products — full enrichment
|
||||
const keepa = keepaResults.get(p.asin) ?? null;
|
||||
const spApi = spApiResults.get(p.asin) ?? {
|
||||
fbaFee: 5.0,
|
||||
fbmFee: 1.5,
|
||||
referralFeePercent: 15,
|
||||
estimatedSalePrice: 0,
|
||||
canSell: null,
|
||||
sellabilityStatus: "unknown" as const,
|
||||
sellabilityReason: "SP-API data missing",
|
||||
};
|
||||
|
||||
const product: EnrichedProduct = {
|
||||
record: p,
|
||||
keepa,
|
||||
spApi,
|
||||
fetchedAt: new Date().toISOString(),
|
||||
};
|
||||
|
||||
await setCache(p.asin, product);
|
||||
enriched.push(product);
|
||||
|
||||
if (keepa) {
|
||||
console.log(
|
||||
` [enriched] ${p.asin} — price: $${keepa.currentPrice ?? "N/A"}, rank: ${keepa.salesRank ?? "N/A"}`,
|
||||
);
|
||||
} else {
|
||||
console.log(` [no keepa] ${p.asin} — using spreadsheet data only`);
|
||||
}
|
||||
}
|
||||
|
||||
// Phase 7: LLM analysis in batches — only for enriched available products
|
||||
console.log(
|
||||
`\nAnalyzing ${enriched.length} products via LLM (batch size: ${LLM_BATCH_SIZE})...\n`,
|
||||
);
|
||||
|
||||
const results: AnalysisResult[] = [];
|
||||
for (let i = 0; i < enriched.length; i += LLM_BATCH_SIZE) {
|
||||
const batch = enriched.slice(i, i + LLM_BATCH_SIZE);
|
||||
const batchNum = Math.floor(i / LLM_BATCH_SIZE) + 1;
|
||||
const totalBatches = Math.ceil(enriched.length / LLM_BATCH_SIZE);
|
||||
console.log(` LLM batch ${batchNum}/${totalBatches}...`);
|
||||
|
||||
// Wait between batches to avoid overwhelming LM Studio
|
||||
if (i > 0) {
|
||||
console.log(` Waiting 5s before next batch...`);
|
||||
await new Promise((r) => setTimeout(r, 5000));
|
||||
}
|
||||
|
||||
let verdicts;
|
||||
try {
|
||||
verdicts = await analyzeProducts(batch);
|
||||
} catch {
|
||||
console.warn(` LLM batch error, retrying after 10s...`);
|
||||
await new Promise((r) => setTimeout(r, 10_000));
|
||||
try {
|
||||
verdicts = await analyzeProducts(batch);
|
||||
} catch (retryErr) {
|
||||
console.error(` LLM analysis failed: ${retryErr}`);
|
||||
verdicts = null;
|
||||
}
|
||||
}
|
||||
|
||||
for (let j = 0; j < batch.length; j++) {
|
||||
results.push({
|
||||
product: batch[j]!,
|
||||
verdict: verdicts?.[j] ?? {
|
||||
asin: batch[j]!.record.asin,
|
||||
verdict: "SKIP",
|
||||
confidence: 0,
|
||||
reasoning: "LLM analysis failed",
|
||||
},
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
async function main() {
|
||||
const { inputFile, outputFile } = parseArgs();
|
||||
|
||||
console.log("Connecting to Redis...");
|
||||
await connectCache();
|
||||
|
||||
try {
|
||||
// Phase 1: Read input file
|
||||
console.log(`\nReading ${inputFile}...`);
|
||||
const products = readProducts(inputFile);
|
||||
|
||||
if (products.length === 0) {
|
||||
console.error("No valid products found in input file.");
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
const productChunks = chunkArray(products, INPUT_BATCH_SIZE);
|
||||
const hasMultipleChunks = productChunks.length > 1;
|
||||
const shouldWriteChunkFiles = hasMultipleChunks;
|
||||
const resolvedBaseOutputPath = resolveBaseOutputPath(inputFile, outputFile);
|
||||
const allResults: AnalysisResult[] = [];
|
||||
|
||||
if (hasMultipleChunks) {
|
||||
console.log(
|
||||
`\nLarge input detected (${products.length} products). Processing in chunks of ${INPUT_BATCH_SIZE}.`,
|
||||
);
|
||||
console.log(
|
||||
`Chunk outputs will be written as numbered files using base: ${resolvedBaseOutputPath}`,
|
||||
);
|
||||
}
|
||||
|
||||
for (let chunkIndex = 0; chunkIndex < productChunks.length; chunkIndex++) {
|
||||
const chunk = productChunks[chunkIndex]!;
|
||||
console.log(
|
||||
`\n=== Input chunk ${chunkIndex + 1}/${productChunks.length} (${chunk.length} products) ===`,
|
||||
);
|
||||
|
||||
const chunkResults = await processProductChunk(chunk);
|
||||
allResults.push(...chunkResults);
|
||||
|
||||
if (shouldWriteChunkFiles) {
|
||||
const chunkOutputPath = buildChunkOutputPath(
|
||||
resolvedBaseOutputPath,
|
||||
chunkIndex,
|
||||
);
|
||||
writeResultsCsv(chunkResults, chunkOutputPath);
|
||||
}
|
||||
}
|
||||
|
||||
printResults(allResults);
|
||||
|
||||
if (!hasMultipleChunks && outputFile) {
|
||||
writeResultsCsv(allResults, outputFile);
|
||||
}
|
||||
} finally {
|
||||
await disconnectCache();
|
||||
}
|
||||
}
|
||||
|
||||
main().catch((err) => {
|
||||
console.error("Fatal error:", err);
|
||||
process.exit(1);
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user