import { existsSync, mkdirSync, readFileSync } from "node:fs";
import path from "node:path";
import { db } from "./db/index.ts";
import { runs, categoryProductResults } from "./db/schema.ts";
import { eq, sql } from "drizzle-orm";
import { config } from "./config.ts";
import { analyzeProducts } from "./llm.ts";
import { fetchSellabilityBatch, fetchSpApiPricingAndFees } from "./sp-api.ts";
import type {
  AnalysisResult,
  EnrichedProduct,
  KeepaData,
  LlmVerdict,
  ProductRecord,
  SellabilityInfo,
  SpApiData,
} from "./types.ts";

/** A category as normalized from the Keepa category payload. */
type CategoryInfo = {
  id: number;
  label: string;
  parentId: number;
  childCount: number;
};

/** Command-line options after defaults and validation have been applied. */
type ParsedArgs = {
  outputDir: string;
  categoryLimit: number;
  perCategoryTop: number;
  categoryCandidatePool: number;
  minMonthlySold: number;
  blacklistFile: string;
  useClaude: boolean;
};

/** Per-category counters and status, mirrored into the `runs` table. */
type CategoryRunSummary = {
  categoryId: number;
  categoryLabel: string;
  topAsinsChecked: number;
  availableAsins: number;
  fba: number;
  fbm: number;
  skip: number;
  status: "running" | "ok" | "empty" | "failed";
  error: string;
  runId?: number;
  results?: AnalysisResult[];
};

/** Parsed Keepa product data plus the product title, keyed by ASIN in enrichment maps. */
type KeepaEnrichment = { keepa: KeepaData; title: string };

const KEEPA_BASE = "https://api.keepa.com";
const DOMAIN_US = 1;
const AMAZON_US_SELLER_ID = "ATVPDKIKX0DER";
// Subtracted from Unix-epoch minutes to obtain the "Keepa minutes" used in
// buy box history timestamps (see computeAmazonBuyBoxSharePctFromHistory).
const KEEPA_MINUTES_OFFSET = 21_564_000;
const DEFAULT_CATEGORY_LIMIT = 32;
const DEFAULT_PER_CATEGORY_TOP = 100;
const DEFAULT_CATEGORY_CANDIDATE_POOL = 500;
const DEFAULT_MIN_MONTHLY_SOLD = 300;
const SELLABILITY_BATCH_SIZE = 60;
const LLM_BATCH_SIZE = 10;
const PRICING_CONCURRENCY = 5;
const KEEPA_PRODUCT_CHUNK_SIZE = 100;
const DEFAULT_BLACKLIST_FILE = path.join(
  process.cwd(),
  "category-blacklist.csv",
);

// Module-level Keepa token bucket state, refreshed from every Keepa response.
let keepaTokensLeft = 1;
let keepaRefillRate = 1;
let keepaLastRequestMs = 0;

/**
 * Writes a timestamped, level-tagged line to stdout.
 *
 * @param level Severity tag printed in upper case.
 * @param message Main message text.
 * @param args Extra values forwarded to `console.log`.
 */
function log(
  level: "info" | "warn" | "error",
  message: string,
  ...args: unknown[]
) {
  const timestamp = new Date().toISOString();
  console.log(`[${timestamp}] [${level.toUpperCase()}] ${message}`, ...args);
}

/**
 * Reads the CLI flags from `process.argv`, applies defaults and validates
 * the numeric options. Prints usage and exits with code 1 on invalid input.
 *
 * @returns The validated options.
 */
function parseArgs(): ParsedArgs {
  const args = process.argv.slice(2);
  const useClaude = hasFlag(args, "--claude");
  const outputDir =
    readFlagValue(args, "--out-dir") ?? path.join(process.cwd(), "output");
  const blacklistFile =
    readFlagValue(args, "--blacklist-file") ?? DEFAULT_BLACKLIST_FILE;
  const categoryLimitRaw = readFlagValue(args, "--category-limit");
  const perCategoryTopRaw = readFlagValue(args, "--per-category-top");
  const categoryCandidatePoolRaw = readFlagValue(
    args,
    "--category-candidate-pool",
  );
  const minMonthlySoldRaw = readFlagValue(args, "--min-monthly-sold");

  const categoryLimit = categoryLimitRaw
    ? Number(categoryLimitRaw)
    : DEFAULT_CATEGORY_LIMIT;
  const perCategoryTop = perCategoryTopRaw
    ? Number(perCategoryTopRaw)
    : DEFAULT_PER_CATEGORY_TOP;
  const categoryCandidatePool = categoryCandidatePoolRaw
    ? Number(categoryCandidatePoolRaw)
    : DEFAULT_CATEGORY_CANDIDATE_POOL;
  const minMonthlySold = minMonthlySoldRaw
    ? Number(minMonthlySoldRaw)
    : DEFAULT_MIN_MONTHLY_SOLD;

  if (!Number.isInteger(categoryLimit) || categoryLimit <= 0) {
    printUsageAndExit("--category-limit must be a positive integer.");
  }
  if (!Number.isInteger(perCategoryTop) || perCategoryTop <= 0) {
    printUsageAndExit("--per-category-top must be a positive integer.");
  }
  if (!Number.isInteger(categoryCandidatePool) || categoryCandidatePool <= 0) {
    printUsageAndExit("--category-candidate-pool must be a positive integer.");
  }
  if (categoryCandidatePool < perCategoryTop) {
    printUsageAndExit(
      "--category-candidate-pool must be greater than or equal to --per-category-top.",
    );
  }
  if (!Number.isInteger(minMonthlySold) || minMonthlySold < 0) {
    printUsageAndExit("--min-monthly-sold must be a non-negative integer.");
  }

  return {
    outputDir,
    categoryLimit,
    perCategoryTop,
    categoryCandidatePool,
    minMonthlySold,
    blacklistFile,
    useClaude,
  };
}

/** Returns true when `flag` appears anywhere in `args`. */
function hasFlag(args: string[], flag: string): boolean {
  return args.includes(flag);
}

/**
 * Returns the token that follows the first occurrence of `flag`, or
 * `undefined` when the flag is absent or is the last argument.
 */
function readFlagValue(args: string[], flag: string): string | undefined {
  const idx = args.indexOf(flag);
  if (idx === -1) return undefined;
  return args[idx + 1];
}

/**
 * Logs an optional error message followed by the usage text, then exits
 * the process with code 1.
 *
 * @param message Validation error to show first; skipped when empty.
 */
function printUsageAndExit(message: string): never {
  if (message) {
    log("error", message);
  }
  log(
    "error",
    [
      "Usage:",
      " bun run src/top-monthly-sold-by-category.ts [--category-limit 32] [--per-category-top 100] [--category-candidate-pool 500] [--min-monthly-sold 300] [--out-dir output] [--blacklist-file category-blacklist.csv] [--claude]",
      "",
      "Flow:",
      " 1) Discover categories and round-robin selection.",
      " 2) For each category: fetch a candidate pool, then keep only sellable ASINs.",
      " 3) Rank sellable ASINs by Keepa monthlySold and keep top N where monthlySold >= threshold.",
      " 4) Enrich selected ASINs with Keepa + SP-API pricing/fees.",
      " 5) LLM-analyze and persist per category.",
    ].join("\n"),
  );
  process.exit(1);
}

/**
 * Inserts a `category_analysis` row into `runs` for one category.
 *
 * @param summary Initial counters and status for the category.
 * @param runTimestamp ISO timestamp stored as `startedAt`.
 * @returns The id of the inserted run row.
 * @throws Error when the insert returns no row.
 */
export async function insertCategoryRunSummary(
  summary: CategoryRunSummary,
  runTimestamp: string,
): Promise<number> {
  const [row] = await db
    .insert(runs)
    .values({
      type: "category_analysis",
      status:
        (summary.status as typeof runs.$inferInsert.status) ?? "running",
      categoryId: summary.categoryId,
      categoryLabel: summary.categoryLabel,
      topAsinsChecked: summary.topAsinsChecked,
      availableAsins: summary.availableAsins,
      totalProducts: summary.topAsinsChecked,
      fbaCount: summary.fba,
      fbmCount: summary.fbm,
      skipCount: summary.skip,
      errorMessage: summary.error || null,
      startedAt: new Date(runTimestamp),
    })
    .returning({ id: runs.id });
  if (!row) throw new Error("Failed to insert category run.");
  return row.id;
}

/**
 * Updates the counters, status and error message of an existing run row.
 * `completedAt` is set whenever the new status is anything but "running".
 *
 * @param runId Id of the run row to update.
 * @param summary New counters, status and error text.
 */
export async function updateCategoryRunSummary(
  runId: number,
  summary: Pick<
    CategoryRunSummary,
    | "topAsinsChecked"
    | "availableAsins"
    | "fba"
    | "fbm"
    | "skip"
    | "status"
    | "error"
  >,
): Promise<void> {
  await db
    .update(runs)
    .set({
      topAsinsChecked: summary.topAsinsChecked,
      availableAsins: summary.availableAsins,
      totalProducts: summary.topAsinsChecked,
      fbaCount: summary.fba,
      fbmCount: summary.fbm,
      skipCount: summary.skip,
      status: summary.status as typeof runs.$inferInsert.status,
      errorMessage: summary.error || null,
      ...(summary.status !== "running" ? { completedAt: new Date() } : {}),
    })
    .where(eq(runs.id, runId));
}

/**
 * Upserts one row per ASIN into `categoryProductResults`, overwriting every
 * column of an existing row with the same ASIN.
 *
 * @param runId Run the rows are attributed to.
 * @param results Analysis results to persist; an empty list is a no-op.
 */
export async function insertProductAnalysisResults(
  runId: number,
  results: AnalysisResult[],
): Promise<void> {
  if (results.length === 0) return;

  const rows = results.map((r) => {
    const price =
      r.product.keepa?.currentPrice ??
      r.product.record.sellingPriceFromSheet ??
      r.product.spApi.estimatedSalePrice;
    const rank = r.product.keepa?.salesRank ?? r.product.record.amazonRank;
    return {
      asin: r.product.record.asin,
      runId,
      name: r.product.record.name,
      brand: r.product.record.brand ?? null,
      category:
        r.product.record.category ??
        r.product.keepa?.categoryTree?.join(" > ") ??
        null,
      unitCost: r.product.record.unitCost ?? null,
      currentPrice: price ?? null,
      avgPrice90d: r.product.keepa?.avgPrice90 ?? null,
      avgPrice90dSheet: r.product.record.avgPrice90FromSheet ?? null,
      sellingPriceSheet: r.product.record.sellingPriceFromSheet ?? null,
      salesRank: rank ?? null,
      salesRankAvg90d: r.product.keepa?.salesRankAvg90 ?? null,
      sellerCount: r.product.keepa?.sellerCount ?? null,
      amazonIsSeller: r.product.keepa?.amazonIsSeller ?? null,
      amazonBuyboxSharePct90d:
        r.product.keepa?.amazonBuyboxSharePct90d ?? null,
      monthlySold: r.product.keepa?.monthlySold ?? null,
      rankDrops30d: r.product.keepa?.salesRankDrops30 ?? null,
      rankDrops90d: r.product.keepa?.salesRankDrops90 ?? null,
      fbaFee: r.product.spApi.fbaFee ?? null,
      fbmFee: r.product.spApi.fbmFee ?? null,
      referralPercent: r.product.spApi.referralFeePercent ?? null,
      canSell:
        r.product.spApi.canSell == null
          ? "unknown"
          : r.product.spApi.canSell
            ? "yes"
            : "no",
      sellabilityStatus: r.product.spApi.sellabilityStatus ?? null,
      sellabilityReason: r.product.spApi.sellabilityReason ?? null,
      verdict: r.verdict.verdict,
      confidence: r.verdict.confidence,
      reasoning: r.verdict.reasoning ?? null,
      fetchedAt: new Date(r.product.fetchedAt),
    };
  });

  // Keep only the last row per ASIN. A multi-row upsert that touches the
  // same conflict target twice can be rejected by the database (PostgreSQL
  // does so), and "last one wins" matches sequential upsert semantics.
  const uniqueRows = [...new Map(rows.map((row) => [row.asin, row] as const)).values()];

  await db
    .insert(categoryProductResults)
    .values(uniqueRows)
    .onConflictDoUpdate({
      target: categoryProductResults.asin,
      set: {
        runId: sql`EXCLUDED.run_id`,
        name: sql`EXCLUDED.name`,
        brand: sql`EXCLUDED.brand`,
        category: sql`EXCLUDED.category`,
        unitCost: sql`EXCLUDED.unit_cost`,
        currentPrice: sql`EXCLUDED.current_price`,
        avgPrice90d: sql`EXCLUDED.avg_price_90d`,
        avgPrice90dSheet: sql`EXCLUDED.avg_price_90d_sheet`,
        sellingPriceSheet: sql`EXCLUDED.selling_price_sheet`,
        salesRank: sql`EXCLUDED.sales_rank`,
        salesRankAvg90d: sql`EXCLUDED.sales_rank_avg_90d`,
        sellerCount: sql`EXCLUDED.seller_count`,
        amazonIsSeller: sql`EXCLUDED.amazon_is_seller`,
        amazonBuyboxSharePct90d: sql`EXCLUDED.amazon_buybox_share_pct_90d`,
        monthlySold: sql`EXCLUDED.monthly_sold`,
        rankDrops30d: sql`EXCLUDED.rank_drops_30d`,
        rankDrops90d: sql`EXCLUDED.rank_drops_90d`,
        fbaFee: sql`EXCLUDED.fba_fee`,
        fbmFee: sql`EXCLUDED.fbm_fee`,
        referralPercent: sql`EXCLUDED.referral_percent`,
        canSell: sql`EXCLUDED.can_sell`,
        sellabilityStatus: sql`EXCLUDED.sellability_status`,
        sellabilityReason: sql`EXCLUDED.sellability_reason`,
        verdict: sql`EXCLUDED.verdict`,
        confidence: sql`EXCLUDED.confidence`,
        reasoning: sql`EXCLUDED.reasoning`,
        fetchedAt: sql`EXCLUDED.fetched_at`,
      },
    });
}

/**
 * Loads blacklisted category ids from a CSV file with `id,name` rows.
 * Blank lines, `#` comment lines and an `id,...` header row are skipped;
 * malformed rows are logged and ignored. A missing file yields an empty set.
 *
 * @param filePath Path of the CSV file.
 * @returns The set of blacklisted category ids.
 */
function loadCategoryBlacklist(filePath: string): Set<number> {
  const blacklist = new Set<number>();
  if (!existsSync(filePath)) {
    log(
      "warn",
      `Blacklist file not found at ${filePath}; continuing with no excluded categories.`,
    );
    return blacklist;
  }

  const raw = readFileSync(filePath, "utf8");
  const lines = raw.split(/\r?\n/);
  for (let i = 0; i < lines.length; i++) {
    const lineNumber = i + 1;
    const line = lines[i] ?? "";
    const trimmed = line.trim();
    if (!trimmed || trimmed.startsWith("#")) continue;

    const [idPart, namePart] = trimmed.split(",", 2);
    const idToken = idPart?.trim() ?? "";
    const nameToken = namePart?.trim() ?? "";

    // Allow header row: id,name
    if (idToken.toLowerCase() === "id") {
      continue;
    }
    if (!idToken) {
      log(
        "warn",
        `Blacklist CSV line ${lineNumber}: missing id, row ignored (${trimmed}).`,
      );
      continue;
    }

    const id = Number(idToken);
    if (!Number.isInteger(id) || id <= 0) {
      log(
        "warn",
        `Blacklist CSV line ${lineNumber}: invalid id '${idToken}', row ignored (${trimmed}).`,
      );
      continue;
    }
    if (!nameToken) {
      // A missing name is only a warning; the id is still blacklisted.
      log(
        "warn",
        `Blacklist CSV line ${lineNumber}: missing name for id ${id}; accepted but please add name.`,
      );
    }
    if (blacklist.has(id)) {
      log(
        "warn",
        `Blacklist CSV line ${lineNumber}: duplicate id ${id}, keeping first occurrence.`,
      );
      continue;
    }
    blacklist.add(id);
  }
  return blacklist;
}

/**
 * Verifies that the SP-API credentials are present in `config`.
 *
 * @throws Error listing every missing environment variable.
 */
function assertSpApiPrerequisites(): void {
  const missing: string[] = [];
  if (!config.spApiClientId) missing.push("SP_API_CLIENT_ID");
  if (!config.spApiClientSecret) missing.push("SP_API_CLIENT_SECRET");
  if (!config.spApiRefreshToken) missing.push("SP_API_REFRESH_TOKEN");
  if (!config.spApiSellerId) missing.push("SP_API_SELLER_ID");
  if (missing.length > 0) {
    throw new Error(`Missing required SP-API env vars: ${missing.join(", ")}`);
  }
}

/** Resolves after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Lower-cases `value`, turns whitespace and any character outside
 * `a-z0-9-_` into single dashes and strips a leading/trailing dash.
 * Returns "category" when nothing is left.
 */
function sanitizeFileSegment(value: string): string {
  const compact = value.trim().toLowerCase().replace(/\s+/g, "-");
  const safe = compact.replace(/[^a-z0-9-_]+/g, "-").replace(/-+/g, "-");
  return safe.replace(/^-|-$/g, "") || "category";
}

/**
 * Extracts the numeric rate-limit fields from a Keepa error body.
 * Fields that are missing or not finite numbers are returned as
 * `undefined`; a body that is not valid JSON yields an empty object.
 *
 * @param text Raw response body.
 */
function parseKeepaRateLimitPayload(text: string): {
  refillInMs?: number;
  tokensLeft?: number;
  refillRate?: number;
} {
  try {
    const parsed = JSON.parse(text);
    return {
      refillInMs:
        typeof parsed?.refillIn === "number" && Number.isFinite(parsed.refillIn)
          ? Math.max(0, parsed.refillIn)
          : undefined,
      tokensLeft:
        typeof parsed?.tokensLeft === "number" &&
        Number.isFinite(parsed.tokensLeft)
          ? parsed.tokensLeft
          : undefined,
      refillRate:
        typeof parsed?.refillRate === "number" &&
        Number.isFinite(parsed.refillRate)
          ? parsed.refillRate
          : undefined,
    };
  } catch {
    return {};
  }
}

/**
 * Computes the delay before the next Keepa retry: the larger of the
 * server-provided refill time (+1.5 s) and a capped exponential backoff,
 * plus up to 750 ms of random jitter.
 *
 * @param attempt 1-based count of consecutive rate-limit hits.
 * @param refillInMs Refill delay reported by Keepa, if any.
 */
function computeBackoffMs(attempt: number, refillInMs?: number): number {
  const refillBased = refillInMs != null ? refillInMs + 1500 : 0;
  const exponential = Math.min(60_000, 2 ** attempt * 1000);
  const base = Math.max(refillBased, exponential);
  return base + Math.floor(Math.random() * 750);
}

/**
 * Blocks until the local token estimate allows another Keepa request.
 * Returns immediately when tokens are left or enough time has passed for
 * at least one token to regenerate; otherwise sleeps for the remainder of
 * one refill interval and assumes a single token afterwards.
 */
async function waitForKeepaToken(): Promise<void> {
  if (keepaTokensLeft > 0) return;

  const elapsedMinutes = (Date.now() - keepaLastRequestMs) / 60_000;
  const regenerated = Math.floor(elapsedMinutes * keepaRefillRate);
  if (regenerated > 0) {
    keepaTokensLeft += regenerated;
    return;
  }

  const waitMs =
    Math.ceil((1 / keepaRefillRate) * 60_000) -
    (Date.now() - keepaLastRequestMs);
  if (waitMs > 0) {
    log(
      "info",
      `Keepa tokens depleted; waiting ${Math.ceil(waitMs / 1000)}s...`,
    );
    await sleep(waitMs);
  }
  keepaTokensLeft = 1;
}

/**
 * Performs a GET against the Keepa API and returns the parsed JSON body.
 * HTTP 429 responses are retried indefinitely with backoff; the token
 * bucket state is refreshed from every response that reports it.
 *
 * @param pathAndQuery Path plus query string appended to `KEEPA_BASE`.
 * @returns The parsed response body (untyped).
 * @throws Error for any non-OK status other than 429.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Keepa payloads are probed dynamically by the callers.
async function keepaGetJson(pathAndQuery: string): Promise<any> {
  let rateLimitHits = 0;
  while (true) {
    await waitForKeepaToken();
    const response = await fetch(`${KEEPA_BASE}${pathAndQuery}`);
    keepaLastRequestMs = Date.now();

    if (response.ok) {
      const data = (await response.json()) as any;
      if (typeof data?.tokensLeft === "number") {
        keepaTokensLeft = data.tokensLeft;
      }
      if (typeof data?.refillRate === "number" && data.refillRate > 0) {
        keepaRefillRate = data.refillRate;
      }
      return data;
    }

    const text = await response.text();
    if (response.status === 429) {
      const rate = parseKeepaRateLimitPayload(text);
      if (typeof rate.tokensLeft === "number") {
        keepaTokensLeft = rate.tokensLeft;
      }
      if (typeof rate.refillRate === "number" && rate.refillRate > 0) {
        keepaRefillRate = rate.refillRate;
      }
      rateLimitHits++;
      const waitMs = computeBackoffMs(rateLimitHits, rate.refillInMs);
      log(
        "warn",
        `Keepa rate limited (429). Retry ${rateLimitHits} in ${Math.ceil(waitMs / 1000)}s...`,
      );
      await sleep(waitMs);
      continue;
    }
    throw new Error(`Keepa HTTP ${response.status}: ${text}`);
  }
}

/**
 * Converts a Keepa category response into a de-duplicated list.
 * Accepts `data.categories` as an array or an id-keyed object, plus an
 * optional `data.categoryList` array. Entries without a positive integer
 * id, entries labelled "root" and entries without parent information are
 * dropped; the first occurrence of an id wins.
 *
 * @param data Raw Keepa response.
 */
function normalizeCategoryList(data: any): CategoryInfo[] {
  const deduped = new Map<number, CategoryInfo>();

  const addRawCategory = (value: any): void => {
    const id = Number(
      value?.catId ?? value?.categoryId ?? value?.id ?? value?.nodeId,
    );
    if (!Number.isInteger(id) || id <= 0) return;

    const label = String(
      value?.name ?? value?.label ?? `Category ${id}`,
    ).trim();
    const parentId = Number(value?.parent ?? value?.parentId ?? -1);
    const childCount = Array.isArray(value?.children)
      ? value.children.length
      : Number.isInteger(value?.childCount)
        ? Number(value.childCount)
        : 0;

    // parentId is -1 only when the payload carried no parent field at all.
    if (label.toLowerCase() === "root" || parentId === -1) {
      return;
    }
    if (!deduped.has(id)) {
      deduped.set(id, {
        id,
        label: label || `Category ${id}`,
        parentId,
        childCount: Math.max(0, childCount),
      });
    }
  };

  const rawCategories = data?.categories;
  if (Array.isArray(rawCategories)) {
    for (const value of rawCategories) {
      addRawCategory(value);
    }
  } else if (rawCategories && typeof rawCategories === "object") {
    // Arrays are handled above, so they are not walked a second time here.
    for (const value of Object.values(rawCategories)) {
      addRawCategory(value);
    }
  }
  if (Array.isArray(data?.categoryList)) {
    for (const value of data.categoryList) {
      addRawCategory(value);
    }
  }
  return [...deduped.values()];
}

/**
 * Orders categories so that leaves (no children) come before non-leaves,
 * and within each group real names come before `Category <id>` fallback
 * labels. Relative order inside each bucket is preserved.
 */
function prioritizeLikelyBestsellerCategories(
  categories: CategoryInfo[],
): CategoryInfo[] {
  const leaves: CategoryInfo[] = [];
  const nonLeaves: CategoryInfo[] = [];
  for (const category of categories) {
    if (category.childCount === 0) {
      leaves.push(category);
    } else {
      nonLeaves.push(category);
    }
  }

  const withNamedLabels = (list: CategoryInfo[]) =>
    list.filter((c) => !/^Category\s+\d+$/i.test(c.label));
  const withFallbackLabels = (list: CategoryInfo[]) =>
    list.filter((c) => /^Category\s+\d+$/i.test(c.label));

  return [
    ...withNamedLabels(leaves),
    ...withFallbackLabels(leaves),
    ...withNamedLabels(nonLeaves),
    ...withFallbackLabels(nonLeaves),
  ];
}

/**
 * Walks `parentId` links upwards until a category has no positive parent,
 * its parent is unknown, or a cycle is detected, and returns the category
 * reached.
 *
 * @param category Starting category.
 * @param byId Lookup of all known categories by id.
 */
function resolveRootCategory(
  category: CategoryInfo,
  byId: Map<number, CategoryInfo>,
): CategoryInfo {
  let current = category;
  const seen = new Set<number>();
  while (current.parentId > 0 && !seen.has(current.id)) {
    seen.add(current.id);
    const parent = byId.get(current.parentId);
    if (!parent) break;
    current = parent;
  }
  return current;
}

/**
 * Picks up to `maxCategories` categories by grouping them under their
 * resolved root and taking one per root in round-robin order, so that no
 * single root dominates the selection.
 *
 * @param categories Candidate categories in priority order.
 * @param maxCategories Upper bound on the number of categories returned.
 */
function selectCategoriesAcrossRoots(
  categories: CategoryInfo[],
  maxCategories: number,
): CategoryInfo[] {
  const byId = new Map<number, CategoryInfo>(
    categories.map((c) => [c.id, c]),
  );
  const grouped = new Map<number, CategoryInfo[]>();
  for (const category of categories) {
    const root = resolveRootCategory(category, byId);
    const bucket = grouped.get(root.id) ?? [];
    bucket.push(category);
    grouped.set(root.id, bucket);
  }

  const rootIds = [...grouped.keys()];
  const selected: CategoryInfo[] = [];
  let depth = 0;
  while (selected.length < maxCategories) {
    let progressed = false;
    for (const rootId of rootIds) {
      const bucket = grouped.get(rootId) ?? [];
      if (depth >= bucket.length) continue;
      selected.push(bucket[depth]!);
      progressed = true;
      if (selected.length >= maxCategories) break;
    }
    // Every bucket is exhausted at this depth: nothing more to select.
    if (!progressed) break;
    depth++;
  }
  return selected;
}

/**
 * Fetches the Keepa category listing for the US domain and returns a
 * prioritized, root-balanced selection.
 *
 * @param maxCategories Upper bound on the number of categories returned.
 * @throws Error when Keepa returns no usable categories.
 */
async function discoverCategories(
  maxCategories: number,
): Promise<CategoryInfo[]> {
  const data = await keepaGetJson(
    `/category?key=${encodeURIComponent(config.keepaApiKey)}&domain=${DOMAIN_US}&category=0`,
  );
  const categories = normalizeCategoryList(data);
  if (categories.length === 0) {
    throw new Error("Keepa category discovery returned no usable categories.");
  }
  const prioritized = prioritizeLikelyBestsellerCategories(categories);
  return selectCategoriesAcrossRoots(prioritized, maxCategories);
}

/**
 * Requests the Keepa best-seller list of a category and returns up to
 * `limit` unique, trimmed, non-empty ASINs. Several response shapes are
 * probed; the first field that holds an array is used.
 *
 * @param category Category to query.
 * @param limit Maximum number of ASINs to return.
 */
async function fetchCategoryBestSellerAsins(
  category: CategoryInfo,
  limit: number,
): Promise<string[]> {
  const query = new URLSearchParams({
    key: config.keepaApiKey,
    domain: String(DOMAIN_US),
    category: String(category.id),
    range: "0",
    variations: "0",
    sublist: category.parentId > 0 ? "1" : "0",
  });
  const data = await keepaGetJson(`/bestsellers?${query.toString()}`);
  const bestSellersList = data?.bestSellersList;
  const candidates = [
    bestSellersList,
    bestSellersList?.asinList,
    bestSellersList?.asins,
    bestSellersList?.bestSellers,
    bestSellersList?.bestSellerAsins,
    data?.asinList,
    data?.asins,
    data?.bestsellers,
    data?.bestSellers,
    data?.bestSellerAsins,
    data?.bestsellerList?.asinList,
    data?.categories?.[String(category.id)]?.asinList,
  ];
  for (const value of candidates) {
    if (Array.isArray(value)) {
      return [
        ...new Set(value.map((v) => String(v).trim()).filter(Boolean)),
      ].slice(0, limit);
    }
  }
  return [];
}

/**
 * Checks sellability for all ASINs in sequential batches of
 * `SELLABILITY_BATCH_SIZE`. ASINs missing from a batch response get an
 * "unknown" placeholder so every input ASIN has an entry.
 *
 * @param asins ASINs to check.
 * @returns Sellability info keyed by ASIN.
 */
async function fetchSellabilityMap(
  asins: string[],
): Promise<Map<string, SellabilityInfo>> {
  const sellability = new Map<string, SellabilityInfo>();
  for (let i = 0; i < asins.length; i += SELLABILITY_BATCH_SIZE) {
    const chunk = asins.slice(i, i + SELLABILITY_BATCH_SIZE);
    const chunkResults = await fetchSellabilityBatch(chunk);
    for (const asin of chunk) {
      const info = chunkResults.get(asin) ?? {
        canSell: null,
        sellabilityStatus: "unknown" as const,
        sellabilityReason: "Sellability check returned no result",
      };
      sellability.set(asin, info);
    }
    log(
      "info",
      ` Sellability progress: ${Math.min(i + chunk.length, asins.length)}/${asins.length}`,
    );
  }
  return sellability;
}

/**
 * Fetches SP-API pricing and fees for every ASIN using up to
 * `PRICING_CONCURRENCY` workers that drain a shared queue.
 *
 * @param asins ASINs to price.
 * @param sellabilityMap Sellability info passed through to the pricing call.
 * @returns SP-API data keyed by ASIN.
 */
async function fetchSpApiMap(
  asins: string[],
  sellabilityMap: Map<string, SellabilityInfo>,
): Promise<Map<string, SpApiData>> {
  const pricingQueue = [...asins];
  const spApiMap = new Map<string, SpApiData>();
  let done = 0;

  async function worker(): Promise<void> {
    while (pricingQueue.length > 0) {
      const asin = pricingQueue.shift();
      if (!asin) return;
      const sellability = sellabilityMap.get(asin) ?? {
        canSell: null,
        sellabilityStatus: "unknown" as const,
        sellabilityReason: "Sellability missing",
      };
      const spApi = await fetchSpApiPricingAndFees(asin, sellability);
      spApiMap.set(asin, spApi);
      done++;
      if (done % 10 === 0 || done === asins.length) {
        log("info", ` Pricing progress: ${done}/${asins.length}`);
      }
    }
  }

  const workers = Array.from(
    { length: Math.min(PRICING_CONCURRENCY, asins.length || 1) },
    () => worker(),
  );
  await Promise.all(workers);
  return spApiMap;
}

/** Returns the first argument that is a finite, non-negative number, else `null`. */
function pickKeepaNumber(...values: unknown[]): number | null {
  for (const value of values) {
    if (typeof value !== "number" || !Number.isFinite(value)) continue;
    if (value < 0) continue;
    return value;
  }
  return null;
}

/**
 * Reads the latest price from the Keepa `csv` history, trying series 0 and
 * then series 1. The last element of a series is taken as the newest value
 * in cents; non-positive values are skipped.
 *
 * @returns The price divided by 100, or `null` when neither series has one.
 */
function extractCurrentPrice(csv: number[][] | undefined): number | null {
  if (!Array.isArray(csv)) return null;
  for (const series of [csv[0], csv[1]]) {
    if (Array.isArray(series) && series.length >= 2) {
      const lastPrice = series[series.length - 1];
      if (typeof lastPrice === "number" && lastPrice > 0) {
        return Math.round((lastPrice / 100) * 100) / 100;
      }
    }
  }
  return null;
}

/**
 * Maps a raw Keepa product object onto `KeepaData`.
 * Missing 90-day rank drops are estimated as three times the 30-day drops,
 * and missing `monthlySold` falls back to the 30-day drops.
 *
 * @param product Raw product entry from the Keepa `/product` response.
 */
function parseKeepaProduct(product: Record<string, any>): KeepaData {
  const stats = product.stats;
  const csv = product.csv;

  const salesRankDrops30 = pickKeepaNumber(
    product.salesRankDrops30,
    stats?.salesRankDrops30,
  );
  const salesRankDrops90 =
    pickKeepaNumber(product.salesRankDrops90, stats?.salesRankDrops90) ??
    (salesRankDrops30 != null ? salesRankDrops30 * 3 : null);
  const monthlySold =
    pickKeepaNumber(product.monthlySold, stats?.monthlySold) ??
    salesRankDrops30;
  const amazonIsSeller = resolveAmazonIsSeller(product, stats, csv);
  const amazonBuyboxSharePct90d =
    extractAmazonBuyboxSharePct90d(product, stats) ??
    computeAmazonBuyBoxSharePctFromHistory(
      product.buyBoxSellerIdHistory,
      90,
      new Set([AMAZON_US_SELLER_ID]),
    );

  return {
    currentPrice: extractCurrentPrice(csv),
    avgPrice90: stats?.avg?.[0] != null ? stats.avg[0] / 100 : null,
    minPrice90: stats?.min?.[0] != null ? stats.min[0] / 100 : null,
    maxPrice90: stats?.max?.[0] != null ? stats.max[0] / 100 : null,
    salesRank: stats?.current?.[3] ?? null,
    salesRankAvg90: stats?.avg?.[3] ?? null,
    salesRankDrops30,
    salesRankDrops90,
    sellerCount: stats?.current?.[11] ?? null,
    amazonIsSeller,
    amazonBuyboxSharePct90d,
    buyBoxSeller: product.buyBoxSellerId ?? null,
    buyBoxPrice: stats?.current?.[18] != null ? stats.current[18] / 100 : null,
    monthlySold,
    categoryTree:
      product.categoryTree?.map((c: { name: string }) => c.name) ?? [],
  };
}

/**
 * Decides whether Amazon itself sells the product, checking in order:
 * `isAmazonSeller`, `availabilityAmazon`, `stats.buyBoxIsAmazon`, the
 * current value of series 0 in `stats`, and finally the latest positive
 * price in `csv[0]`.
 *
 * @returns `true`/`false` when a signal is conclusive, otherwise `null`.
 */
function resolveAmazonIsSeller(
  product: Record<string, any>,
  stats: Record<string, any> | undefined,
  csv: number[][] | undefined,
): boolean | null {
  if (typeof product.isAmazonSeller === "boolean")
    return product.isAmazonSeller;

  if (typeof product.availabilityAmazon === "number") {
    if (product.availabilityAmazon >= 0) return true;
    if (
      product.availabilityAmazon === -1 ||
      product.availabilityAmazon === -2
    ) {
      return false;
    }
  }

  if (stats?.buyBoxIsAmazon === true) return true;

  if (typeof stats?.current?.[0] === "number") {
    if (stats.current[0] > 0) return true;
    if (stats.current[0] === -1 || stats.current[0] === -2) return false;
  }

  const latestAmazonPrice = extractLatestPositivePrice(csv?.[0]);
  if (latestAmazonPrice != null) return true;
  return null;
}

/**
 * Looks for a precomputed 90-day Amazon buy box share in several candidate
 * fields and returns the first finite value within 0..100, rounded to two
 * decimals.
 *
 * @returns The share in percent, or `null` when no candidate qualifies.
 */
function extractAmazonBuyboxSharePct90d(
  product: Record<string, any>,
  stats: Record<string, any> | undefined,
): number | null {
  const candidates: unknown[] = [
    product.buyBoxStatsAmazon90,
    stats?.buyBoxStatsAmazon90,
    product.buyBoxStats?.amazon90,
    product.buyBoxStats?.amazon?.[90],
    product.buyBoxStats?.amazon?.["90"],
    product.buyBoxStats?.[AMAZON_US_SELLER_ID]?.[90],
    product.buyBoxStats?.[AMAZON_US_SELLER_ID]?.["90"],
  ];
  for (const value of candidates) {
    if (typeof value !== "number" || !Number.isFinite(value)) continue;
    if (value < 0 || value > 100) continue;
    return Math.round(value * 100) / 100;
  }
  return null;
}

/**
 * Derives Amazon's buy box share from a flat history array of alternating
 * `[keepaMinute, sellerId, ...]` entries. Each entry lasts until the next
 * timestamp (or now), is clipped to the look-back window, and is ignored
 * when the seller id is "-1" or "-2".
 *
 * @param history Raw history value; anything but an array of 2+ items yields `null`.
 * @param windowDays Size of the look-back window in days.
 * @param amazonSellerIds Upper-case seller ids counted as Amazon.
 * @returns Share in percent rounded to two decimals, or `null` when no time qualified.
 */
function computeAmazonBuyBoxSharePctFromHistory(
  history: unknown,
  windowDays: number,
  amazonSellerIds: Set<string>,
): number | null {
  if (!Array.isArray(history) || history.length < 2) return null;

  const nowKeepaMinutes =
    Math.floor(Date.now() / 60_000) - KEEPA_MINUTES_OFFSET;
  const windowStart = nowKeepaMinutes - windowDays * 24 * 60;
  let qualifiedMinutes = 0;
  let amazonMinutes = 0;

  for (let i = 0; i < history.length - 1; i += 2) {
    const startMinute = Number.parseInt(String(history[i]), 10);
    const sellerId = String(history[i + 1] ?? "").toUpperCase();
    const nextRaw = i + 2 < history.length ? history[i + 2] : nowKeepaMinutes;
    const endMinute = Number.parseInt(String(nextRaw), 10);

    if (!Number.isFinite(startMinute) || !Number.isFinite(endMinute)) continue;
    if (endMinute <= startMinute) continue;

    const intervalStart = Math.max(startMinute, windowStart);
    const intervalEnd = Math.min(endMinute, nowKeepaMinutes);
    if (intervalEnd <= intervalStart) continue;
    if (sellerId === "-1" || sellerId === "-2") continue;

    const minutes = intervalEnd - intervalStart;
    qualifiedMinutes += minutes;
    if (amazonSellerIds.has(sellerId)) {
      amazonMinutes += minutes;
    }
  }

  if (qualifiedMinutes === 0) return null;
  return Math.round((amazonMinutes / qualifiedMinutes) * 10_000) / 100;
}

/**
 * Returns the last element of a history series divided by 100 when it is a
 * finite, positive number; otherwise `null`.
 */
function extractLatestPositivePrice(series: unknown): number | null {
  if (!Array.isArray(series) || series.length < 2) return null;
  const last = series[series.length - 1];
  if (typeof last !== "number" || !Number.isFinite(last) || last <= 0) {
    return null;
  }
  return last / 100;
}

/**
 * Fetches Keepa product data in sequential chunks of
 * `KEEPA_PRODUCT_CHUNK_SIZE` and parses each product.
 *
 * @param asins ASINs to enrich.
 * @returns Parsed Keepa data and title keyed by ASIN; ASINs Keepa did not return are absent.
 */
async function fetchKeepaEnrichmentMap(
  asins: string[],
): Promise<Map<string, KeepaEnrichment>> {
  const out = new Map<string, KeepaEnrichment>();
  for (let i = 0; i < asins.length; i += KEEPA_PRODUCT_CHUNK_SIZE) {
    const chunk = asins.slice(i, i + KEEPA_PRODUCT_CHUNK_SIZE);
    const asinParam = encodeURIComponent(chunk.join(","));
    const data = await keepaGetJson(
      `/product?key=${encodeURIComponent(config.keepaApiKey)}&domain=${DOMAIN_US}&asin=${asinParam}&stats=90&buybox=1&days=90`,
    );
    const products = Array.isArray(data?.products) ? data.products : [];
    for (const product of products) {
      const asin = String(product?.asin ?? "").trim();
      if (!asin) continue;
      out.set(asin, {
        keepa: parseKeepaProduct(product),
        title: String(product?.title ?? "").trim(),
      });
    }
    log(
      "info",
      ` Keepa enrichment progress: ${Math.min(i + chunk.length, asins.length)}/${asins.length}`,
    );
  }
  return out;
}

/**
 * Keeps the ASINs whose `monthlySold` is a number at or above the
 * threshold, sorts them by `monthlySold` descending and returns the first
 * `perCategoryTop`.
 *
 * @param asins Candidate ASINs.
 * @param keepaEnrichmentMap Keepa data keyed by ASIN.
 * @param perCategoryTop Maximum number of ASINs to return.
 * @param minMonthlySold Inclusive lower bound for `monthlySold`.
 */
function selectTopMonthlySoldAsins(
  asins: string[],
  keepaEnrichmentMap: Map<string, KeepaEnrichment>,
  perCategoryTop: number,
  minMonthlySold: number,
): string[] {
  return [...asins]
    .map((asin) => ({
      asin,
      monthlySold: keepaEnrichmentMap.get(asin)?.keepa.monthlySold,
    }))
    .filter(
      (item): item is { asin: string; monthlySold: number } =>
        typeof item.monthlySold === "number" &&
        item.monthlySold >= minMonthlySold,
    )
    .sort((a, b) => b.monthlySold - a.monthlySold)
    .slice(0, perCategoryTop)
    .map((item) => item.asin);
}

/**
 * Combines sellability, SP-API and Keepa data into one `EnrichedProduct`
 * per ASIN. Missing SP-API data is replaced by zeroed fees with a 15 %
 * referral fee; a zero SP-API sale price is replaced by the Keepa current
 * price when one exists.
 *
 * @param asins ASINs to build products for, in output order.
 * @param sellabilityMap Sellability info keyed by ASIN.
 * @param spApiMap SP-API pricing keyed by ASIN (entries are not mutated).
 * @param keepaEnrichmentMap Keepa data keyed by ASIN.
 */
function buildEnrichedProducts(
  asins: string[],
  sellabilityMap: Map<string, SellabilityInfo>,
  spApiMap: Map<string, SpApiData>,
  keepaEnrichmentMap: Map<string, KeepaEnrichment>,
): EnrichedProduct[] {
  return asins.map((asin) => {
    const sellability = sellabilityMap.get(asin) ?? {
      canSell: null,
      sellabilityStatus: "unknown" as const,
      sellabilityReason: "Sellability missing",
    };
    // Shallow copy so the price override below never writes into the
    // caller's map entry.
    const spApi = {
      ...(spApiMap.get(asin) ?? {
        fbaFee: 0,
        fbmFee: 0,
        referralFeePercent: 15,
        estimatedSalePrice: 0,
        canSell: sellability.canSell,
        sellabilityStatus: sellability.sellabilityStatus,
        sellabilityReason: sellability.sellabilityReason,
      }),
    };

    const enrichedKeepa = keepaEnrichmentMap.get(asin);
    const keepa = enrichedKeepa?.keepa ?? null;
    // `||` on purpose: an empty Keepa title must also fall back to the ASIN.
    const title = enrichedKeepa?.title || asin;

    const record: ProductRecord = {
      asin,
      name: title,
      unitCost: 0,
      category: undefined,
      brand: undefined,
      supplier: undefined,
    };

    if (keepa?.currentPrice && spApi.estimatedSalePrice === 0) {
      spApi.estimatedSalePrice = keepa.currentPrice;
    }

    return {
      record,
      keepa,
      spApi,
      fetchedAt: new Date().toISOString(),
    };
  });
}

/**
 * Marks a run as "empty" in the database and returns the matching summary.
 * Shared by every early exit of `processCategory`.
 */
async function finalizeEmptyCategory(
  runId: number,
  category: CategoryInfo,
  topAsinsChecked: number,
  error: string,
): Promise<CategoryRunSummary> {
  await updateCategoryRunSummary(runId, {
    topAsinsChecked,
    availableAsins: 0,
    fba: 0,
    fbm: 0,
    skip: 0,
    status: "empty",
    error,
  });
  return {
    categoryId: category.id,
    categoryLabel: category.label,
    topAsinsChecked,
    availableAsins: 0,
    fba: 0,
    fbm: 0,
    skip: 0,
    status: "empty",
    error,
    results: [],
  };
}

/**
 * Runs the full pipeline for one category: fetch best-seller candidates,
 * keep sellable ASINs, rank them by monthly sold, enrich with SP-API
 * pricing, analyze in LLM batches and persist each batch as it completes.
 * A failed LLM batch is recorded as SKIP verdicts instead of aborting.
 *
 * @param runId Run row that receives progress updates.
 * @param category Category to process.
 * @param perCategoryTop Maximum number of ASINs analyzed.
 * @param categoryCandidatePool Number of best-seller candidates requested.
 * @param minMonthlySold Inclusive lower bound for `monthlySold`.
 * @param useClaude Forwarded to `analyzeProducts`.
 * @returns The final summary, including all analysis results.
 */
export async function processCategory(
  runId: number,
  category: CategoryInfo,
  perCategoryTop: number,
  categoryCandidatePool: number,
  minMonthlySold: number,
  useClaude = false,
): Promise<CategoryRunSummary> {
  log("info", `\nCategory ${category.label} (${category.id})`);

  const topAsins = await fetchCategoryBestSellerAsins(
    category,
    categoryCandidatePool,
  );
  if (topAsins.length === 0) {
    log("info", " Keepa returned no ASINs for this category.");
    return finalizeEmptyCategory(
      runId,
      category,
      0,
      "No ASINs returned by Keepa",
    );
  }

  const uniqueTopAsins = Array.from(new Set(topAsins));
  if (uniqueTopAsins.length !== topAsins.length) {
    log(
      "warn",
      ` Removed ${topAsins.length - uniqueTopAsins.length} duplicate ASINs before analysis.`,
    );
  }
  log("info", ` Candidate ASINs fetched: ${uniqueTopAsins.length}`);

  const sellabilityMap = await fetchSellabilityMap(uniqueTopAsins);
  const availableAsins = uniqueTopAsins.filter((asin) => {
    const info = sellabilityMap.get(asin);
    return info?.canSell === true && info.sellabilityStatus === "available";
  });
  log(
    "info",
    ` Sellable ASINs: ${availableAsins.length}/${uniqueTopAsins.length}`,
  );
  if (availableAsins.length === 0) {
    return finalizeEmptyCategory(
      runId,
      category,
      uniqueTopAsins.length,
      "No sellable ASINs",
    );
  }

  const keepaEnrichment = await fetchKeepaEnrichmentMap(availableAsins);
  const selectedAsins = selectTopMonthlySoldAsins(
    availableAsins,
    keepaEnrichment,
    perCategoryTop,
    minMonthlySold,
  );
  log(
    "info",
    ` Selected by monthly sold >= ${minMonthlySold}: ${selectedAsins.length}/${availableAsins.length}`,
  );
  if (selectedAsins.length === 0) {
    return finalizeEmptyCategory(
      runId,
      category,
      uniqueTopAsins.length,
      `No sellable ASINs met monthly sold >= ${minMonthlySold}`,
    );
  }

  const spApiMap = await fetchSpApiMap(selectedAsins, sellabilityMap);
  const enrichedProducts = buildEnrichedProducts(
    selectedAsins,
    sellabilityMap,
    spApiMap,
    keepaEnrichment,
  );

  const results: AnalysisResult[] = [];
  let fba = 0;
  let fbm = 0;
  let skip = 0;
  const totalBatches = Math.ceil(enrichedProducts.length / LLM_BATCH_SIZE);

  for (let i = 0; i < enrichedProducts.length; i += LLM_BATCH_SIZE) {
    const batch = enrichedProducts.slice(i, i + LLM_BATCH_SIZE);
    const batchNum = Math.floor(i / LLM_BATCH_SIZE) + 1;
    log("info", ` LLM batch ${batchNum}/${totalBatches}...`);

    let batchVerdicts: LlmVerdict[];
    try {
      batchVerdicts = await analyzeProducts(batch, { useClaude });
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      log("warn", ` LLM batch failed: ${message}`);
      batchVerdicts = batch.map((p) => ({
        asin: p.record.asin,
        verdict: "SKIP",
        confidence: 0,
        reasoning: "LLM analysis failed",
      }));
    }

    const verdictByAsin = new Map<string, LlmVerdict>(
      batchVerdicts.map((v) => [v.asin, v]),
    );
    const batchResults: AnalysisResult[] = batch.map((product) => ({
      product,
      verdict: verdictByAsin.get(product.record.asin) ?? {
        asin: product.record.asin,
        verdict: "SKIP",
        confidence: 0,
        reasoning: "LLM returned no verdict",
      },
    }));

    await insertProductAnalysisResults(runId, batchResults);

    for (const result of batchResults) {
      results.push(result);
      if (result.verdict.verdict === "FBA") {
        fba++;
      } else if (result.verdict.verdict === "FBM") {
        fbm++;
      } else {
        skip++;
      }
    }

    // Persist running totals after every batch so progress survives a crash.
    await updateCategoryRunSummary(runId, {
      topAsinsChecked: uniqueTopAsins.length,
      availableAsins: selectedAsins.length,
      fba,
      fbm,
      skip,
      status: "running",
      error: "",
    });
    log(
      "info",
      ` Persisted batch ${batchNum}/${totalBatches} (${batchResults.length} rows, totals FBA/FBM/SKIP=${fba}/${fbm}/${skip})`,
    );

    if (i + LLM_BATCH_SIZE < enrichedProducts.length) {
      await sleep(1500);
    }
  }

  await updateCategoryRunSummary(runId, {
    topAsinsChecked: uniqueTopAsins.length,
    availableAsins: selectedAsins.length,
    fba,
    fbm,
    skip,
    status: "ok",
    error: "",
  });

  return {
    categoryId: category.id,
    categoryLabel: category.label,
    topAsinsChecked: uniqueTopAsins.length,
    availableAsins: selectedAsins.length,
    fba,
    fbm,
    skip,
    status: "ok",
    error: "",
    results,
  };
}

/**
 * Entry point: parses arguments, discovers categories, drops blacklisted
 * ones and processes the rest sequentially. A failing category is logged,
 * marked as "failed" and does not stop the remaining categories.
 */
export async function main(): Promise<void> {
  const args = parseArgs();
  assertSpApiPrerequisites();
  mkdirSync(args.outputDir, { recursive: true });

  log("info", "Starting per-category monthly-sold pipeline");
  log("info", `Marketplace: ${config.spApiMarketplaceId}`);
  log("info", `SP-API region: ${config.spApiRegion}`);
  log("info", `Category limit: ${args.categoryLimit}`);
  log(
    "info",
    `Top ASINs per category after monthly sort: ${args.perCategoryTop}`,
  );
  log("info", `Category candidate pool: ${args.categoryCandidatePool}`);
  log("info", `Minimum monthly sold: ${args.minMonthlySold}`);
  log("info", `Blacklist file: ${args.blacklistFile}`);

  const categoryBlacklist = loadCategoryBlacklist(args.blacklistFile);
  log("info", `Loaded ${categoryBlacklist.size} blacklisted category IDs.`);

  const categories = await discoverCategories(args.categoryLimit);
  const allowedCategories = categories.filter(
    (c) => !categoryBlacklist.has(c.id),
  );
  const blacklistedCount = categories.length - allowedCategories.length;
  log(
    "info",
    `Discovered ${categories.length} categories (${blacklistedCount} blacklisted, ${allowedCategories.length} to process).`,
  );

  const runTimestamp = new Date().toISOString();
  let processedCategories = 0;
  let totalInsertedAsins = 0;
  const allCategorySummaries: CategoryRunSummary[] = [];

  for (const category of allowedCategories) {
    let categorySummary: CategoryRunSummary;
    let runId: number | undefined;
    try {
      runId = await insertCategoryRunSummary(
        {
          categoryId: category.id,
          categoryLabel: category.label,
          topAsinsChecked: 0,
          availableAsins: 0,
          fba: 0,
          fbm: 0,
          skip: 0,
          status: "running",
          error: "",
          results: [],
        },
        runTimestamp,
      );
      categorySummary = await processCategory(
        runId,
        category,
        args.perCategoryTop,
        args.categoryCandidatePool,
        args.minMonthlySold,
        args.useClaude,
      );
      totalInsertedAsins += categorySummary.results?.length ?? 0;
      processedCategories++;
      allCategorySummaries.push({ ...categorySummary, runId });
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      log(
        "warn",
        `Skipping category ${category.label} (${category.id}) due to error: ${message}`,
      );
      categorySummary = {
        categoryId: category.id,
        categoryLabel: category.label,
        topAsinsChecked: 0,
        availableAsins: 0,
        fba: 0,
        fbm: 0,
        skip: 0,
        status: "failed",
        error: message,
        results: [],
      };
      if (runId !== undefined) {
        // The status update itself may fail (e.g. the database is the
        // reason for the error); that must not abort the remaining
        // categories.
        try {
          await updateCategoryRunSummary(runId, {
            topAsinsChecked: 0,
            availableAsins: 0,
            fba: 0,
            fbm: 0,
            skip: 0,
            status: "failed",
            error: message,
          });
        } catch (updateErr) {
          const updateMessage =
            updateErr instanceof Error ? updateErr.message : String(updateErr);
          log(
            "error",
            `Could not mark run ${runId} as failed: ${updateMessage}`,
          );
        }
      }
      processedCategories++;
      allCategorySummaries.push({ ...categorySummary, runId });
    }
  }

  log("info", "\nRun summary");
  log("info", `Categories discovered/selected: ${categories.length}`);
  log("info", `Categories processed: ${processedCategories}`);
  log("info", `Total ASINs inserted into DB: ${totalInsertedAsins}`);
}

if (import.meta.main) {
  main().catch((err) => {
    log("error", `Monthly-sold process crashed: ${String(err)}`);
    process.exit(1);
  });
}