Files
asin-check/src/top-monthly-sold-by-category.ts
Victor Noguera b982edd160 Refactor database interactions to use Drizzle ORM
- Replaced direct SQLite database calls with Drizzle ORM methods in `top-monthly-sold-by-category.ts`, `writer.ts`, and `upc-file-analysis.ts`.
- Updated test cases in `top-monthly-sold-by-category.test.ts` to mock the new database interactions.
- Removed unnecessary database initialization and cleanup code.
- Improved code readability and maintainability by using ORM features for inserting and updating records.
2026-05-25 00:08:30 -04:00

1377 lines
40 KiB
TypeScript

import { existsSync, mkdirSync, readFileSync } from "node:fs";
import path from "node:path";
import { db } from "./db/index.ts";
import { runs, categoryProductResults } from "./db/schema.ts";
import { eq, sql } from "drizzle-orm";
import { config } from "./config.ts";
import { analyzeProducts } from "./llm.ts";
import { fetchSellabilityBatch, fetchSpApiPricingAndFees } from "./sp-api.ts";
import type {
AnalysisResult,
EnrichedProduct,
KeepaData,
LlmVerdict,
ProductRecord,
SellabilityInfo,
SpApiData,
} from "./types.ts";
/** A Keepa category after normalization by `normalizeCategoryList`. */
type CategoryInfo = {
// Positive integer category id (non-positive / non-integer ids are dropped during normalization).
id: number;
// Display name; falls back to `Category <id>` when the source entry has no usable name.
label: string;
// Id of the parent category; values > 0 are followed when resolving a category's root.
parentId: number;
// Number of child categories (never negative); 0 is treated as a leaf when prioritizing.
childCount: number;
};
/** Command-line options as resolved by `parseArgs`. */
type ParsedArgs = {
// Value of --out-dir (defaults to `<cwd>/output`).
outputDir: string;
// Value of --category-limit; positive integer.
categoryLimit: number;
// Value of --per-category-top; positive integer.
perCategoryTop: number;
// Value of --category-candidate-pool; positive integer, >= perCategoryTop.
categoryCandidatePool: number;
// Value of --min-monthly-sold; non-negative integer.
minMonthlySold: number;
// Value of --blacklist-file (defaults to DEFAULT_BLACKLIST_FILE).
blacklistFile: string;
// True when the --claude flag is present; forwarded to analyzeProducts.
useClaude: boolean;
};
/** Per-category outcome that is both persisted to the `runs` table and returned to `main`. */
type CategoryRunSummary = {
categoryId: number;
categoryLabel: string;
// Number of unique candidate ASINs examined for the category.
topAsinsChecked: number;
// processCategory stores the number of ASINs selected for analysis here (0 on "empty" outcomes).
availableAsins: number;
// Counts of LLM verdicts: "FBA", "FBM", and everything else (counted as skip).
fba: number;
fbm: number;
skip: number;
status: "running" | "ok" | "empty" | "failed";
// Error/explanation text; an empty string means "no error" and is stored as null.
error: string;
// Id of the `runs` row for this category, when one was created.
runId?: number;
// Analysis results produced for the category, when available.
results?: AnalysisResult[];
};
// Base URL prepended to every Keepa request path in keepaGetJson.
const KEEPA_BASE = "https://api.keepa.com";
// Value sent as the Keepa `domain` query parameter.
const DOMAIN_US = 1;
// Seller id treated as "Amazon" when computing buy box share.
const AMAZON_US_SELLER_ID = "ATVPDKIKX0DER";
// Subtracted from Unix-epoch minutes to obtain "Keepa minutes" (see computeAmazonBuyBoxSharePctFromHistory).
const KEEPA_MINUTES_OFFSET = 21_564_000;
// Defaults for the corresponding CLI flags (see parseArgs).
const DEFAULT_CATEGORY_LIMIT = 32;
const DEFAULT_PER_CATEGORY_TOP = 100;
const DEFAULT_CATEGORY_CANDIDATE_POOL = 500;
const DEFAULT_MIN_MONTHLY_SOLD = 300;
// Number of ASINs passed to fetchSellabilityBatch per call.
const SELLABILITY_BATCH_SIZE = 60;
// Number of products sent to analyzeProducts per call.
const LLM_BATCH_SIZE = 10;
// Maximum number of concurrent pricing workers in fetchSpApiMap.
const PRICING_CONCURRENCY = 5;
// Number of ASINs requested per Keepa /product call.
const KEEPA_PRODUCT_CHUNK_SIZE = 100;
// Default path for --blacklist-file: category-blacklist.csv in the current working directory.
const DEFAULT_BLACKLIST_FILE = path.join(
process.cwd(),
"category-blacklist.csv",
);
// Mutable Keepa rate-limit state, refreshed from `tokensLeft` / `refillRate` in Keepa responses.
let keepaTokensLeft = 1;
// Used by waitForKeepaToken as tokens regenerated per minute.
let keepaRefillRate = 1;
// Timestamp (ms since epoch) of the most recent Keepa request; 0 until the first request.
let keepaLastRequestMs = 0;
/**
 * Prints one log line through `console.log`, prefixed with the current ISO
 * timestamp and the upper-cased level.
 *
 * @param level - Severity tag shown in the prefix.
 * @param message - Text appended after the prefix.
 * @param args - Extra values forwarded unchanged to `console.log`.
 */
function log(
  level: "info" | "warn" | "error",
  message: string,
  ...args: any[]
) {
  const stamp = new Date().toISOString();
  const tag = level.toUpperCase();
  console.log(`[${stamp}] [${tag}] ${message}`, ...args);
}
/**
 * Reads the CLI flags from `process.argv` and validates the numeric ones.
 *
 * Any validation failure is reported through `printUsageAndExit`, which
 * terminates the process.
 *
 * @returns The resolved options, with defaults applied for missing flags.
 */
function parseArgs(): ParsedArgs {
  const argv = process.argv.slice(2);

  // A missing or empty flag value falls back to the default; anything else
  // goes through Number() and is validated below.
  const numberOrDefault = (flag: string, fallback: number): number => {
    const raw = readFlagValue(argv, flag);
    return raw ? Number(raw) : fallback;
  };
  const isPositiveInteger = (value: number): boolean =>
    Number.isInteger(value) && value > 0;

  const useClaude = hasFlag(argv, "--claude");
  const outputDir =
    readFlagValue(argv, "--out-dir") ?? path.join(process.cwd(), "output");
  const blacklistFile =
    readFlagValue(argv, "--blacklist-file") ?? DEFAULT_BLACKLIST_FILE;

  const categoryLimit = numberOrDefault(
    "--category-limit",
    DEFAULT_CATEGORY_LIMIT,
  );
  const perCategoryTop = numberOrDefault(
    "--per-category-top",
    DEFAULT_PER_CATEGORY_TOP,
  );
  const categoryCandidatePool = numberOrDefault(
    "--category-candidate-pool",
    DEFAULT_CATEGORY_CANDIDATE_POOL,
  );
  const minMonthlySold = numberOrDefault(
    "--min-monthly-sold",
    DEFAULT_MIN_MONTHLY_SOLD,
  );

  if (!isPositiveInteger(categoryLimit)) {
    printUsageAndExit("--category-limit must be a positive integer.");
  }
  if (!isPositiveInteger(perCategoryTop)) {
    printUsageAndExit("--per-category-top must be a positive integer.");
  }
  if (!isPositiveInteger(categoryCandidatePool)) {
    printUsageAndExit("--category-candidate-pool must be a positive integer.");
  }
  if (categoryCandidatePool < perCategoryTop) {
    printUsageAndExit(
      "--category-candidate-pool must be greater than or equal to --per-category-top.",
    );
  }
  if (!(Number.isInteger(minMonthlySold) && minMonthlySold >= 0)) {
    printUsageAndExit("--min-monthly-sold must be a non-negative integer.");
  }

  return {
    outputDir,
    categoryLimit,
    perCategoryTop,
    categoryCandidatePool,
    minMonthlySold,
    blacklistFile,
    useClaude,
  };
}
/**
 * Tells whether `flag` appears verbatim among the arguments.
 *
 * @param args - Command-line tokens to search.
 * @param flag - Exact flag text, e.g. `--claude`.
 */
function hasFlag(args: string[], flag: string): boolean {
  return args.some((token) => token === flag);
}
/**
 * Returns the token that immediately follows the first occurrence of `flag`.
 *
 * @param args - Command-line tokens to search.
 * @param flag - Exact flag text, e.g. `--out-dir`.
 * @returns The following token, or `undefined` when the flag is absent or is
 *   the last token.
 */
function readFlagValue(args: string[], flag: string): string | undefined {
  const position = args.indexOf(flag);
  return position === -1 ? undefined : args[position + 1];
}
/**
 * Logs an optional error message followed by the usage text, then exits the
 * process with status 1.
 *
 * @param message - Problem description; skipped when empty.
 */
function printUsageAndExit(message: string): never {
  if (message) log("error", message);

  const usageLines = [
    "Usage:",
    " bun run src/top-monthly-sold-by-category.ts [--category-limit 32] [--per-category-top 100] [--category-candidate-pool 500] [--min-monthly-sold 300] [--out-dir output] [--blacklist-file category-blacklist.csv] [--claude]",
    "",
    "Flow:",
    " 1) Discover categories and round-robin selection.",
    " 2) For each category: fetch a candidate pool, then keep only sellable ASINs.",
    " 3) Rank sellable ASINs by Keepa monthlySold and keep top N where monthlySold >= threshold.",
    " 4) Enrich selected ASINs with Keepa + SP-API pricing/fees.",
    " 5) LLM-analyze and persist per category.",
  ];
  log("error", usageLines.join("\n"));

  process.exit(1);
}
/**
 * Creates a `runs` row of type "category_analysis" for one category.
 *
 * `totalProducts` is written with the same value as `topAsinsChecked`, and an
 * empty `error` is stored as `null`.
 *
 * @param summary - Initial counters and status for the category.
 * @param runTimestamp - Date string stored (as a Date) in `startedAt`.
 * @returns The id of the inserted row.
 * @throws Error when the insert returns no row.
 */
export async function insertCategoryRunSummary(
  summary: CategoryRunSummary,
  runTimestamp: string,
): Promise<number> {
  const {
    categoryId,
    categoryLabel,
    topAsinsChecked,
    availableAsins,
    fba,
    fbm,
    skip,
    status,
    error,
  } = summary;

  const inserted = await db
    .insert(runs)
    .values({
      type: "category_analysis",
      status: (status as typeof runs.$inferInsert.status) ?? "running",
      categoryId,
      categoryLabel,
      topAsinsChecked,
      availableAsins,
      totalProducts: topAsinsChecked,
      fbaCount: fba,
      fbmCount: fbm,
      skipCount: skip,
      errorMessage: error || null,
      startedAt: new Date(runTimestamp),
    })
    .returning({ id: runs.id });

  const created = inserted[0];
  if (!created) {
    throw new Error("Failed to insert category run.");
  }
  return created.id;
}
/**
 * Overwrites the counters, status and error message of an existing `runs` row.
 *
 * `completedAt` is set to the current time whenever the new status is anything
 * other than "running"; otherwise that column is left untouched.
 *
 * @param runId - Id of the `runs` row to update.
 * @param summary - New counter, status and error values.
 */
export async function updateCategoryRunSummary(
  runId: number,
  summary: Pick<
    CategoryRunSummary,
    | "topAsinsChecked"
    | "availableAsins"
    | "fba"
    | "fbm"
    | "skip"
    | "status"
    | "error"
  >,
): Promise<void> {
  const { topAsinsChecked, availableAsins, fba, fbm, skip, status, error } =
    summary;
  const isFinished = status !== "running";

  await db
    .update(runs)
    .set({
      topAsinsChecked,
      availableAsins,
      totalProducts: topAsinsChecked,
      fbaCount: fba,
      fbmCount: fbm,
      skipCount: skip,
      status: status as typeof runs.$inferInsert.status,
      errorMessage: error || null,
      ...(isFinished ? { completedAt: new Date() } : {}),
    })
    .where(eq(runs.id, runId));
}
/**
 * Upserts one `categoryProductResults` row per analysis result.
 *
 * Rows conflict on `asin`; on conflict every listed column is overwritten with
 * the incoming (`EXCLUDED`) value, so the stored row always reflects the latest
 * run. Does nothing when `results` is empty.
 *
 * @param runId - Id of the `runs` row these results belong to.
 * @param results - Enriched products paired with their verdicts.
 */
export async function insertProductAnalysisResults(
  runId: number,
  results: AnalysisResult[],
): Promise<void> {
  if (results.length === 0) return;

  const rows = results.map(({ product, verdict }) => {
    const { record, keepa, spApi } = product;

    // Price preference: Keepa current price, then the sheet price, then the
    // SP-API estimate.
    const currentPrice =
      keepa?.currentPrice ??
      record.sellingPriceFromSheet ??
      spApi.estimatedSalePrice;
    const salesRank = keepa?.salesRank ?? record.amazonRank;

    // Tri-state sellability flag stored as text.
    let canSell: "unknown" | "yes" | "no";
    if (spApi.canSell == null) {
      canSell = "unknown";
    } else {
      canSell = spApi.canSell ? "yes" : "no";
    }

    return {
      asin: record.asin,
      runId,
      name: record.name,
      brand: record.brand ?? null,
      category: record.category ?? keepa?.categoryTree?.join(" > ") ?? null,
      unitCost: record.unitCost ?? null,
      currentPrice: currentPrice ?? null,
      avgPrice90d: keepa?.avgPrice90 ?? null,
      avgPrice90dSheet: record.avgPrice90FromSheet ?? null,
      sellingPriceSheet: record.sellingPriceFromSheet ?? null,
      salesRank: salesRank ?? null,
      salesRankAvg90d: keepa?.salesRankAvg90 ?? null,
      sellerCount: keepa?.sellerCount ?? null,
      amazonIsSeller: keepa?.amazonIsSeller ?? null,
      amazonBuyboxSharePct90d: keepa?.amazonBuyboxSharePct90d ?? null,
      monthlySold: keepa?.monthlySold ?? null,
      rankDrops30d: keepa?.salesRankDrops30 ?? null,
      rankDrops90d: keepa?.salesRankDrops90 ?? null,
      fbaFee: spApi.fbaFee ?? null,
      fbmFee: spApi.fbmFee ?? null,
      referralPercent: spApi.referralFeePercent ?? null,
      canSell,
      sellabilityStatus: spApi.sellabilityStatus ?? null,
      sellabilityReason: spApi.sellabilityReason ?? null,
      verdict: verdict.verdict,
      confidence: verdict.confidence,
      reasoning: verdict.reasoning ?? null,
      fetchedAt: new Date(product.fetchedAt),
    };
  });

  // Every non-key column takes the value from the row that failed to insert.
  const overwriteWithIncoming = {
    runId: sql`EXCLUDED.run_id`,
    name: sql`EXCLUDED.name`,
    brand: sql`EXCLUDED.brand`,
    category: sql`EXCLUDED.category`,
    unitCost: sql`EXCLUDED.unit_cost`,
    currentPrice: sql`EXCLUDED.current_price`,
    avgPrice90d: sql`EXCLUDED.avg_price_90d`,
    avgPrice90dSheet: sql`EXCLUDED.avg_price_90d_sheet`,
    sellingPriceSheet: sql`EXCLUDED.selling_price_sheet`,
    salesRank: sql`EXCLUDED.sales_rank`,
    salesRankAvg90d: sql`EXCLUDED.sales_rank_avg_90d`,
    sellerCount: sql`EXCLUDED.seller_count`,
    amazonIsSeller: sql`EXCLUDED.amazon_is_seller`,
    amazonBuyboxSharePct90d: sql`EXCLUDED.amazon_buybox_share_pct_90d`,
    monthlySold: sql`EXCLUDED.monthly_sold`,
    rankDrops30d: sql`EXCLUDED.rank_drops_30d`,
    rankDrops90d: sql`EXCLUDED.rank_drops_90d`,
    fbaFee: sql`EXCLUDED.fba_fee`,
    fbmFee: sql`EXCLUDED.fbm_fee`,
    referralPercent: sql`EXCLUDED.referral_percent`,
    canSell: sql`EXCLUDED.can_sell`,
    sellabilityStatus: sql`EXCLUDED.sellability_status`,
    sellabilityReason: sql`EXCLUDED.sellability_reason`,
    verdict: sql`EXCLUDED.verdict`,
    confidence: sql`EXCLUDED.confidence`,
    reasoning: sql`EXCLUDED.reasoning`,
    fetchedAt: sql`EXCLUDED.fetched_at`,
  };

  await db.insert(categoryProductResults).values(rows).onConflictDoUpdate({
    target: categoryProductResults.asin,
    set: overwriteWithIncoming,
  });
}
/**
 * Reads a category blacklist CSV (`id,name` per line) into a set of ids.
 *
 * Blank lines, lines starting with `#`, and a header row whose first cell is
 * `id` are skipped silently. Rows with a missing or invalid id and duplicate
 * ids are skipped with a warning; a missing name only triggers a warning.
 *
 * @param filePath - Path of the CSV file.
 * @returns The blacklisted category ids; empty when the file does not exist.
 */
function loadCategoryBlacklist(filePath: string): Set<number> {
  const excluded = new Set<number>();

  if (!existsSync(filePath)) {
    log(
      "warn",
      `Blacklist file not found at ${filePath}; continuing with no excluded categories.`,
    );
    return excluded;
  }

  const rows = readFileSync(filePath, "utf8").split(/\r?\n/);
  rows.forEach((rawRow, index) => {
    const lineNumber = index + 1;
    const trimmed = rawRow.trim();
    if (trimmed === "" || trimmed.startsWith("#")) return;

    const [idPart, namePart] = trimmed.split(",", 2);
    const idToken = (idPart ?? "").trim();
    const nameToken = (namePart ?? "").trim();

    // Allow header row: id,name
    if (idToken.toLowerCase() === "id") return;

    if (idToken === "") {
      log(
        "warn",
        `Blacklist CSV line ${lineNumber}: missing id, row ignored (${trimmed}).`,
      );
      return;
    }

    const id = Number(idToken);
    if (!(Number.isInteger(id) && id > 0)) {
      log(
        "warn",
        `Blacklist CSV line ${lineNumber}: invalid id '${idToken}', row ignored (${trimmed}).`,
      );
      return;
    }

    if (nameToken === "") {
      log(
        "warn",
        `Blacklist CSV line ${lineNumber}: missing name for id ${id}; accepted but please add name.`,
      );
    }

    if (excluded.has(id)) {
      log(
        "warn",
        `Blacklist CSV line ${lineNumber}: duplicate id ${id}, keeping first occurrence.`,
      );
      return;
    }

    excluded.add(id);
  });

  return excluded;
}
/**
 * Verifies that the SP-API credentials needed by this script are present in
 * `config`.
 *
 * @throws Error naming every environment variable whose config value is falsy.
 */
function assertSpApiPrerequisites(): void {
  const required: Array<[unknown, string]> = [
    [config.spApiClientId, "SP_API_CLIENT_ID"],
    [config.spApiClientSecret, "SP_API_CLIENT_SECRET"],
    [config.spApiRefreshToken, "SP_API_REFRESH_TOKEN"],
    [config.spApiSellerId, "SP_API_SELLER_ID"],
  ];

  const missing = required
    .filter(([value]) => !value)
    .map(([, envName]) => envName);

  if (missing.length === 0) return;
  throw new Error(`Missing required SP-API env vars: ${missing.join(", ")}`);
}
/**
 * Returns a promise that resolves after `ms` milliseconds (via `setTimeout`).
 *
 * @param ms - Delay in milliseconds.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
/**
 * Turns arbitrary text into a lowercase, dash-separated token.
 *
 * Whitespace runs and any characters outside `a-z`, `0-9`, `-`, `_` become
 * dashes, dash runs are collapsed, and one leading/trailing dash is removed.
 *
 * @param value - Text to sanitize.
 * @returns The sanitized token, or `"category"` when nothing remains.
 */
function sanitizeFileSegment(value: string): string {
  const lowered = value.trim().toLowerCase();
  const dashed = lowered.replace(/\s+/g, "-");
  const restricted = dashed.replace(/[^a-z0-9-_]+/g, "-");
  const collapsed = restricted.replace(/-+/g, "-");
  const stripped = collapsed.replace(/^-|-$/g, "");
  return stripped || "category";
}
/**
 * Extracts rate-limit hints from a Keepa error response body.
 *
 * Each field is kept only when the JSON value is a finite number; `refillIn`
 * is additionally clamped to be non-negative.
 *
 * @param text - Raw response body, expected to be JSON.
 * @returns The three hint fields (each possibly `undefined`), or an empty
 *   object when `text` is not valid JSON.
 */
function parseKeepaRateLimitPayload(text: string): {
  refillInMs?: number;
  tokensLeft?: number;
  refillRate?: number;
} {
  const finiteOrUndefined = (candidate: unknown): number | undefined =>
    typeof candidate === "number" && Number.isFinite(candidate)
      ? candidate
      : undefined;

  let payload: any;
  try {
    payload = JSON.parse(text);
  } catch {
    return {};
  }

  const refillIn = finiteOrUndefined(payload?.refillIn);
  return {
    refillInMs: refillIn === undefined ? undefined : Math.max(0, refillIn),
    tokensLeft: finiteOrUndefined(payload?.tokensLeft),
    refillRate: finiteOrUndefined(payload?.refillRate),
  };
}
/**
 * Computes how long to wait before retrying a rate-limited request.
 *
 * The wait is the larger of an exponential delay (`2^attempt` seconds, capped
 * at 60s) and, when given, the refill hint plus 1.5s; up to 749ms of random
 * jitter is added on top.
 *
 * @param attempt - Retry number used as the exponent.
 * @param refillInMs - Optional refill hint in milliseconds.
 * @returns Delay in milliseconds.
 */
function computeBackoffMs(attempt: number, refillInMs?: number): number {
  const exponentialMs = Math.min(60_000, 2 ** attempt * 1000);
  const refillMs = refillInMs == null ? 0 : refillInMs + 1500;
  const jitterMs = Math.floor(Math.random() * 750);
  return Math.max(refillMs, exponentialMs) + jitterMs;
}
/**
 * Blocks until the locally tracked Keepa token balance allows another request.
 *
 * Returns at once when tokens remain. Otherwise it credits tokens estimated
 * from the time since the last request and the refill rate; if none have
 * regenerated yet it sleeps for the remainder of one refill interval and then
 * sets the balance to 1.
 */
async function waitForKeepaToken(): Promise<void> {
  if (keepaTokensLeft > 0) return;

  const minutesSinceLastRequest = (Date.now() - keepaLastRequestMs) / 60_000;
  const regenerated = Math.floor(minutesSinceLastRequest * keepaRefillRate);
  if (regenerated > 0) {
    keepaTokensLeft += regenerated;
    return;
  }

  // Time for a single token to regenerate, minus what has already elapsed.
  const oneTokenMs = Math.ceil((1 / keepaRefillRate) * 60_000);
  const waitMs = oneTokenMs - (Date.now() - keepaLastRequestMs);
  if (waitMs > 0) {
    log(
      "info",
      `Keepa tokens depleted; waiting ${Math.ceil(waitMs / 1000)}s...`,
    );
    await sleep(waitMs);
  }
  keepaTokensLeft = 1;
}
/**
 * Performs a GET against the Keepa API and returns the parsed JSON body.
 *
 * Waits for a token before each attempt, records the request time, and keeps
 * the module-level token state in sync with `tokensLeft` / `refillRate` from
 * the response. HTTP 429 responses are retried indefinitely with backoff.
 *
 * @param pathAndQuery - Path plus query string appended to `KEEPA_BASE`.
 * @returns The decoded JSON response.
 * @throws Error for any non-OK status other than 429.
 */
async function keepaGetJson(pathAndQuery: string): Promise<any> {
  // Updates the shared token counters from whatever the response reported.
  const rememberTokenState = (tokensLeft: unknown, refillRate: unknown): void => {
    if (typeof tokensLeft === "number") {
      keepaTokensLeft = tokensLeft;
    }
    if (typeof refillRate === "number" && refillRate > 0) {
      keepaRefillRate = refillRate;
    }
  };

  const url = `${KEEPA_BASE}${pathAndQuery}`;
  let rateLimitHits = 0;

  for (;;) {
    await waitForKeepaToken();
    const response = await fetch(url);
    keepaLastRequestMs = Date.now();

    if (response.ok) {
      const data = (await response.json()) as any;
      rememberTokenState(data?.tokensLeft, data?.refillRate);
      return data;
    }

    const text = await response.text();
    if (response.status !== 429) {
      throw new Error(`Keepa HTTP ${response.status}: ${text}`);
    }

    const rate = parseKeepaRateLimitPayload(text);
    rememberTokenState(rate.tokensLeft, rate.refillRate);
    rateLimitHits += 1;
    const waitMs = computeBackoffMs(rateLimitHits, rate.refillInMs);
    log(
      "warn",
      `Keepa rate limited (429). Retry ${rateLimitHits} in ${Math.ceil(waitMs / 1000)}s...`,
    );
    await sleep(waitMs);
  }
}
/**
 * Converts a Keepa category response into a de-duplicated `CategoryInfo` list.
 *
 * Entries are read from `data.categories` (array or keyed object) and from
 * `data.categoryList`. An entry is dropped when its id is not a positive
 * integer, its label is "root" (case-insensitive), or it has no parent field
 * (parent resolves to -1). The first entry seen for an id wins.
 *
 * @param data - Raw Keepa response of unknown shape.
 * @returns Normalized categories in first-seen order.
 */
function normalizeCategoryList(data: any): CategoryInfo[] {
  const byId = new Map<number, CategoryInfo>();

  const register = (entry: any): void => {
    const id = Number(
      entry?.catId ?? entry?.categoryId ?? entry?.id ?? entry?.nodeId,
    );
    if (!Number.isInteger(id) || id <= 0) return;

    const fallbackLabel = `Category ${id}`;
    const label = String(entry?.name ?? entry?.label ?? fallbackLabel).trim();
    const parentId = Number(entry?.parent ?? entry?.parentId ?? -1);
    if (label.toLowerCase() === "root" || parentId === -1) return;
    if (byId.has(id)) return;

    let childCount = 0;
    if (Array.isArray(entry?.children)) {
      childCount = entry.children.length;
    } else if (Number.isInteger(entry?.childCount)) {
      childCount = Number(entry.childCount);
    }

    byId.set(id, {
      id,
      label: label || fallbackLabel,
      parentId,
      childCount: Math.max(0, childCount),
    });
  };

  const rawCategories = data?.categories;
  const sources: unknown[][] = [];
  if (Array.isArray(rawCategories)) {
    sources.push(rawCategories);
  }
  if (rawCategories && typeof rawCategories === "object") {
    sources.push(Object.values(rawCategories));
  }
  if (Array.isArray(data?.categoryList)) {
    sources.push(data.categoryList);
  }

  for (const source of sources) {
    for (const entry of source) {
      register(entry);
    }
  }

  return Array.from(byId.values());
}
/**
 * Reorders categories into four groups, preserving input order inside each:
 * named leaves, fallback-labelled leaves, named non-leaves, fallback-labelled
 * non-leaves. A leaf has `childCount === 0`; a fallback label looks like
 * `Category <digits>`.
 *
 * @param categories - Categories to reorder (not mutated).
 * @returns A new array in priority order.
 */
function prioritizeLikelyBestsellerCategories(
  categories: CategoryInfo[],
): CategoryInfo[] {
  const fallbackLabel = /^Category\s+\d+$/i;

  const namedLeaves: CategoryInfo[] = [];
  const fallbackLeaves: CategoryInfo[] = [];
  const namedParents: CategoryInfo[] = [];
  const fallbackParents: CategoryInfo[] = [];

  for (const entry of categories) {
    const isLeaf = entry.childCount === 0;
    const isFallback = fallbackLabel.test(entry.label);
    if (isLeaf) {
      (isFallback ? fallbackLeaves : namedLeaves).push(entry);
    } else {
      (isFallback ? fallbackParents : namedParents).push(entry);
    }
  }

  return namedLeaves.concat(fallbackLeaves, namedParents, fallbackParents);
}
/**
 * Walks parent links upward from `category` and returns the last category
 * reached.
 *
 * The walk stops when the current category has no positive `parentId`, when
 * its parent is not in `byId`, or when a category is revisited (cycle guard).
 *
 * @param category - Starting category.
 * @param byId - Lookup of known categories by id.
 * @returns The topmost reachable ancestor, or `category` itself.
 */
function resolveRootCategory(
  category: CategoryInfo,
  byId: Map<number, CategoryInfo>,
): CategoryInfo {
  const visited = new Set<number>();
  let node = category;

  for (;;) {
    if (!(node.parentId > 0) || visited.has(node.id)) {
      return node;
    }
    visited.add(node.id);

    const parent = byId.get(node.parentId);
    if (!parent) {
      return node;
    }
    node = parent;
  }
}
/**
 * Picks up to `maxCategories` categories, spreading the picks across root
 * categories in round-robin fashion.
 *
 * Categories are grouped by their resolved root (groups and members keep
 * input order). Selection then takes the first member of every group, then
 * the second of every group, and so on, until the limit is reached or all
 * groups are exhausted.
 *
 * @param categories - Candidate categories in priority order.
 * @param maxCategories - Maximum number of categories to return.
 * @returns The selected categories.
 */
function selectCategoriesAcrossRoots(
  categories: CategoryInfo[],
  maxCategories: number,
): CategoryInfo[] {
  const byId = new Map<number, CategoryInfo>();
  for (const entry of categories) {
    byId.set(entry.id, entry);
  }

  const bucketsByRoot = new Map<number, CategoryInfo[]>();
  for (const entry of categories) {
    const rootId = resolveRootCategory(entry, byId).id;
    const existing = bucketsByRoot.get(rootId);
    if (existing) {
      existing.push(entry);
    } else {
      bucketsByRoot.set(rootId, [entry]);
    }
  }

  const buckets = [...bucketsByRoot.values()];
  const picked: CategoryInfo[] = [];

  for (let depth = 0; picked.length < maxCategories; depth++) {
    let advanced = false;
    for (const bucket of buckets) {
      if (depth >= bucket.length) continue;
      picked.push(bucket[depth]!);
      advanced = true;
      if (picked.length >= maxCategories) break;
    }
    // Every bucket is exhausted at this depth; nothing more to pick.
    if (!advanced) break;
  }

  return picked;
}
/**
 * Fetches the category list from Keepa (requesting category 0 for the US
 * domain), normalizes and prioritizes it, and selects categories across roots.
 *
 * @param maxCategories - Maximum number of categories to return.
 * @returns The selected categories.
 * @throws Error when the response yields no usable categories.
 */
async function discoverCategories(
  maxCategories: number,
): Promise<CategoryInfo[]> {
  const apiKey = encodeURIComponent(config.keepaApiKey);
  const response = await keepaGetJson(
    `/category?key=${apiKey}&domain=${DOMAIN_US}&category=0`,
  );

  const usable = normalizeCategoryList(response);
  if (usable.length === 0) {
    throw new Error("Keepa category discovery returned no usable categories.");
  }

  return selectCategoriesAcrossRoots(
    prioritizeLikelyBestsellerCategories(usable),
    maxCategories,
  );
}
/**
 * Requests the Keepa best-seller list for a category and returns its ASINs.
 *
 * The response is probed at several possible locations; the first one that
 * holds an array is used. Entries are stringified, trimmed, emptied values
 * removed, de-duplicated, and truncated to `limit`.
 *
 * @param category - Category to query; `sublist` is "1" when it has a parent.
 * @param limit - Maximum number of ASINs to return.
 * @returns Unique ASINs, or an empty array when no list is found.
 */
async function fetchCategoryBestSellerAsins(
  category: CategoryInfo,
  limit: number,
): Promise<string[]> {
  const params = new URLSearchParams({
    key: config.keepaApiKey,
    domain: String(DOMAIN_US),
    category: String(category.id),
    range: "0",
    variations: "0",
    sublist: category.parentId > 0 ? "1" : "0",
  });
  const data = await keepaGetJson(`/bestsellers?${params.toString()}`);

  // Known/possible locations of the ASIN array, in lookup order.
  const list = data?.bestSellersList;
  const possibleLocations = [
    list,
    list?.asinList,
    list?.asins,
    list?.bestSellers,
    list?.bestSellerAsins,
    data?.asinList,
    data?.asins,
    data?.bestsellers,
    data?.bestSellers,
    data?.bestSellerAsins,
    data?.bestsellerList?.asinList,
    data?.categories?.[String(category.id)]?.asinList,
  ];

  const asinArray = possibleLocations.find((location) =>
    Array.isArray(location),
  );
  if (!asinArray) return [];

  const cleaned: string[] = asinArray
    .map((entry: unknown) => String(entry).trim())
    .filter(Boolean);
  return [...new Set(cleaned)].slice(0, limit);
}
/**
 * Looks up sellability for every ASIN, `SELLABILITY_BATCH_SIZE` at a time.
 *
 * ASINs missing from a batch response get an "unknown" placeholder, so the
 * returned map has an entry for every input ASIN. Progress is logged after
 * each batch.
 *
 * @param asins - ASINs to check.
 * @returns Sellability info keyed by ASIN.
 */
async function fetchSellabilityMap(
  asins: string[],
): Promise<Map<string, SellabilityInfo>> {
  const byAsin = new Map<string, SellabilityInfo>();
  let offset = 0;

  while (offset < asins.length) {
    const batch = asins.slice(offset, offset + SELLABILITY_BATCH_SIZE);
    const batchResults = await fetchSellabilityBatch(batch);

    batch.forEach((asin) => {
      const info = batchResults.get(asin) ?? {
        canSell: null,
        sellabilityStatus: "unknown" as const,
        sellabilityReason: "Sellability check returned no result",
      };
      byAsin.set(asin, info);
    });

    log(
      "info",
      ` Sellability progress: ${Math.min(offset + batch.length, asins.length)}/${asins.length}`,
    );
    offset += SELLABILITY_BATCH_SIZE;
  }

  return byAsin;
}
/**
 * Fetches SP-API pricing and fees for each ASIN using a small worker pool.
 *
 * Up to `PRICING_CONCURRENCY` workers pull ASINs from a shared queue. Progress
 * is logged every 10 completions and on the last one.
 *
 * @param asins - ASINs to price.
 * @param sellabilityMap - Sellability info passed along with each request; an
 *   "unknown" placeholder is used for ASINs without an entry.
 * @returns SP-API data keyed by ASIN.
 */
async function fetchSpApiMap(
  asins: string[],
  sellabilityMap: Map<string, SellabilityInfo>,
): Promise<Map<string, SpApiData>> {
  const pending = [...asins];
  const byAsin = new Map<string, SpApiData>();
  const total = asins.length;
  let completed = 0;

  const drainQueue = async (): Promise<void> => {
    // Stops when the queue is empty (or yields a falsy ASIN).
    for (let asin = pending.shift(); asin; asin = pending.shift()) {
      const sellability = sellabilityMap.get(asin) ?? {
        canSell: null,
        sellabilityStatus: "unknown",
        sellabilityReason: "Sellability missing",
      };
      byAsin.set(asin, await fetchSpApiPricingAndFees(asin, sellability));
      completed++;
      if (completed % 10 === 0 || completed === total) {
        log("info", ` Pricing progress: ${completed}/${total}`);
      }
    }
  };

  const workerCount = Math.min(PRICING_CONCURRENCY, total || 1);
  const workers: Promise<void>[] = [];
  for (let w = 0; w < workerCount; w++) {
    workers.push(drainQueue());
  }
  await Promise.all(workers);

  return byAsin;
}
/**
 * Returns the first argument that is a finite, non-negative number.
 *
 * @param values - Candidates in priority order; non-numbers, NaN, infinities
 *   and negative numbers are skipped.
 * @returns The first acceptable number, or `null` when there is none.
 */
function pickKeepaNumber(...values: unknown[]): number | null {
  const usable = values.find(
    (candidate): candidate is number =>
      typeof candidate === "number" &&
      Number.isFinite(candidate) &&
      candidate >= 0,
  );
  return usable ?? null;
}
/**
 * Derives a current price from Keepa `csv` history arrays.
 *
 * Checks series 0 and then series 1; the first one whose final element is a
 * positive number supplies the price, which is divided by 100 and rounded to
 * two decimals.
 *
 * @param csv - Keepa price-history series, if present.
 * @returns The price, or `null` when neither series ends in a positive number.
 */
function extractCurrentPrice(csv: number[][] | undefined): number | null {
  if (!Array.isArray(csv)) return null;

  for (let seriesIndex = 0; seriesIndex < 2; seriesIndex++) {
    const series = csv[seriesIndex];
    if (!Array.isArray(series) || series.length < 2) continue;

    const finalValue = series[series.length - 1];
    if (typeof finalValue === "number" && finalValue > 0) {
      return Math.round((finalValue / 100) * 100) / 100;
    }
  }

  return null;
}
/**
 * Maps a raw Keepa product object onto the `KeepaData` shape.
 *
 * Fallbacks applied here: 90-day rank drops default to three times the 30-day
 * drops, and `monthlySold` defaults to the 30-day rank drops. Price-like stats
 * are divided by 100. The Amazon buy box share is taken from explicit stats
 * when present, otherwise computed from the buy box seller history.
 *
 * @param product - Raw product entry from a Keepa /product response.
 * @returns The normalized Keepa data.
 */
function parseKeepaProduct(product: Record<string, any>): KeepaData {
  const { stats, csv } = product;

  // Stat values that are present are divided by 100; missing ones become null.
  const scaled = (raw: any): number | null => (raw != null ? raw / 100 : null);

  const drops30 = pickKeepaNumber(
    product.salesRankDrops30,
    stats?.salesRankDrops30,
  );

  let drops90 = pickKeepaNumber(
    product.salesRankDrops90,
    stats?.salesRankDrops90,
  );
  if (drops90 == null && drops30 != null) {
    drops90 = drops30 * 3;
  }

  const monthlySold =
    pickKeepaNumber(product.monthlySold, stats?.monthlySold) ?? drops30;

  const amazonIsSeller = resolveAmazonIsSeller(product, stats, csv);

  let amazonBuyboxSharePct90d = extractAmazonBuyboxSharePct90d(product, stats);
  if (amazonBuyboxSharePct90d == null) {
    amazonBuyboxSharePct90d = computeAmazonBuyBoxSharePctFromHistory(
      product.buyBoxSellerIdHistory,
      90,
      new Set([AMAZON_US_SELLER_ID]),
    );
  }

  return {
    currentPrice: extractCurrentPrice(csv),
    avgPrice90: scaled(stats?.avg?.[0]),
    minPrice90: scaled(stats?.min?.[0]),
    maxPrice90: scaled(stats?.max?.[0]),
    salesRank: stats?.current?.[3] ?? null,
    salesRankAvg90: stats?.avg?.[3] ?? null,
    salesRankDrops30: drops30,
    salesRankDrops90: drops90,
    sellerCount: stats?.current?.[11] ?? null,
    amazonIsSeller,
    amazonBuyboxSharePct90d,
    buyBoxSeller: product.buyBoxSellerId ?? null,
    buyBoxPrice: scaled(stats?.current?.[18]),
    monthlySold,
    categoryTree:
      product.categoryTree?.map((node: { name: string }) => node.name) ?? [],
  };
}
/**
 * Decides whether Amazon itself appears to be selling the product, trying
 * several signals in order:
 *
 * 1. `product.isAmazonSeller` when it is a boolean.
 * 2. `product.availabilityAmazon`: >= 0 means yes, -1 / -2 means no.
 * 3. `stats.buyBoxIsAmazon === true` means yes.
 * 4. `stats.current[0]`: > 0 means yes, -1 / -2 means no.
 * 5. A positive latest value in `csv[0]` means yes.
 *
 * @returns `true`/`false` when a signal is conclusive, otherwise `null`.
 */
function resolveAmazonIsSeller(
  product: Record<string, any>,
  stats: Record<string, any> | undefined,
  csv: number[][] | undefined,
): boolean | null {
  // Codes that the checks below treat as "Amazon is not offering the item".
  const isNoOfferCode = (code: number): boolean => code === -1 || code === -2;

  const { isAmazonSeller, availabilityAmazon } = product;
  if (typeof isAmazonSeller === "boolean") {
    return isAmazonSeller;
  }

  if (typeof availabilityAmazon === "number") {
    if (availabilityAmazon >= 0) return true;
    if (isNoOfferCode(availabilityAmazon)) return false;
  }

  if (stats?.buyBoxIsAmazon === true) return true;

  const currentSeriesZero = stats?.current?.[0];
  if (typeof currentSeriesZero === "number") {
    if (currentSeriesZero > 0) return true;
    if (isNoOfferCode(currentSeriesZero)) return false;
  }

  return extractLatestPositivePrice(csv?.[0]) != null ? true : null;
}
/**
 * Looks for a ready-made 90-day Amazon buy box share in the product payload.
 *
 * Several possible field locations are probed in order; the first finite
 * number within 0..100 is returned, rounded to two decimals.
 *
 * @param product - Raw Keepa product entry.
 * @param stats - The product's `stats` object, if any.
 * @returns The share as a percentage, or `null` when no location has one.
 */
function extractAmazonBuyboxSharePct90d(
  product: Record<string, any>,
  stats: Record<string, any> | undefined,
): number | null {
  const buyBoxStats = product.buyBoxStats;
  const possibleValues: unknown[] = [
    product.buyBoxStatsAmazon90,
    stats?.buyBoxStatsAmazon90,
    buyBoxStats?.amazon90,
    buyBoxStats?.amazon?.[90],
    buyBoxStats?.amazon?.["90"],
    buyBoxStats?.[AMAZON_US_SELLER_ID]?.[90],
    buyBoxStats?.[AMAZON_US_SELLER_ID]?.["90"],
  ];

  const share = possibleValues.find(
    (candidate): candidate is number =>
      typeof candidate === "number" &&
      Number.isFinite(candidate) &&
      candidate >= 0 &&
      candidate <= 100,
  );

  return share === undefined ? null : Math.round(share * 100) / 100;
}
/**
 * Estimates the share of time Amazon held the buy box within a recent window.
 *
 * `history` is read as a flat list of (start minute, seller id) pairs; each
 * pair lasts until the next pair's start minute, or until now for the last
 * one. Minutes are in Keepa time (Unix minutes minus `KEEPA_MINUTES_OFFSET`).
 * Intervals are clipped to the window, and intervals whose seller id is "-1"
 * or "-2" are left out of both numerator and denominator.
 *
 * @param history - Raw buy box seller history.
 * @param windowDays - Length of the look-back window in days.
 * @param amazonSellerIds - Upper-case seller ids counted as Amazon.
 * @returns Percentage rounded to two decimals, or `null` when the history is
 *   unusable or no minutes qualify.
 */
function computeAmazonBuyBoxSharePctFromHistory(
  history: unknown,
  windowDays: number,
  amazonSellerIds: Set<string>,
): number | null {
  if (!Array.isArray(history) || history.length < 2) return null;

  const nowMinute = Math.floor(Date.now() / 60_000) - KEEPA_MINUTES_OFFSET;
  const windowStart = nowMinute - windowDays * 24 * 60;

  let countedMinutes = 0;
  let amazonMinutes = 0;

  for (let idx = 0; idx + 1 < history.length; idx += 2) {
    const from = Number.parseInt(String(history[idx]), 10);
    const sellerId = String(history[idx + 1] ?? "").toUpperCase();
    const hasNextPair = idx + 2 < history.length;
    const until = Number.parseInt(
      String(hasNextPair ? history[idx + 2] : nowMinute),
      10,
    );

    const timestampsUsable =
      Number.isFinite(from) && Number.isFinite(until) && until > from;
    if (!timestampsUsable) continue;

    // Clip the interval to [windowStart, nowMinute].
    const clippedFrom = Math.max(from, windowStart);
    const clippedUntil = Math.min(until, nowMinute);
    if (clippedUntil <= clippedFrom) continue;

    if (sellerId === "-1" || sellerId === "-2") continue;

    const span = clippedUntil - clippedFrom;
    countedMinutes += span;
    if (amazonSellerIds.has(sellerId)) {
      amazonMinutes += span;
    }
  }

  if (countedMinutes === 0) return null;
  return Math.round((amazonMinutes / countedMinutes) * 10_000) / 100;
}
/**
 * Reads the final element of a history series as a price.
 *
 * @param series - Candidate series; must be an array with at least two items.
 * @returns The final element divided by 100 when it is a finite positive
 *   number, otherwise `null`.
 */
function extractLatestPositivePrice(series: unknown): number | null {
  if (!Array.isArray(series) || series.length < 2) return null;

  const finalValue: unknown = series[series.length - 1];
  const isUsable =
    typeof finalValue === "number" &&
    Number.isFinite(finalValue) &&
    finalValue > 0;

  return isUsable ? (finalValue as number) / 100 : null;
}
/**
 * Downloads Keepa product data for the given ASINs,
 * `KEEPA_PRODUCT_CHUNK_SIZE` per request, and parses each product.
 *
 * Products without an `asin` are ignored; ASINs that Keepa does not return
 * are simply absent from the result. Progress is logged after each request.
 *
 * @param asins - ASINs to enrich.
 * @returns Parsed Keepa data and trimmed title keyed by ASIN.
 */
async function fetchKeepaEnrichmentMap(
  asins: string[],
): Promise<Map<string, { keepa: KeepaData; title: string }>> {
  const enriched = new Map<string, { keepa: KeepaData; title: string }>();
  const apiKey = encodeURIComponent(config.keepaApiKey);
  let offset = 0;

  while (offset < asins.length) {
    const batch = asins.slice(offset, offset + KEEPA_PRODUCT_CHUNK_SIZE);
    const asinParam = encodeURIComponent(batch.join(","));
    const data = await keepaGetJson(
      `/product?key=${apiKey}&domain=${DOMAIN_US}&asin=${asinParam}&stats=90&buybox=1&days=90`,
    );

    const products = Array.isArray(data?.products) ? data.products : [];
    for (const product of products) {
      const asin = String(product?.asin ?? "").trim();
      if (asin === "") continue;
      const title = String(product?.title ?? "").trim();
      enriched.set(asin, { keepa: parseKeepaProduct(product), title });
    }

    log(
      "info",
      ` Keepa enrichment progress: ${Math.min(offset + batch.length, asins.length)}/${asins.length}`,
    );
    offset += KEEPA_PRODUCT_CHUNK_SIZE;
  }

  return enriched;
}
/**
 * Ranks ASINs by Keepa `monthlySold` and keeps the best ones.
 *
 * ASINs without a numeric `monthlySold`, or below `minMonthlySold`, are
 * dropped. The rest are sorted in descending order (ties keep input order)
 * and truncated to `perCategoryTop`.
 *
 * @param asins - Candidate ASINs (not mutated).
 * @param keepaEnrichmentMap - Keepa data keyed by ASIN.
 * @param perCategoryTop - Maximum number of ASINs to return.
 * @param minMonthlySold - Inclusive lower bound on `monthlySold`.
 * @returns The selected ASINs, best seller first.
 */
function selectTopMonthlySoldAsins(
  asins: string[],
  keepaEnrichmentMap: Map<string, { keepa: KeepaData; title: string }>,
  perCategoryTop: number,
  minMonthlySold: number,
): string[] {
  const qualifying: Array<{ asin: string; monthlySold: number }> = [];

  for (const asin of asins) {
    const monthlySold = keepaEnrichmentMap.get(asin)?.keepa.monthlySold;
    if (typeof monthlySold === "number" && monthlySold >= minMonthlySold) {
      qualifying.push({ asin, monthlySold });
    }
  }

  qualifying.sort((left, right) => right.monthlySold - left.monthlySold);

  return qualifying.slice(0, perCategoryTop).map((entry) => entry.asin);
}
/**
 * Assembles one `EnrichedProduct` per ASIN from the sellability, SP-API and
 * Keepa lookups.
 *
 * Missing data is filled with placeholders: unknown sellability, zero fees
 * with a 15% referral fee and zero sale price, a `null` Keepa block, and the
 * ASIN itself as the product name. When the SP-API sale price is 0 and Keepa
 * has a (truthy) current price, the Keepa price is used as the estimated sale
 * price.
 *
 * The SP-API entry is copied before that price fallback is applied, so the
 * objects stored in `spApiMap` are never modified by this function.
 *
 * @param asins - ASINs to build products for, in output order.
 * @param sellabilityMap - Sellability info keyed by ASIN.
 * @param spApiMap - SP-API pricing/fee data keyed by ASIN (left untouched).
 * @param keepaEnrichmentMap - Parsed Keepa data and title keyed by ASIN.
 * @returns Enriched products, each stamped with the current time.
 */
function buildEnrichedProducts(
  asins: string[],
  sellabilityMap: Map<string, SellabilityInfo>,
  spApiMap: Map<string, SpApiData>,
  keepaEnrichmentMap: Map<string, { keepa: KeepaData; title: string }>,
): EnrichedProduct[] {
  return asins.map((asin) => {
    const sellability = sellabilityMap.get(asin) ?? {
      canSell: null,
      sellabilityStatus: "unknown" as const,
      sellabilityReason: "Sellability missing",
    };

    // Shallow copy so the price fallback below cannot leak into spApiMap.
    const spApi: SpApiData = {
      ...(spApiMap.get(asin) ?? {
        fbaFee: 0,
        fbmFee: 0,
        referralFeePercent: 15,
        estimatedSalePrice: 0,
        canSell: sellability.canSell,
        sellabilityStatus: sellability.sellabilityStatus,
        sellabilityReason: sellability.sellabilityReason,
      }),
    };

    const enrichedKeepa = keepaEnrichmentMap.get(asin);
    const keepa = enrichedKeepa?.keepa ?? null;
    const title = enrichedKeepa?.title ?? asin;

    // No sheet data exists in this flow, so cost and descriptive fields stay
    // at their neutral values.
    const record: ProductRecord = {
      asin,
      name: title,
      unitCost: 0,
      category: undefined,
      brand: undefined,
      supplier: undefined,
    };

    // Fall back to the Keepa price when SP-API produced no sale price.
    if (keepa?.currentPrice && spApi.estimatedSalePrice === 0) {
      spApi.estimatedSalePrice = keepa.currentPrice;
    }

    return {
      record,
      keepa,
      spApi,
      fetchedAt: new Date().toISOString(),
    };
  });
}
/**
 * Runs the full pipeline for one category and keeps its `runs` row up to date.
 *
 * Steps: fetch best-seller candidates, keep ASINs that are sellable, enrich
 * them with Keepa data, pick the top sellers by `monthlySold`, fetch SP-API
 * pricing, then analyze with the LLM in batches of `LLM_BATCH_SIZE`. Each
 * batch is persisted and the run row refreshed before the next batch starts.
 * A failed LLM batch is recorded as SKIP verdicts rather than aborting.
 *
 * If any stage yields nothing, the run is marked "empty" with an explanatory
 * message and an empty result list is returned.
 *
 * @param runId - Id of the `runs` row created for this category.
 * @param category - Category to process.
 * @param perCategoryTop - Maximum number of ASINs to analyze.
 * @param categoryCandidatePool - Number of best-seller candidates to request.
 * @param minMonthlySold - Minimum `monthlySold` for an ASIN to be selected.
 * @param useClaude - Forwarded to `analyzeProducts`.
 * @returns The final summary for the category, including analysis results.
 */
export async function processCategory(
  runId: number,
  category: CategoryInfo,
  perCategoryTop: number,
  categoryCandidatePool: number,
  minMonthlySold: number,
  useClaude = false,
): Promise<CategoryRunSummary> {
  const identity = {
    categoryId: category.id,
    categoryLabel: category.label,
  };

  // Persists and returns an "empty" outcome with all verdict counters at 0.
  const finishEmpty = async (
    topAsinsChecked: number,
    error: string,
  ): Promise<CategoryRunSummary> => {
    await updateCategoryRunSummary(runId, {
      topAsinsChecked,
      availableAsins: 0,
      fba: 0,
      fbm: 0,
      skip: 0,
      status: "empty",
      error,
    });
    return {
      ...identity,
      topAsinsChecked,
      availableAsins: 0,
      fba: 0,
      fbm: 0,
      skip: 0,
      status: "empty",
      error,
      results: [],
    };
  };

  // Placeholder verdict used when the LLM gives nothing usable for an ASIN.
  const skipVerdict = (asin: string, reasoning: string): LlmVerdict => ({
    asin,
    verdict: "SKIP",
    confidence: 0,
    reasoning,
  });

  log("info", `\nCategory ${category.label} (${category.id})`);

  // 1) Candidate pool from Keepa best sellers.
  const topAsins = await fetchCategoryBestSellerAsins(
    category,
    categoryCandidatePool,
  );
  if (topAsins.length === 0) {
    log("info", " Keepa returned no ASINs for this category.");
    return finishEmpty(0, "No ASINs returned by Keepa");
  }

  const uniqueTopAsins = [...new Set(topAsins)];
  const duplicateCount = topAsins.length - uniqueTopAsins.length;
  if (duplicateCount !== 0) {
    log(
      "warn",
      ` Removed ${duplicateCount} duplicate ASINs before analysis.`,
    );
  }
  const checkedCount = uniqueTopAsins.length;
  log("info", ` Candidate ASINs fetched: ${checkedCount}`);

  // 2) Keep only ASINs that are confirmed sellable.
  const sellabilityMap = await fetchSellabilityMap(uniqueTopAsins);
  const availableAsins = uniqueTopAsins.filter((asin) => {
    const info = sellabilityMap.get(asin);
    return info?.canSell === true && info.sellabilityStatus === "available";
  });
  log("info", ` Sellable ASINs: ${availableAsins.length}/${checkedCount}`);
  if (availableAsins.length === 0) {
    return finishEmpty(checkedCount, "No sellable ASINs");
  }

  // 3) Rank by monthly sold and apply the threshold.
  const keepaEnrichment = await fetchKeepaEnrichmentMap(availableAsins);
  const selectedAsins = selectTopMonthlySoldAsins(
    availableAsins,
    keepaEnrichment,
    perCategoryTop,
    minMonthlySold,
  );
  log(
    "info",
    ` Selected by monthly sold >= ${minMonthlySold}: ${selectedAsins.length}/${availableAsins.length}`,
  );
  if (selectedAsins.length === 0) {
    return finishEmpty(
      checkedCount,
      `No sellable ASINs met monthly sold >= ${minMonthlySold}`,
    );
  }

  // 4) Pricing/fees and final product assembly.
  const spApiMap = await fetchSpApiMap(selectedAsins, sellabilityMap);
  const enrichedProducts = buildEnrichedProducts(
    selectedAsins,
    sellabilityMap,
    spApiMap,
    keepaEnrichment,
  );

  // 5) LLM analysis, persisted batch by batch.
  const results: AnalysisResult[] = [];
  const tally = { fba: 0, fbm: 0, skip: 0 };
  const saveProgress = (status: "running" | "ok"): Promise<void> =>
    updateCategoryRunSummary(runId, {
      topAsinsChecked: checkedCount,
      availableAsins: selectedAsins.length,
      fba: tally.fba,
      fbm: tally.fbm,
      skip: tally.skip,
      status,
      error: "",
    });

  const totalBatches = Math.ceil(enrichedProducts.length / LLM_BATCH_SIZE);
  for (
    let start = 0, batchNum = 1;
    start < enrichedProducts.length;
    start += LLM_BATCH_SIZE, batchNum++
  ) {
    const batch = enrichedProducts.slice(start, start + LLM_BATCH_SIZE);
    log("info", ` LLM batch ${batchNum}/${totalBatches}...`);

    let batchVerdicts: LlmVerdict[];
    try {
      batchVerdicts = await analyzeProducts(batch, { useClaude });
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      log("warn", ` LLM batch failed: ${message}`);
      batchVerdicts = batch.map((p) =>
        skipVerdict(p.record.asin, "LLM analysis failed"),
      );
    }

    const verdictByAsin = new Map(batchVerdicts.map((v) => [v.asin, v]));
    const batchResults: AnalysisResult[] = batch.map((product) => ({
      product,
      verdict:
        verdictByAsin.get(product.record.asin) ??
        skipVerdict(product.record.asin, "LLM returned no verdict"),
    }));

    await insertProductAnalysisResults(runId, batchResults);

    for (const result of batchResults) {
      results.push(result);
      switch (result.verdict.verdict) {
        case "FBA":
          tally.fba++;
          break;
        case "FBM":
          tally.fbm++;
          break;
        default:
          tally.skip++;
      }
    }

    await saveProgress("running");
    log(
      "info",
      ` Persisted batch ${batchNum}/${totalBatches} (${batchResults.length} rows, totals FBA/FBM/SKIP=${tally.fba}/${tally.fbm}/${tally.skip})`,
    );

    // Pause between batches, but not after the last one.
    if (start + LLM_BATCH_SIZE < enrichedProducts.length) {
      await sleep(1500);
    }
  }

  await saveProgress("ok");
  return {
    ...identity,
    topAsinsChecked: checkedCount,
    availableAsins: selectedAsins.length,
    fba: tally.fba,
    fbm: tally.fbm,
    skip: tally.skip,
    status: "ok",
    error: "",
    results,
  };
}
/**
 * Entry point of the per-category monthly-sold pipeline.
 *
 * Parses CLI options, checks SP-API configuration, creates the output
 * directory, loads the category blacklist, discovers categories, and then
 * processes every non-blacklisted category in sequence. A `runs` row is
 * inserted before each category is processed; if processing throws, the
 * category is logged as skipped and its row (when one was created) is marked
 * "failed". A short summary is logged at the end.
 */
export async function main(): Promise<void> {
  const args = parseArgs();
  assertSpApiPrerequisites();
  mkdirSync(args.outputDir, { recursive: true });

  const startupLines = [
    "Starting per-category monthly-sold pipeline",
    `Marketplace: ${config.spApiMarketplaceId}`,
    `SP-API region: ${config.spApiRegion}`,
    `Category limit: ${args.categoryLimit}`,
    `Top ASINs per category after monthly sort: ${args.perCategoryTop}`,
    `Category candidate pool: ${args.categoryCandidatePool}`,
    `Minimum monthly sold: ${args.minMonthlySold}`,
    `Blacklist file: ${args.blacklistFile}`,
  ];
  for (const line of startupLines) {
    log("info", line);
  }

  const categoryBlacklist = loadCategoryBlacklist(args.blacklistFile);
  log("info", `Loaded ${categoryBlacklist.size} blacklisted category IDs.`);

  const categories = await discoverCategories(args.categoryLimit);
  const allowedCategories = categories.filter(
    (candidate) => !categoryBlacklist.has(candidate.id),
  );
  const blacklistedCount = categories.length - allowedCategories.length;
  log(
    "info",
    `Discovered ${categories.length} categories (${blacklistedCount} blacklisted, ${allowedCategories.length} to process).`,
  );

  // Summary with every counter at zero, used for the initial row and failures.
  const zeroedSummary = (
    category: CategoryInfo,
    status: CategoryRunSummary["status"],
    error: string,
  ): CategoryRunSummary => ({
    categoryId: category.id,
    categoryLabel: category.label,
    topAsinsChecked: 0,
    availableAsins: 0,
    fba: 0,
    fbm: 0,
    skip: 0,
    status,
    error,
    results: [],
  });

  const runTimestamp = new Date().toISOString();
  const allCategorySummaries: CategoryRunSummary[] = [];
  let processedCategories = 0;
  let totalInsertedAsins = 0;

  for (const category of allowedCategories) {
    let runId: number | undefined;
    try {
      runId = await insertCategoryRunSummary(
        zeroedSummary(category, "running", ""),
        runTimestamp,
      );
      const categorySummary = await processCategory(
        runId,
        category,
        args.perCategoryTop,
        args.categoryCandidatePool,
        args.minMonthlySold,
        args.useClaude,
      );
      totalInsertedAsins += categorySummary.results?.length ?? 0;
      processedCategories++;
      allCategorySummaries.push({ ...categorySummary, runId });
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      log(
        "warn",
        `Skipping category ${category.label} (${category.id}) due to error: ${message}`,
      );
      const failedSummary = zeroedSummary(category, "failed", message);
      // Only touch the DB when the run row was actually created.
      if (runId) {
        await updateCategoryRunSummary(runId, {
          topAsinsChecked: 0,
          availableAsins: 0,
          fba: 0,
          fbm: 0,
          skip: 0,
          status: "failed",
          error: message,
        });
      }
      processedCategories++;
      allCategorySummaries.push({ ...failedSummary, runId });
    }
  }

  log("info", "\nRun summary");
  log("info", `Categories discovered/selected: ${categories.length}`);
  log("info", `Categories processed: ${processedCategories}`);
  log("info", `Total ASINs inserted into DB: ${totalInsertedAsins}`);
}
// Run the pipeline only when `import.meta.main` is truthy (presumably when this
// file is executed directly, e.g. via `bun run`, rather than imported — confirm
// against the runtime in use). Any rejection from main() is logged and the
// process exits with status 1.
if (import.meta.main) {
main().catch((err) => {
log("error", `Monthly-sold process crashed: ${String(err)}`);
process.exit(1);
});
}