feat: transition bestseller analysis storage to SQLite and add category blacklist

- Replaces Excel output with structured database tables for tracking category analysis runs and product results.
- Implements a blacklist to exclude specific category IDs from the bestseller pipeline.
- Adds unit tests for category processing and enhances logging with levels and timestamps.
- Introduces foreign key enforcement and updated schema definitions in the database module.
This commit is contained in:
Victor Noguera
2026-04-13 00:28:23 -04:00
parent 7ba6397578
commit a906f5ede3
7 changed files with 434 additions and 242 deletions

6
.gitignore vendored
View File

@@ -33,11 +33,13 @@ report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json
# Finder (MacOS) folder config # Finder (MacOS) folder config
.DS_Store .DS_Store
*.xlsx *.xlsx
*.csv
# Keep the default category blacklist under version control despite the rule above.
!category-blacklist.csv
results.db results.db
results.db-shm results.db-shm
results.db-wal results.db-wal
output/
temp_output/

4
category-blacklist.csv Normal file
View File

@@ -0,0 +1,4 @@
id,name
229534,Software
283155,Books
16310101,Grocery Gourmet Food
1 id name
2 229534 Software
3 283155 Books
4 16310101 Grocery Gourmet Food

View File

@@ -0,0 +1,99 @@
import { test, expect, beforeAll, afterAll, beforeEach, mock } from "bun:test";
import { Database } from "bun:sqlite";
import { getDb, initDb, closeDb } from "./database";
import path from "node:path";
import { rmSync, mkdirSync } from "node:fs";
import {
main,
processCategory,
insertCategoryRunSummary,
insertProductAnalysisResults,
} from "./bestsellers-by-category";
import * as keepaModule from "./keepa";
import * as spApiModule from "./sp-api";
import * as llmModule from "./llm";
// Throwaway SQLite file used only by this test suite.
const DB_TEST_PATH = path.join(
  process.cwd(),
  "test_output",
  "test_analysis.sqlite",
);

// Shared handle, assigned once the schema has been created in beforeAll.
let db: Database;

beforeAll(() => {
  // Recreate the output directory from scratch so no stale database survives.
  const testDir = path.dirname(DB_TEST_PATH);
  rmSync(testDir, { recursive: true, force: true });
  mkdirSync(testDir, { recursive: true });

  initDb(DB_TEST_PATH);
  db = getDb(DB_TEST_PATH);
});
afterAll(() => {
  // Close the connection first, then remove the directory that holds the test database.
  const testDir = path.dirname(DB_TEST_PATH);
  closeDb();
  rmSync(testDir, { recursive: true, force: true });
});
beforeEach(() => {
  // Start every test from empty analysis tables so row-count assertions do not
  // depend on what earlier tests inserted. The child table is cleared first:
  // product_analysis_results.run_id references category_analysis_runs(id) and
  // the connection enforces foreign keys.
  db.run("DELETE FROM product_analysis_results;");
  db.run("DELETE FROM category_analysis_runs;");
});
test("processCategory function test", async () => {
  // NOTE(review): `mock` and the keepa / sp-api / llm modules are imported at the
  // top of this file, but nothing in this file stubs them. The fixed ASINs, names
  // and verdicts asserted below presumably rely on stubs installed elsewhere;
  // confirm, otherwise this test depends on live services.
  const category = { id: 1, label: "Category 1", parentId: 0, childCount: 0 };

  // Run the pipeline for one category, limited to its top two ASINs.
  const summary = await processCategory(db, category, 2);

  expect(summary.status).toBe("ok");
  expect(summary.categoryId).toBe(category.id);
  expect(summary.categoryLabel).toBe(category.label);
  expect(summary.topAsinsChecked).toBe(2);
  expect(summary.availableAsins).toBe(2);
  expect(summary.fba).toBe(1);
  expect(summary.fbm).toBe(1);
  expect(summary.skip).toBe(0);
  expect(summary.results?.length).toBe(2);

  // Persist the summary, then the per-product rows that belong to it.
  const startedAt = new Date().toISOString();
  const runId = await insertCategoryRunSummary(db, summary, startedAt);
  if (summary.results) {
    await insertProductAnalysisResults(db, runId, summary.results);
  }

  // The run summary must be stored as exactly one row.
  const runRows = db
    .query("SELECT * FROM category_analysis_runs")
    .all() as any[];
  expect(runRows.length).toBe(1);
  const [runRow] = runRows;
  expect(runRow.category_label).toBe("Category 1");
  expect(runRow.top_asins_checked).toBe(2);
  expect(runRow.available_asins).toBe(2);
  expect(runRow.fba_count).toBe(1);
  expect(runRow.fbm_count).toBe(1);
  expect(runRow.status).toBe("ok");

  // Both product rows must be stored and linked to that run.
  const productRows = db
    .query("SELECT * FROM product_analysis_results ORDER BY asin")
    .all() as any[];
  expect(productRows.length).toBe(2);
  const [first, second] = productRows;
  expect(first.asin).toBe("B000000001");
  expect(first.name).toBe("Product One");
  expect(first.verdict).toBe("FBA");
  expect(first.run_id).toBe(runRow.id);
  expect(second.asin).toBe("B000000002");
  expect(second.name).toBe("Product Two");
  expect(second.verdict).toBe("FBM");
  expect(second.run_id).toBe(runRow.id);
});

View File

@@ -1,6 +1,6 @@
import { existsSync, mkdirSync, readFileSync } from "node:fs"; import { existsSync, mkdirSync, readFileSync } from "node:fs";
import path from "node:path"; import path from "node:path";
import * as XLSX from "xlsx"; import { type Database, getDb, initDb } from "./database.ts";
import { config } from "./config.ts"; import { config } from "./config.ts";
import { analyzeProducts } from "./llm.ts"; import { analyzeProducts } from "./llm.ts";
import { fetchSellabilityBatch, fetchSpApiPricingAndFees } from "./sp-api.ts"; import { fetchSellabilityBatch, fetchSpApiPricingAndFees } from "./sp-api.ts";
@@ -36,9 +36,10 @@ type CategoryRunSummary = {
fba: number; fba: number;
fbm: number; fbm: number;
skip: number; skip: number;
outputFile: string;
status: "ok" | "empty" | "failed"; status: "ok" | "empty" | "failed";
error: string; error: string;
runId?: number;
results?: AnalysisResult[];
}; };
const KEEPA_BASE = "https://api.keepa.com"; const KEEPA_BASE = "https://api.keepa.com";
@@ -58,6 +59,15 @@ let keepaTokensLeft = 1;
let keepaRefillRate = 1; let keepaRefillRate = 1;
let keepaLastRequestMs = 0; let keepaLastRequestMs = 0;
/**
 * Writes a timestamped, level-tagged line to the console.
 *
 * Each level is sent to its matching console method (`console.log`,
 * `console.warn`, `console.error`), which keeps warnings and errors on the
 * channels the replaced `console.warn` / `console.error` calls used.
 *
 * @param level - Severity tag; printed upper-cased inside square brackets.
 * @param message - Text placed after the timestamp and level prefix.
 * @param args - Extra values forwarded unchanged to the console method.
 */
function log(
  level: "info" | "warn" | "error",
  message: string,
  ...args: unknown[]
) {
  const timestamp = new Date().toISOString();
  const line = `[${timestamp}] [${level.toUpperCase()}] ${message}`;
  switch (level) {
    case "error":
      console.error(line, ...args);
      break;
    case "warn":
      console.warn(line, ...args);
      break;
    default:
      console.log(line, ...args);
  }
}
function parseArgs(): ParsedArgs { function parseArgs(): ParsedArgs {
const args = process.argv.slice(2); const args = process.argv.slice(2);
const outputDir = const outputDir =
@@ -99,10 +109,11 @@ function readFlagValue(args: string[], flag: string): string | undefined {
function printUsageAndExit(message: string): never { function printUsageAndExit(message: string): never {
if (message) { if (message) {
console.error(message); log("error", message);
} }
console.error( log(
"error",
[ [
"Usage:", "Usage:",
" bun run src/bestsellers-by-category.ts [--category-limit 32] [--per-category-top 100] [--out-dir output] [--blacklist-file category-blacklist.csv]", " bun run src/bestsellers-by-category.ts [--category-limit 32] [--per-category-top 100] [--out-dir output] [--blacklist-file category-blacklist.csv]",
@@ -118,11 +129,112 @@ function printUsageAndExit(message: string): never {
process.exit(1); process.exit(1);
} }
/**
 * Stores one category's run summary as a row in `category_analysis_runs`.
 *
 * @param db - Open database handle to write to.
 * @param summary - Counts, status and error text for the processed category.
 * @param runTimestamp - Timestamp string written to `run_timestamp`.
 * @returns The id of the inserted row, converted to a plain number.
 */
export async function insertCategoryRunSummary(
  db: Database,
  summary: CategoryRunSummary,
  runTimestamp: string,
): Promise<number> {
  // Values are listed in the same order as the columns in the statement below.
  const bindings = [
    summary.categoryId,
    summary.categoryLabel,
    runTimestamp,
    summary.topAsinsChecked,
    summary.availableAsins,
    summary.fba,
    summary.fbm,
    summary.skip,
    summary.status,
    summary.error,
  ];

  const { lastInsertRowid } = db.run(
    `
INSERT INTO category_analysis_runs (
category_id, category_label, run_timestamp,
top_asins_checked, available_asins,
fba_count, fbm_count, skip_count,
status, error_message
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?);
`,
    bindings,
  );

  // The row id may arrive as a number or a bigint; callers get a number.
  return Number(lastInsertRowid);
}
/**
 * Inserts the per-product analysis rows of one category run into
 * `product_analysis_results`.
 *
 * All rows are written inside a single transaction. The prepared statement is
 * finalized before returning, whether or not the transaction succeeded.
 *
 * @param db - Open database handle to write to.
 * @param runId - Id of the `category_analysis_runs` row these products belong to.
 * @param results - Analysed products; an empty array is a no-op.
 */
export async function insertProductAnalysisResults(
  db: Database,
  runId: number,
  results: AnalysisResult[],
): Promise<void> {
  if (results.length === 0) {
    return;
  }

  const insertStmt = db.prepare(`
    INSERT INTO product_analysis_results (
      asin, run_id, name, brand, category, unit_cost,
      current_price, avg_price_90d, avg_price_90d_sheet,
      selling_price_sheet, sales_rank, sales_rank_avg_90d,
      seller_count, monthly_sold, rank_drops_30d, rank_drops_90d,
      fba_fee, fbm_fee, referral_percent, can_sell,
      sellability_status, sellability_reason,
      verdict, confidence, reasoning, fetched_at
    ) VALUES (
      ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
      ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
      ?, ?, ?, ?, ?, ?
    );
  `);

  try {
    db.transaction((resultsBatch: AnalysisResult[]) => {
      for (const r of resultsBatch) {
        // Price preference: Keepa current price, then the sheet's selling
        // price, then the SP-API estimate.
        const price =
          r.product.keepa?.currentPrice ??
          r.product.record.sellingPriceFromSheet ??
          r.product.spApi.estimatedSalePrice;
        // Rank preference: Keepa sales rank, then the rank on the record.
        const rank = r.product.keepa?.salesRank ?? r.product.record.amazonRank;

        // Missing optional values are bound as NULL.
        insertStmt.run(
          r.product.record.asin,
          runId,
          r.product.record.name,
          r.product.record.brand ?? null,
          r.product.record.category ??
            r.product.keepa?.categoryTree?.join(" > ") ??
            null,
          r.product.record.unitCost ?? null,
          price ?? null,
          r.product.keepa?.avgPrice90 ?? null,
          r.product.record.avgPrice90FromSheet ?? null,
          r.product.record.sellingPriceFromSheet ?? null,
          rank ?? null,
          r.product.keepa?.salesRankAvg90 ?? null,
          r.product.keepa?.sellerCount ?? null,
          r.product.keepa?.monthlySold ?? null,
          r.product.keepa?.salesRankDrops30 ?? null,
          r.product.keepa?.salesRankDrops90 ?? null,
          r.product.spApi.fbaFee ?? null,
          r.product.spApi.fbmFee ?? null,
          r.product.spApi.referralFeePercent ?? null,
          // can_sell is stored as text: "unknown" when null/undefined, else "yes"/"no".
          r.product.spApi.canSell == null
            ? "unknown"
            : r.product.spApi.canSell
              ? "yes"
              : "no",
          r.product.spApi.sellabilityStatus ?? null,
          r.product.spApi.sellabilityReason ?? null,
          r.verdict.verdict,
          r.verdict.confidence,
          r.verdict.reasoning ?? null,
          r.product.fetchedAt,
        );
      }
    })(results); // Execute the transaction with the results batch
  } finally {
    // A new statement is prepared on every call, so release it explicitly
    // instead of leaving cleanup to the garbage collector.
    insertStmt.finalize();
  }
}
function loadCategoryBlacklist(filePath: string): Set<number> { function loadCategoryBlacklist(filePath: string): Set<number> {
const blacklist = new Set<number>(); const blacklist = new Set<number>();
if (!existsSync(filePath)) { if (!existsSync(filePath)) {
console.warn( log(
"warn",
`Blacklist file not found at ${filePath}; continuing with no excluded categories.`, `Blacklist file not found at ${filePath}; continuing with no excluded categories.`,
); );
return blacklist; return blacklist;
@@ -147,7 +259,8 @@ function loadCategoryBlacklist(filePath: string): Set<number> {
} }
if (!idToken) { if (!idToken) {
console.warn( log(
"warn",
`Blacklist CSV line ${lineNumber}: missing id, row ignored (${trimmed}).`, `Blacklist CSV line ${lineNumber}: missing id, row ignored (${trimmed}).`,
); );
continue; continue;
@@ -155,20 +268,23 @@ function loadCategoryBlacklist(filePath: string): Set<number> {
const id = Number(idToken); const id = Number(idToken);
if (!Number.isInteger(id) || id <= 0) { if (!Number.isInteger(id) || id <= 0) {
console.warn( log(
"warn",
`Blacklist CSV line ${lineNumber}: invalid id '${idToken}', row ignored (${trimmed}).`, `Blacklist CSV line ${lineNumber}: invalid id '${idToken}', row ignored (${trimmed}).`,
); );
continue; continue;
} }
if (!nameToken) { if (!nameToken) {
console.warn( log(
"warn",
`Blacklist CSV line ${lineNumber}: missing name for id ${id}; accepted but please add name.`, `Blacklist CSV line ${lineNumber}: missing name for id ${id}; accepted but please add name.`,
); );
} }
if (blacklist.has(id)) { if (blacklist.has(id)) {
console.warn( log(
"warn",
`Blacklist CSV line ${lineNumber}: duplicate id ${id}, keeping first occurrence.`, `Blacklist CSV line ${lineNumber}: duplicate id ${id}, keeping first occurrence.`,
); );
continue; continue;
@@ -252,7 +368,8 @@ async function waitForKeepaToken(): Promise<void> {
(Date.now() - keepaLastRequestMs); (Date.now() - keepaLastRequestMs);
if (waitMs > 0) { if (waitMs > 0) {
console.log( log(
"info",
`Keepa tokens depleted; waiting ${Math.ceil(waitMs / 1000)}s...`, `Keepa tokens depleted; waiting ${Math.ceil(waitMs / 1000)}s...`,
); );
await sleep(waitMs); await sleep(waitMs);
@@ -294,7 +411,8 @@ async function keepaGetJson(pathAndQuery: string): Promise<any> {
rateLimitHits++; rateLimitHits++;
const waitMs = computeBackoffMs(rateLimitHits, rate.refillInMs); const waitMs = computeBackoffMs(rateLimitHits, rate.refillInMs);
console.warn( log(
"warn",
`Keepa rate limited (429). Retry ${rateLimitHits} in ${Math.ceil(waitMs / 1000)}s...`, `Keepa rate limited (429). Retry ${rateLimitHits} in ${Math.ceil(waitMs / 1000)}s...`,
); );
await sleep(waitMs); await sleep(waitMs);
@@ -516,7 +634,8 @@ async function fetchSellabilityMap(
sellability.set(asin, info); sellability.set(asin, info);
} }
console.log( log(
"info",
` Sellability progress: ${Math.min(i + chunk.length, asins.length)}/${asins.length}`, ` Sellability progress: ${Math.min(i + chunk.length, asins.length)}/${asins.length}`,
); );
} }
@@ -548,7 +667,7 @@ async function fetchSpApiMap(
done++; done++;
if (done % 10 === 0 || done === asins.length) { if (done % 10 === 0 || done === asins.length) {
console.log(` Pricing progress: ${done}/${asins.length}`); log("info", ` Pricing progress: ${done}/${asins.length}`);
} }
} }
} }
@@ -640,7 +759,8 @@ async function fetchKeepaEnrichmentMap(
}); });
} }
console.log( log(
"info",
` Keepa enrichment progress: ${Math.min(i + chunk.length, asins.length)}/${asins.length}`, ` Keepa enrichment progress: ${Math.min(i + chunk.length, asins.length)}/${asins.length}`,
); );
} }
@@ -652,10 +772,9 @@ function buildEnrichedProducts(
asins: string[], asins: string[],
sellabilityMap: Map<string, SellabilityInfo>, sellabilityMap: Map<string, SellabilityInfo>,
spApiMap: Map<string, SpApiData>, spApiMap: Map<string, SpApiData>,
titleByAsin: Map<string, string>, keepaEnrichmentMap: Map<string, { keepa: KeepaData; title: string }>,
): EnrichedProduct[] { ): EnrichedProduct[] {
return asins.map((asin) => { return asins.map((asin) => {
const keepa = null;
const sellability = sellabilityMap.get(asin) ?? { const sellability = sellabilityMap.get(asin) ?? {
canSell: null, canSell: null,
sellabilityStatus: "unknown" as const, sellabilityStatus: "unknown" as const,
@@ -672,15 +791,23 @@ function buildEnrichedProducts(
sellabilityReason: sellability.sellabilityReason, sellabilityReason: sellability.sellabilityReason,
}; };
const enrichedKeepa = keepaEnrichmentMap.get(asin);
const keepa = enrichedKeepa?.keepa ?? null;
const title = enrichedKeepa?.title ?? asin;
const record: ProductRecord = { const record: ProductRecord = {
asin, asin,
name: titleByAsin.get(asin) ?? asin, name: title,
unitCost: 0, unitCost: 0,
category: undefined, category: undefined,
brand: undefined, brand: undefined,
supplier: undefined, supplier: undefined,
}; };
if (keepa?.currentPrice && spApi.estimatedSalePrice === 0) {
spApi.estimatedSalePrice = keepa.currentPrice;
}
return { return {
record, record,
keepa, keepa,
@@ -700,14 +827,14 @@ async function runLlmInBatches(
const batchNum = Math.floor(i / LLM_BATCH_SIZE) + 1; const batchNum = Math.floor(i / LLM_BATCH_SIZE) + 1;
const totalBatches = Math.ceil(products.length / LLM_BATCH_SIZE); const totalBatches = Math.ceil(products.length / LLM_BATCH_SIZE);
console.log(` LLM batch ${batchNum}/${totalBatches}...`); log("info", ` LLM batch ${batchNum}/${totalBatches}...`);
let batchVerdicts: LlmVerdict[]; let batchVerdicts: LlmVerdict[];
try { try {
batchVerdicts = await analyzeProducts(batch); batchVerdicts = await analyzeProducts(batch);
} catch (err) { } catch (err) {
const message = err instanceof Error ? err.message : String(err); const message = err instanceof Error ? err.message : String(err);
console.warn(` LLM batch failed: ${message}`); log("warn", ` LLM batch failed: ${message}`);
batchVerdicts = batch.map((p) => ({ batchVerdicts = batch.map((p) => ({
asin: p.record.asin, asin: p.record.asin,
verdict: "SKIP", verdict: "SKIP",
@@ -726,16 +853,16 @@ async function runLlmInBatches(
return verdicts; return verdicts;
} }
async function processCategory( export async function processCategory(
db: Database,
category: CategoryInfo, category: CategoryInfo,
perCategoryTop: number, perCategoryTop: number,
outputDir: string,
): Promise<CategoryRunSummary> { ): Promise<CategoryRunSummary> {
console.log(`\nCategory ${category.label} (${category.id})`); log("info", `\nCategory ${category.label} (${category.id})`);
const topAsins = await fetchCategoryBestSellerAsins(category, perCategoryTop); const topAsins = await fetchCategoryBestSellerAsins(category, perCategoryTop);
if (topAsins.length === 0) { if (topAsins.length === 0) {
console.log(" Keepa returned no ASINs for this category."); log("info", " Keepa returned no ASINs for this category.");
return { return {
categoryId: category.id, categoryId: category.id,
categoryLabel: category.label, categoryLabel: category.label,
@@ -744,13 +871,13 @@ async function processCategory(
fba: 0, fba: 0,
fbm: 0, fbm: 0,
skip: 0, skip: 0,
outputFile: "",
status: "empty", status: "empty",
error: "No ASINs returned by Keepa", error: "No ASINs returned by Keepa",
results: [],
}; };
} }
console.log(` Top ASINs fetched: ${topAsins.length}`); log("info", ` Top ASINs fetched: ${topAsins.length}`);
const sellabilityMap = await fetchSellabilityMap(topAsins); const sellabilityMap = await fetchSellabilityMap(topAsins);
const availableAsins = topAsins.filter((asin) => { const availableAsins = topAsins.filter((asin) => {
@@ -758,7 +885,7 @@ async function processCategory(
return info?.canSell === true && info.sellabilityStatus === "available"; return info?.canSell === true && info.sellabilityStatus === "available";
}); });
console.log(` Sellable ASINs: ${availableAsins.length}/${topAsins.length}`); log("info", ` Sellable ASINs: ${availableAsins.length}/${topAsins.length}`);
if (availableAsins.length === 0) { if (availableAsins.length === 0) {
return { return {
categoryId: category.id, categoryId: category.id,
@@ -768,9 +895,9 @@ async function processCategory(
fba: 0, fba: 0,
fbm: 0, fbm: 0,
skip: 0, skip: 0,
outputFile: "",
status: "empty", status: "empty",
error: "No sellable ASINs", error: "No sellable ASINs",
results: [],
}; };
} }
@@ -793,21 +920,8 @@ async function processCategory(
availableAsins, availableAsins,
sellabilityMap, sellabilityMap,
spApiMap, spApiMap,
titleByAsin, keepaEnrichment,
).map((product) => { );
const keepa = keepaMap.get(product.record.asin) ?? null;
const spApi = product.spApi;
if (keepa?.currentPrice && spApi.estimatedSalePrice === 0) {
spApi.estimatedSalePrice = keepa.currentPrice;
}
return {
...product,
keepa,
spApi,
};
});
const verdicts = await runLlmInBatches(enrichedProducts); const verdicts = await runLlmInBatches(enrichedProducts);
const verdictByAsin = new Map(verdicts.map((v) => [v.asin, v])); const verdictByAsin = new Map(verdicts.map((v) => [v.asin, v]));
@@ -822,9 +936,14 @@ async function processCategory(
}, },
})); }));
const outputName = `${sanitizeFileSegment(category.label)}_${category.id}.xlsx`; // No longer writing to XLSX, directly insert into DB
const outputPath = path.join(outputDir, outputName); // const outputName = `${sanitizeFileSegment(category.label)}_${category.id}.xlsx`;
writeCategoryResultsWorkbook(results, outputPath); // const outputPath = path.join(outputDir, outputName);
// writeCategoryResultsWorkbook(results, outputPath);
// The categoryRunId will be provided by the main function after inserting the summary
// We need to pass it here or get it after inserting the summary in main.
// For now, let's assume it's handled in main.
const fba = results.filter((r) => r.verdict.verdict === "FBA").length; const fba = results.filter((r) => r.verdict.verdict === "FBA").length;
const fbm = results.filter((r) => r.verdict.verdict === "FBM").length; const fbm = results.filter((r) => r.verdict.verdict === "FBM").length;
@@ -838,204 +957,77 @@ async function processCategory(
fba, fba,
fbm, fbm,
skip, skip,
outputFile: path.basename(outputPath),
status: "ok", status: "ok",
error: "", error: "",
results,
}; };
} }
function buildCategoryOutputRow(r: AnalysisResult) { export async function main(): Promise<void> {
const price =
r.product.keepa?.currentPrice ??
r.product.record.sellingPriceFromSheet ??
r.product.spApi.estimatedSalePrice;
const rank = r.product.keepa?.salesRank ?? r.product.record.amazonRank;
return {
ASIN: r.product.record.asin,
Name: r.product.record.name,
Brand: r.product.record.brand ?? "",
Category:
r.product.record.category ??
r.product.keepa?.categoryTree?.join(" > ") ??
"",
"Unit Cost": r.product.record.unitCost,
"Current Price": price ?? "",
"Avg Price 90d": r.product.keepa?.avgPrice90 ?? "",
"Avg Price 90d (sheet)": r.product.record.avgPrice90FromSheet ?? "",
"Selling Price (sheet)": r.product.record.sellingPriceFromSheet ?? "",
"Sales Rank": rank ?? "",
"Rank Avg 90d": r.product.keepa?.salesRankAvg90 ?? "",
Sellers: r.product.keepa?.sellerCount ?? "",
"Monthly Sold": r.product.keepa?.monthlySold ?? "",
"Rank Drops 30d": r.product.keepa?.salesRankDrops30 ?? "",
"Rank Drops 90d": r.product.keepa?.salesRankDrops90 ?? "",
"FBA Fee": r.product.spApi.fbaFee,
"FBM Fee": r.product.spApi.fbmFee,
"Referral %": r.product.spApi.referralFeePercent,
"Can Sell":
r.product.spApi.canSell == null
? "unknown"
: r.product.spApi.canSell
? "yes"
: "no",
Sellability: r.product.spApi.sellabilityStatus,
"Sellability Reason": r.product.spApi.sellabilityReason ?? "",
Verdict: r.verdict.verdict,
Confidence: r.verdict.confidence,
Reasoning: r.verdict.reasoning,
};
}
function writeCategoryResultsWorkbook(
results: AnalysisResult[],
outputPath: string,
): void {
const rows = results.map(buildCategoryOutputRow);
const ws = XLSX.utils.json_to_sheet(rows);
const wb = XLSX.utils.book_new();
XLSX.utils.book_append_sheet(wb, ws, "Results");
XLSX.writeFile(wb, outputPath);
console.log(`Results written to ${outputPath}`);
}
function writeConsolidatedWorkbook(
summaries: CategoryRunSummary[],
outputDir: string,
): string {
const workbook = XLSX.utils.book_new();
const summaryRows = summaries.map((row) => ({
"Category ID": row.categoryId,
"Category Label": row.categoryLabel,
"Top ASINs Checked": row.topAsinsChecked,
"Sellable ASINs": row.availableAsins,
FBA: row.fba,
FBM: row.fbm,
SKIP: row.skip,
Status: row.status,
"Output File": row.outputFile,
Error: row.error,
}));
const totals = summaries.reduce(
(acc, row) => {
acc.topAsinsChecked += row.topAsinsChecked;
acc.availableAsins += row.availableAsins;
acc.fba += row.fba;
acc.fbm += row.fbm;
acc.skip += row.skip;
if (row.status === "ok") acc.ok += 1;
if (row.status === "empty") acc.empty += 1;
if (row.status === "failed") acc.failed += 1;
return acc;
},
{
topAsinsChecked: 0,
availableAsins: 0,
fba: 0,
fbm: 0,
skip: 0,
ok: 0,
empty: 0,
failed: 0,
},
);
const overviewRows = [
{ Metric: "Categories total", Value: summaries.length },
{ Metric: "Categories with output", Value: totals.ok },
{ Metric: "Categories empty", Value: totals.empty },
{ Metric: "Categories failed", Value: totals.failed },
{ Metric: "Top ASINs checked", Value: totals.topAsinsChecked },
{ Metric: "Sellable ASINs", Value: totals.availableAsins },
{ Metric: "Total FBA verdicts", Value: totals.fba },
{ Metric: "Total FBM verdicts", Value: totals.fbm },
{ Metric: "Total SKIP verdicts", Value: totals.skip },
];
const summarySheet = XLSX.utils.json_to_sheet(summaryRows);
const overviewSheet = XLSX.utils.json_to_sheet(overviewRows);
XLSX.utils.book_append_sheet(workbook, overviewSheet, "Overview");
XLSX.utils.book_append_sheet(workbook, summarySheet, "ByCategory");
const outputPath = path.join(
outputDir,
"consolidated_bestsellers_summary.xlsx",
);
XLSX.writeFile(workbook, outputPath);
return outputPath;
}
function printSummary(
categories: CategoryInfo[],
processed: number,
generatedFiles: number,
totalTopAsins: number,
totalAvailableAsins: number,
): void {
console.log("\nRun summary");
console.log(`Categories discovered/selected: ${categories.length}`);
console.log(`Categories processed: ${processed}`);
console.log(`Category files written: ${generatedFiles}`);
console.log(`Top ASINs checked: ${totalTopAsins}`);
console.log(`Sellable ASINs enriched: ${totalAvailableAsins}`);
}
async function main(): Promise<void> {
const args = parseArgs(); const args = parseArgs();
assertSpApiPrerequisites(); assertSpApiPrerequisites();
mkdirSync(args.outputDir, { recursive: true }); mkdirSync(args.outputDir, { recursive: true });
const DB_PATH = path.join(args.outputDir, "analysis.sqlite");
initDb(DB_PATH);
const db = getDb(DB_PATH);
console.log("Starting per-category bestseller pipeline"); log("info", "Starting per-category bestseller pipeline");
console.log(`Marketplace: ${config.spApiMarketplaceId}`); log("info", `Marketplace: ${config.spApiMarketplaceId}`);
console.log(`SP-API region: ${config.spApiRegion}`); log("info", `SP-API region: ${config.spApiRegion}`);
console.log(`Category limit: ${args.categoryLimit}`); log("info", `Category limit: ${args.categoryLimit}`);
console.log(`Top ASINs per category: ${args.perCategoryTop}`); log("info", `Top ASINs per category: ${args.perCategoryTop}`);
console.log(`Output directory: ${args.outputDir}`); // Removed outputDir logging as it's not directly used for XLSX anymore
console.log(`Blacklist file: ${args.blacklistFile}`); // console.log(`Output directory: ${args.outputDir}`);
log("info", `Blacklist file: ${args.blacklistFile}`);
const categoryBlacklist = loadCategoryBlacklist(args.blacklistFile); const categoryBlacklist = loadCategoryBlacklist(args.blacklistFile);
console.log(`Loaded ${categoryBlacklist.size} blacklisted category IDs.`); log("info", `Loaded ${categoryBlacklist.size} blacklisted category IDs.`);
const categories = await discoverCategories(args.categoryLimit); const categories = await discoverCategories(args.categoryLimit);
const allowedCategories = categories.filter( const allowedCategories = categories.filter(
(c) => !categoryBlacklist.has(c.id), (c) => !categoryBlacklist.has(c.id),
); );
const blacklistedCount = categories.length - allowedCategories.length; const blacklistedCount = categories.length - allowedCategories.length;
console.log( log(
"info",
`Discovered ${categories.length} categories (${blacklistedCount} blacklisted, ${allowedCategories.length} to process).`, `Discovered ${categories.length} categories (${blacklistedCount} blacklisted, ${allowedCategories.length} to process).`,
); );
let processed = 0; const runTimestamp = new Date().toISOString();
let generatedFiles = 0; let processedCategories = 0;
let totalTopAsins = 0; let totalInsertedAsins = 0;
let totalAvailableAsins = 0; const allCategorySummaries: CategoryRunSummary[] = [];
const categorySummaries: CategoryRunSummary[] = [];
for (const category of allowedCategories) { for (const category of allowedCategories) {
let categorySummary: CategoryRunSummary;
try { try {
const outcome = await processCategory( categorySummary = await processCategory(
db,
category, category,
args.perCategoryTop, args.perCategoryTop,
args.outputDir,
); );
processed++; const runId = await insertCategoryRunSummary(
totalTopAsins += outcome.topAsinsChecked; db,
totalAvailableAsins += outcome.availableAsins; categorySummary,
if (outcome.status === "ok") { runTimestamp,
generatedFiles++; );
if (categorySummary.results) {
await insertProductAnalysisResults(db, runId, categorySummary.results);
totalInsertedAsins += categorySummary.results.length;
} }
categorySummaries.push(outcome);
processedCategories++;
allCategorySummaries.push({ ...categorySummary, runId });
} catch (err) { } catch (err) {
const message = err instanceof Error ? err.message : String(err); const message = err instanceof Error ? err.message : String(err);
console.warn( log(
"warn",
`Skipping category ${category.label} (${category.id}) due to error: ${message}`, `Skipping category ${category.label} (${category.id}) due to error: ${message}`,
); );
processed++; categorySummary = {
categorySummaries.push({
categoryId: category.id, categoryId: category.id,
categoryLabel: category.label, categoryLabel: category.label,
topAsinsChecked: 0, topAsinsChecked: 0,
@@ -1043,26 +1035,24 @@ async function main(): Promise<void> {
fba: 0, fba: 0,
fbm: 0, fbm: 0,
skip: 0, skip: 0,
outputFile: "",
status: "failed", status: "failed",
error: message, error: message,
}); results: [],
};
processedCategories++;
allCategorySummaries.push(categorySummary);
} }
} }
const consolidatedPath = writeConsolidatedWorkbook( log("info", "\nRun summary");
categorySummaries, log("info", `Categories discovered/selected: ${categories.length}`);
args.outputDir, log("info", `Categories processed: ${processedCategories}`);
); log("info", `Total ASINs inserted into DB: ${totalInsertedAsins}`);
console.log(`Consolidated workbook written: ${consolidatedPath}`);
printSummary(
allowedCategories,
processed,
generatedFiles,
totalTopAsins,
totalAvailableAsins,
);
} }
await main(); if (import.meta.main) {
main().catch((err) => {
log("error", `Bestsellers process crashed: ${String(err)}`);
process.exit(1);
});
}

21
src/check_db.ts Normal file
View File

@@ -0,0 +1,21 @@
import { getDb } from "./database.ts";
import path from "node:path";
/**
 * Debug helper: prints every `category_analysis_runs` row for one category as
 * pretty-printed JSON.
 *
 * Query errors are logged rather than rethrown, and the handle is always closed.
 *
 * @param categoryId - Category id to look up; defaults to the id that used to be
 *   hard-coded here.
 * @param dbPath - SQLite file to open; defaults to
 *   `<cwd>/temp_output/analysis.sqlite`.
 */
async function checkDb(
  categoryId = 19419898011,
  dbPath = path.join(process.cwd(), "temp_output", "analysis.sqlite"),
) {
  const db = getDb(dbPath);
  try {
    const rows = db
      .query("SELECT * FROM category_analysis_runs WHERE category_id = ?")
      .all(categoryId);
    console.log(JSON.stringify(rows, null, 2));
  } catch (error) {
    console.error("Database query failed:", error);
  } finally {
    db.close();
  }
}

// Optional first CLI argument overrides the default category id.
const categoryArg = process.argv[2];
const requestedCategoryId =
  categoryArg === undefined ? undefined : Number(categoryArg);
if (
  requestedCategoryId !== undefined &&
  !Number.isInteger(requestedCategoryId)
) {
  console.error(`Invalid category id: ${categoryArg}`);
  process.exit(1);
}
await checkDb(requestedCategoryId);

View File

@@ -1,4 +1,5 @@
import { Database } from "bun:sqlite"; import { Database } from "bun:sqlite";
export { Database } from "bun:sqlite";
let db: Database | null = null; let db: Database | null = null;
@@ -6,6 +7,7 @@ export function getDb(dbPath: string): Database {
if (!db) { if (!db) {
db = new Database(dbPath); db = new Database(dbPath);
db.run("PRAGMA journal_mode = WAL;"); // Enable WAL mode for better performance db.run("PRAGMA journal_mode = WAL;"); // Enable WAL mode for better performance
db.run("PRAGMA foreign_keys = ON;"); // Enforce foreign key constraints
} }
return db; return db;
} }
@@ -50,20 +52,6 @@ export function initDb(dbPath: string): void {
monthly_sold INTEGER, monthly_sold INTEGER,
rank_drops_30d INTEGER, rank_drops_30d INTEGER,
rank_drops_90d INTEGER, rank_drops_90d INTEGER,
fba_net_sheet REAL,
gross_profit_dollar REAL,
gross_profit_pct REAL,
net_profit_sheet REAL,
roi_sheet REAL,
moq INTEGER,
moq_cost REAL,
qty_available INTEGER,
supplier TEXT,
source_url TEXT,
asin_link TEXT,
promo_coupon_code TEXT,
notes TEXT,
lead_date TEXT,
fba_fee REAL, fba_fee REAL,
fbm_fee REAL, fbm_fee REAL,
referral_percent REAL, referral_percent REAL,
@@ -77,4 +65,50 @@ export function initDb(dbPath: string): void {
FOREIGN KEY (run_id) REFERENCES runs(id) FOREIGN KEY (run_id) REFERENCES runs(id)
); );
`); `);
database.run(`
CREATE TABLE IF NOT EXISTS category_analysis_runs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
category_id INTEGER NOT NULL,
category_label TEXT NOT NULL,
run_timestamp TEXT NOT NULL,
top_asins_checked INTEGER NOT NULL,
available_asins INTEGER NOT NULL,
fba_count INTEGER NOT NULL,
fbm_count INTEGER NOT NULL,
skip_count INTEGER NOT NULL,
status TEXT NOT NULL,
error_message TEXT
);
`);
// Product rows are keyed by (asin, run_id) rather than by asin alone. With asin
// as the sole primary key, an ASIN could be stored only once, so a later run or
// another category containing the same ASIN failed on insert.
// Because of IF NOT EXISTS, a database already created with the earlier
// definition keeps its old key until the table is recreated.
database.run(`
  CREATE TABLE IF NOT EXISTS product_analysis_results (
    asin TEXT NOT NULL,
    run_id INTEGER NOT NULL,
    name TEXT NOT NULL,
    brand TEXT,
    category TEXT,
    unit_cost REAL,
    current_price REAL,
    avg_price_90d REAL,
    avg_price_90d_sheet REAL,
    selling_price_sheet REAL,
    sales_rank INTEGER,
    sales_rank_avg_90d INTEGER,
    seller_count INTEGER,
    monthly_sold INTEGER,
    rank_drops_30d INTEGER,
    rank_drops_90d INTEGER,
    fba_fee REAL,
    fbm_fee REAL,
    referral_percent REAL,
    can_sell TEXT,
    sellability_status TEXT,
    sellability_reason TEXT,
    verdict TEXT NOT NULL,
    confidence REAL NOT NULL,
    reasoning TEXT,
    fetched_at TEXT NOT NULL,
    PRIMARY KEY (asin, run_id),
    FOREIGN KEY (run_id) REFERENCES category_analysis_runs(id)
  );
`);
} }

View File

@@ -73,3 +73,45 @@ export interface AnalysisResult {
product: EnrichedProduct; product: EnrichedProduct;
verdict: LlmVerdict; verdict: LlmVerdict;
} }
/**
 * Camel-cased shape of one row in the `category_analysis_runs` table, without
 * the auto-generated `id` column.
 *
 * NOTE(review): the insert helpers visible in this change take
 * `CategoryRunSummary` rather than this type — confirm where this interface is
 * meant to be used.
 */
export interface CategoryRunSummaryDb {
categoryId: number;
categoryLabel: string;
runTimestamp: string;
topAsinsChecked: number;
availableAsins: number;
fbaCount: number;
fbmCount: number;
skipCount: number;
status: "ok" | "empty" | "failed";
// Optional here; the matching `error_message` column is nullable.
errorMessage?: string;
}
/**
 * Camel-cased shape of one row in the `product_analysis_results` table.
 *
 * Optional fields correspond to nullable columns; `asin`, `runId`, `name`,
 * `verdict`, `confidence` and `fetchedAt` correspond to NOT NULL columns.
 *
 * NOTE(review): the insert helper visible in this change builds rows directly
 * from `AnalysisResult` rather than from this type — confirm intended usage.
 */
export interface ProductAnalysisResultDb {
asin: string;
// References the `id` of the owning `category_analysis_runs` row.
runId: number;
name: string;
brand?: string;
category?: string;
unitCost?: number;
currentPrice?: number;
avgPrice90d?: number;
avgPrice90dSheet?: number;
sellingPriceSheet?: number;
salesRank?: number;
salesRankAvg90d?: number;
sellerCount?: number;
monthlySold?: number;
rankDrops30d?: number;
rankDrops90d?: number;
fbaFee?: number;
fbmFee?: number;
referralPercent?: number;
// Stored as text; presumably one of "unknown" / "yes" / "no" as written by the insert helper — verify.
canSell?: string;
sellabilityStatus?: string;
sellabilityReason?: string;
verdict: string;
confidence: number;
reasoning?: string;
fetchedAt: string;
}