feat: add mid-range sellers by category analysis pipeline

This new pipeline scans categories for products whose monthly units sold, price, seller count, and Amazon Buy Box share all fall within configured minimum/maximum ranges. It fetches comprehensive product data from Keepa and SP-API, analyzes it using an LLM, and persists the results.

A key enhancement is the introduction of a dedicated Redis cache for Keepa and SP-API responses. This reduces API token consumption and improves performance for subsequent runs by caching enriched ASIN data with a 12-hour TTL. Products are saved regardless of their sellability status to provide a complete view.
This commit is contained in:
Victor Noguera
2026-05-02 12:03:31 -04:00
parent 9b832b7839
commit f2c8a9728d
5 changed files with 2409 additions and 1 deletions

View File

@@ -1,10 +1,21 @@
import Redis from "ioredis";
import { config } from "./config.ts";
import type { EnrichedProduct } from "./types.ts";
import type { EnrichedProduct, KeepaData, SpApiData } from "./types.ts";
// Module-wide ioredis client. While this is null, getApiCache() returns null and
// setApiCache() returns without touching Redis.
let redis: Redis | null = null;
// When true, connectCache() returns immediately without attempting a connection.
let disabled = false;
/**
 * Value cached per ASIN by setApiCache() and read back by getApiCache().
 * It is serialized with JSON.stringify, so every field must survive a JSON
 * round trip.
 */
export type ApiCacheEntry = {
// Product title for the ASIN.
title: string;
// Keepa data for the ASIN. null is allowed — presumably when Keepa returned
// nothing for the product (confirm against the code that writes the entry).
keepa: KeepaData | null;
// SP-API data for the ASIN.
spApi: SpApiData;
// NOTE(review): presumably an ISO-8601 timestamp of when the data was fetched —
// confirm against the code that writes the entry.
fetchedAt: string;
};
/**
 * Builds the Redis key under which the cached API data for one ASIN is stored.
 *
 * @param asin - ASIN to build the key for; it is appended verbatim.
 * @returns The key in the form `api:asin:<asin>`.
 */
function getApiCacheKey(asin: string): string {
  const keyPrefix = "api:asin:";
  return keyPrefix.concat(asin);
}
export async function connectCache(): Promise<void> {
if (disabled) return;
try {
@@ -58,6 +69,35 @@ export async function setCache(
}
}
/**
 * Runtime check that a value parsed from Redis has the top-level structure of
 * an ApiCacheEntry. Only the four top-level fields are inspected; the nested
 * Keepa / SP-API payloads are not validated.
 */
function isApiCacheEntry(value: unknown): value is ApiCacheEntry {
  if (typeof value !== "object" || value === null) {
    return false;
  }
  const candidate = value as Record<string, unknown>;
  const { keepa, spApi } = candidate;
  return (
    typeof candidate.title === "string" &&
    typeof candidate.fetchedAt === "string" &&
    (keepa === null || typeof keepa === "object") &&
    typeof spApi === "object" &&
    spApi !== null
  );
}

/**
 * Reads the cached API data for one ASIN.
 *
 * The lookup is best-effort: every failure mode is reported as a cache miss so
 * callers can fall back to fetching fresh data.
 *
 * @param asin - ASIN whose cached entry should be loaded.
 * @returns The cached entry, or null when there is no Redis client, the key is
 *   absent/empty, Redis or JSON parsing fails, or the stored value does not
 *   have the expected top-level structure.
 */
export async function getApiCache(asin: string): Promise<ApiCacheEntry | null> {
  if (!redis) return null;
  try {
    const raw = await redis.get(getApiCacheKey(asin));
    if (!raw) return null;
    // JSON.parse can yield any JSON value; validate before trusting the type so
    // a corrupted or foreign value is treated as a miss instead of being
    // returned as if it were a well-formed entry.
    const parsed: unknown = JSON.parse(raw);
    return isApiCacheEntry(parsed) ? parsed : null;
  } catch {
    // Redis errors and malformed JSON are both treated as a cache miss.
    return null;
  }
}
/**
 * Stores the API data for one ASIN in Redis with an expiry.
 *
 * Writing is best-effort: when there is no Redis client the call is a no-op,
 * and any error raised while serializing or writing is swallowed.
 *
 * @param asin - ASIN the entry belongs to.
 * @param data - Entry to serialize as JSON and store.
 * @param ttlSeconds - Expiry passed to Redis via the `EX` option.
 */
export async function setApiCache(
  asin: string,
  data: ApiCacheEntry,
  ttlSeconds: number,
): Promise<void> {
  const client = redis;
  if (!client) return;

  try {
    const key = getApiCacheKey(asin);
    const payload = JSON.stringify(data);
    await client.set(key, payload, "EX", ttlSeconds);
  } catch {
    // Caching is optional; a failed write must not interrupt the caller.
  }
}
export async function disconnectCache(): Promise<void> {
if (redis) {
await redis.quit();

View File

@@ -0,0 +1,445 @@
import { test, expect, beforeAll, afterAll, beforeEach, mock } from "bun:test";
import { Database } from "bun:sqlite";
import { getDb, initDb, closeDb } from "./database.ts";
import path from "node:path";
import { rmSync, mkdirSync } from "node:fs";
/**
 * Stand-in for `fetchSellabilityBatch`: resolves to a Map with one entry per
 * requested ASIN. "B000000003" is reported as restricted; every other ASIN is
 * reported as available.
 */
const fetchSellabilityBatchMock = mock(async (asins: string[]) => {
  const sellabilityFor = (asin: string) => {
    if (asin !== "B000000003") {
      return {
        canSell: true,
        sellabilityStatus: "available" as const,
        sellabilityReason: "ok",
      };
    }
    return {
      canSell: false,
      sellabilityStatus: "restricted" as const,
      sellabilityReason: "restricted",
    };
  };

  return new Map(asins.map((asin) => [asin, sellabilityFor(asin)]));
});
/**
 * Stand-in for `fetchSpApiPricingAndFees`: returns fixed fee and price figures
 * and echoes the sellability fields it was handed, substituting
 * null / "unknown" / "missing" when they are absent.
 */
const fetchSpApiPricingAndFeesMock = mock(
  async (_asin: string, sellability: any) => {
    const canSell = sellability?.canSell ?? null;
    const sellabilityStatus = sellability?.sellabilityStatus ?? "unknown";
    const sellabilityReason = sellability?.sellabilityReason ?? "missing";

    return {
      fbaFee: 4,
      fbmFee: 2,
      referralFeePercent: 15,
      estimatedSalePrice: 25,
      canSell,
      sellabilityStatus,
      sellabilityReason,
    };
  },
);
/**
 * Stand-in for `analyzeProducts`: gives every product the same canned
 * verdict, keyed by the product's `record.asin`.
 */
const analyzeProductsMock = mock(async (products: any[]) => {
  const toCannedVerdict = (product: any) => {
    return {
      asin: product.record.asin,
      verdict: "FBA",
      confidence: 90,
      reasoning: "mocked",
    };
  };

  return products.map(toCannedVerdict);
});
// Register the SP-API mocks in place of the "./sp-api.ts" module exports.
mock.module("./sp-api.ts", () => ({
fetchSellabilityBatch: fetchSellabilityBatchMock,
fetchSpApiPricingAndFees: fetchSpApiPricingAndFeesMock,
}));
// Register the LLM mock in place of the "./llm.ts" module exports.
mock.module("./llm.ts", () => ({
analyzeProducts: analyzeProductsMock,
}));
// The module under test is loaded with a dynamic import placed after the
// mock.module() calls above — presumably so that its own imports of
// "./sp-api.ts" and "./llm.ts" resolve to the mocks (confirm against Bun's
// module-mocking documentation). The promise is awaited in beforeAll.
const modulePromise = import("./mid-range-sellers-by-category.ts");
// SQLite file used by this test file. Its parent directory (test_output under
// the current working directory) is removed and recreated in beforeAll and
// removed again in afterAll.
const DB_TEST_PATH = path.join(
process.cwd(),
"test_output",
"test_mid_range_analysis.sqlite",
);
// Database handle; assigned in beforeAll from getDb(DB_TEST_PATH).
let db: Database;
// Function under test; assigned in beforeAll from the dynamically imported
// pipeline module. The numeric parameters are positional, so the tests must
// pass the thresholds in exactly this order.
let processCategory: (
db: Database,
runId: number,
category: any,
perCategoryTop: number,
categoryCandidatePool: number,
minMonthlySold: number,
maxMonthlySold: number,
minPrice: number,
maxPrice: number,
minSellerCount: number,
maxSellerCount: number,
minAmazonBuyboxSharePct: number,
maxAmazonBuyboxSharePct: number,
) => Promise<any>;
// Helper from the pipeline module, assigned in beforeAll. The tests use its
// resolved number as the runId passed to processCategory.
let insertCategoryRunSummary: (
db: Database,
summary: any,
runTimestamp: string,
) => Promise<number>;
// The fetch implementation captured in beforeAll, before beforeEach installs
// its stub; restored in afterAll.
let originalFetch: typeof globalThis.fetch;
// One-time setup: bind the functions under test, start from an empty output
// directory, open the test database, and remember the real fetch.
beforeAll(async () => {
  const {
    processCategory: loadedProcessCategory,
    insertCategoryRunSummary: loadedInsertCategoryRunSummary,
  } = await modulePromise;
  processCategory = loadedProcessCategory;
  insertCategoryRunSummary = loadedInsertCategoryRunSummary;

  // Recreate the output directory so no stale database file is left behind.
  const outputDir = path.dirname(DB_TEST_PATH);
  rmSync(outputDir, { recursive: true, force: true });
  mkdirSync(outputDir, { recursive: true });

  initDb(DB_TEST_PATH);
  db = getDb(DB_TEST_PATH);

  originalFetch = globalThis.fetch;
});
// One-time teardown: restore the real fetch, close the database, and delete
// the test output directory.
afterAll(() => {
  // originalFetch is only assigned at the very end of beforeAll. If setup
  // failed before that point, leave globalThis.fetch alone rather than
  // overwriting it with undefined.
  if (typeof originalFetch === "function") {
    globalThis.fetch = originalFetch;
  }
  try {
    closeDb();
  } finally {
    // Remove the output directory even when closing the database throws.
    rmSync(path.dirname(DB_TEST_PATH), { recursive: true, force: true });
  }
});
/**
 * Compact description of one fake product served by the stubbed `/product`
 * endpoint. Only the values that differ between fixture products are listed;
 * buildFakeKeepaProduct() expands them into the full payload shape.
 *
 * NOTE(review): slots 3 / 11 / 18 of `stats.current` presumably correspond to
 * Keepa's sales rank, new-offer count, and buy box price (in cents) — confirm
 * against the pipeline's Keepa parser.
 */
type FakeKeepaProductSpec = {
  asin: string;
  title: string;
  monthlySold: number;
  isAmazonSeller: boolean;
  /** Left out of the payload entirely when undefined. */
  buyBoxStatsAmazon90?: number;
  /** Value placed at `stats.current[3]`. */
  statsCurrent3: number;
  /** Value placed at `stats.current[11]`. */
  statsCurrent11: number;
  /** Value placed at `stats.current[18]`. */
  statsCurrent18: number;
  /** Value placed at `stats.avg[0]`. */
  statsAvg0: number;
  /** Value placed at `stats.avg[3]`. */
  statsAvg3: number;
  /** Second element of the single `csv` row `[1, value]`. */
  csvValue: number;
};

/** The five fixture products, in the order the stubbed endpoints return them. */
const FAKE_KEEPA_PRODUCTS: readonly FakeKeepaProductSpec[] = [
  {
    asin: "B000000001",
    title: "Product One",
    monthlySold: 600,
    isAmazonSeller: true,
    buyBoxStatsAmazon90: 40,
    statsCurrent3: 1000,
    statsCurrent11: 5,
    statsCurrent18: 2599,
    statsAvg0: 2400,
    statsAvg3: 1200,
    csvValue: 2599,
  },
  {
    asin: "B000000002",
    title: "Product Two",
    monthlySold: 250,
    isAmazonSeller: true,
    buyBoxStatsAmazon90: 50,
    statsCurrent3: 2000,
    statsCurrent11: 3,
    statsCurrent18: 1999,
    statsAvg0: 1800,
    statsAvg3: 2200,
    csvValue: 1200,
  },
  {
    asin: "B000000003",
    title: "Product Three",
    monthlySold: 800,
    isAmazonSeller: true,
    buyBoxStatsAmazon90: 50,
    statsCurrent3: 1500,
    statsCurrent11: 4,
    statsCurrent18: 2099,
    statsAvg0: 2000,
    statsAvg3: 1800,
    csvValue: 2099,
  },
  {
    asin: "B000000004",
    title: "Product Four",
    monthlySold: 400,
    isAmazonSeller: true,
    buyBoxStatsAmazon90: 95,
    statsCurrent3: 3000,
    statsCurrent11: 4,
    statsCurrent18: 2899,
    statsAvg0: 2600,
    statsAvg3: 2800,
    csvValue: 2899,
  },
  {
    // The only product without a buyBoxStatsAmazon90 field.
    asin: "B000000005",
    title: "Product Five",
    monthlySold: 450,
    isAmazonSeller: false,
    statsCurrent3: 3200,
    statsCurrent11: 25,
    statsCurrent18: 3500,
    statsAvg0: 3200,
    statsAvg3: 3200,
    csvValue: 3500,
  },
];

/** Expands a spec into the product object shape returned by the `/product` stub. */
function buildFakeKeepaProduct(spec: FakeKeepaProductSpec) {
  // stats.current has 19 slots; all are null except indices 3, 11 and 18.
  const current: Array<number | null> = Array(19).fill(null);
  current[3] = spec.statsCurrent3;
  current[11] = spec.statsCurrent11;
  current[18] = spec.statsCurrent18;

  return {
    asin: spec.asin,
    title: spec.title,
    monthlySold: spec.monthlySold,
    isAmazonSeller: spec.isAmazonSeller,
    ...(spec.buyBoxStatsAmazon90 === undefined
      ? {}
      : { buyBoxStatsAmazon90: spec.buyBoxStatsAmazon90 }),
    stats: {
      current,
      avg: [spec.statsAvg0, null, null, spec.statsAvg3],
    },
    csv: [[1, spec.csvValue]],
    categoryTree: [{ name: "Category 1" }],
  };
}

// Per-test setup: empty both result tables and install a fetch stub that
// answers `/bestsellers` and `/product` with the fixture data above and
// replies 404 to every other path.
beforeEach(() => {
  db.run("DELETE FROM product_analysis_results");
  db.run("DELETE FROM category_analysis_runs");

  globalThis.fetch = mock(async (input: string | URL | Request) => {
    const rawUrl =
      typeof input === "string"
        ? input
        : input instanceof URL
          ? input.toString()
          : input.url;
    const url = new URL(rawUrl);

    if (url.pathname === "/bestsellers") {
      return new Response(
        JSON.stringify({
          bestSellersList: FAKE_KEEPA_PRODUCTS.map((spec) => spec.asin),
          tokensLeft: 10,
          refillRate: 1,
        }),
        { status: 200 },
      );
    }

    if (url.pathname === "/product") {
      return new Response(
        JSON.stringify({
          products: FAKE_KEEPA_PRODUCTS.map(buildFakeKeepaProduct),
          tokensLeft: 10,
          refillRate: 1,
        }),
        { status: 200 },
      );
    }

    return new Response("not found", { status: 404 });
  }) as unknown as typeof globalThis.fetch;
});
// Runs the pipeline over the fixture category and checks that the two products
// it reports are both persisted, including the one flagged as restricted by the
// sellability mock.
test("processCategory keeps mid-range matches even when sellability is restricted", async () => {
  const category = {
    id: 1,
    label: "Category 1",
    parentId: 0,
    childCount: 0,
  };

  // Seed a "running" summary row; its id is the runId handed to processCategory.
  const pendingSummary = {
    categoryId: category.id,
    categoryLabel: category.label,
    topAsinsChecked: 0,
    availableAsins: 0,
    fba: 0,
    fbm: 0,
    skip: 0,
    status: "running",
    error: "",
    results: [],
  };
  const runId = await insertCategoryRunSummary(
    db,
    pendingSummary,
    new Date().toISOString(),
  );

  // Thresholds, named after processCategory's positional parameters.
  const perCategoryTop = 3;
  const categoryCandidatePool = 5;
  const minMonthlySold = 100;
  const maxMonthlySold = 1000;
  const minPrice = 15;
  const maxPrice = 200;
  const minSellerCount = 3;
  const maxSellerCount = 20;
  const minAmazonBuyboxSharePct = 15;
  const maxAmazonBuyboxSharePct = 85;

  const summary = await processCategory(
    db,
    runId,
    category,
    perCategoryTop,
    categoryCandidatePool,
    minMonthlySold,
    maxMonthlySold,
    minPrice,
    maxPrice,
    minSellerCount,
    maxSellerCount,
    minAmazonBuyboxSharePct,
    maxAmazonBuyboxSharePct,
  );

  expect(summary.status).toBe("ok");
  expect(summary.topAsinsChecked).toBe(5);
  expect(summary.availableAsins).toBe(2);
  expect(summary.results?.length).toBe(2);

  type PersistedRow = {
    asin: string;
    monthly_sold: number;
    can_sell: string;
    sellability_status: string;
  };
  const persistedRows = db
    .query(
      "SELECT asin, monthly_sold, can_sell, sellability_status FROM product_analysis_results ORDER BY monthly_sold DESC",
    )
    .all() as PersistedRow[];
  const rowFor = (asin: string) =>
    persistedRows.find((row) => row.asin === asin);

  expect(persistedRows.length).toBe(2);
  expect(persistedRows.map(({ asin }) => asin)).toEqual([
    "B000000003",
    "B000000001",
  ]);

  // The restricted product is stored with its restriction recorded.
  const restricted = rowFor("B000000003");
  expect(restricted?.can_sell).toBe("no");
  expect(restricted?.sellability_status).toBe("restricted");

  const sellable = rowFor("B000000001");
  expect(sellable?.can_sell).toBe("yes");
  expect(sellable?.sellability_status).toBe("available");
});
// Runs the pipeline with minPrice/maxPrice set to 500/600 and checks that the
// summary is reported as "empty" and that no product rows are written.
test("processCategory returns empty when no products match mid-range criteria", async () => {
  const category = {
    id: 2,
    label: "Category 2",
    parentId: 0,
    childCount: 0,
  };

  // Seed a "running" summary row; its id is the runId handed to processCategory.
  const pendingSummary = {
    categoryId: category.id,
    categoryLabel: category.label,
    topAsinsChecked: 0,
    availableAsins: 0,
    fba: 0,
    fbm: 0,
    skip: 0,
    status: "running",
    error: "",
    results: [],
  };
  const runId = await insertCategoryRunSummary(
    db,
    pendingSummary,
    new Date().toISOString(),
  );

  // Thresholds, named after processCategory's positional parameters.
  const perCategoryTop = 3;
  const categoryCandidatePool = 5;
  const minMonthlySold = 100;
  const maxMonthlySold = 1000;
  const minPrice = 500;
  const maxPrice = 600;
  const minSellerCount = 3;
  const maxSellerCount = 20;
  const minAmazonBuyboxSharePct = 15;
  const maxAmazonBuyboxSharePct = 85;

  const summary = await processCategory(
    db,
    runId,
    category,
    perCategoryTop,
    categoryCandidatePool,
    minMonthlySold,
    maxMonthlySold,
    minPrice,
    maxPrice,
    minSellerCount,
    maxSellerCount,
    minAmazonBuyboxSharePct,
    maxAmazonBuyboxSharePct,
  );

  expect(summary.status).toBe("empty");
  expect(summary.topAsinsChecked).toBe(5);
  expect(summary.availableAsins).toBe(0);
  expect(summary.results?.length).toBe(0);

  // Nothing may have been persisted for this run.
  const countRows = db
    .query("SELECT COUNT(*) as c FROM product_analysis_results")
    .all() as Array<{ c: number }>;
  expect(countRows[0]?.c).toBe(0);
});

File diff suppressed because it is too large Load Diff