feat: add mid-range sellers by category analysis pipeline

This new pipeline identifies products meeting specific monthly sold, price, seller count, and Amazon buy box share criteria across categories. It fetches comprehensive product data from Keepa and SP-API, analyzes it using an LLM, and persists the results.

A key enhancement is the introduction of a dedicated Redis cache for Keepa and SP-API responses. This reduces API token consumption and improves performance for subsequent runs by caching enriched ASIN data with a 12-hour TTL. Products are saved regardless of their sellability status to provide a complete view.
This commit is contained in:
Victor Noguera
2026-05-02 12:03:31 -04:00
parent 9b832b7839
commit f2c8a9728d
5 changed files with 2409 additions and 1 deletions

View File

@@ -45,6 +45,38 @@ bun run src/sp-test.ts B07SN9BHVV # Auth + sellers endpoint + pricing offer c
bun run src/sp-test.ts --sellability B07SN9BHVV # Standalone sellability check
```
## Category Pipelines
Run category-focused discovery flows with Keepa + SP-API + LLM:
```bash
bun run bestsellers
bun run monthly-sold
bun run mid-range
```
Mid-range process:
- Script: `bun run mid-range`
- Source: `src/mid-range-sellers-by-category.ts`
- Default filters:
- Monthly sold between `100` and `1000`
- Price between `$15` and `$200` (using the Keepa current price, falling back to the 90-day average)
- Seller count between `3` and `20`
- If Amazon is a seller, Amazon buy box share must be between `15%` and `85%`
- Sellability behavior:
- Sellability is still fetched and saved (`can_sell`, `sellability_status`, `sellability_reason`)
- Matching products are persisted regardless of sellability status
- Caching behavior:
- Uses Redis to cache the Keepa + SP-API enrichment data per ASIN
- Cache TTL is fixed at `12 hours`
Example:
```bash
bun run mid-range --category-limit 10 --per-category-top 50 --category-candidate-pool 250 --min-monthly-sold 100 --max-monthly-sold 1000 --min-price 15 --max-price 200 --min-seller-count 3 --max-seller-count 20 --min-amazon-buybox-share-pct 15 --max-amazon-buybox-share-pct 85
```
## UPC to ASIN Mapping
You can map UPCs to ASINs directly through the Keepa integration in `src/keepa.ts`.

View File

@@ -6,6 +6,7 @@
"scripts": {
"bestsellers": "bun run src/bestsellers-by-category.ts",
"monthly-sold": "bun run src/top-monthly-sold-by-category.ts",
"mid-range": "bun run src/mid-range-sellers-by-category.ts",
"upc": "bun run src/upc-lookup.ts",
"upc-file": "bun run src/upc-file-analysis.ts",
"start": "bun run src/index.ts",

View File

@@ -1,10 +1,21 @@
import Redis from "ioredis";
import { config } from "./config.ts";
import type { EnrichedProduct } from "./types.ts";
import type { EnrichedProduct, KeepaData, SpApiData } from "./types.ts";
// Shared ioredis client for this module. While it is `null`, the API cache
// helpers below return early (reads yield `null`, writes are skipped).
let redis: Redis | null = null;
// When true, connectCache() returns immediately without attempting to connect.
let disabled = false;
/**
 * Payload stored in Redis under the per-ASIN API cache key
 * (see `getApiCacheKey`), serialized with `JSON.stringify`.
 */
export type ApiCacheEntry = {
// Product title saved alongside the API payloads.
title: string;
// Keepa payload for the ASIN; the type allows `null` when none is available.
keepa: KeepaData | null;
// SP-API payload for the ASIN.
spApi: SpApiData;
// Time the data was fetched — presumably an ISO-8601 string; confirm against the writer.
fetchedAt: string;
};
/**
 * Builds the Redis key under which the API enrichment for one ASIN is stored.
 *
 * @param asin - Product identifier to namespace.
 * @returns The key in the form `api:asin:<asin>`.
 */
function getApiCacheKey(asin: string): string {
  const prefix = "api:asin:";
  return prefix.concat(asin);
}
export async function connectCache(): Promise<void> {
if (disabled) return;
try {
@@ -58,6 +69,35 @@ export async function setCache(
}
}
/**
 * Reads the cached API enrichment entry for an ASIN.
 *
 * This is a best-effort lookup that never throws. It resolves to `null` when
 * no Redis client is connected, the key is absent or empty, the read or JSON
 * parse fails, or the stored payload is not a JSON object.
 *
 * @param asin - Product identifier whose cache entry should be read.
 * @returns The cached entry, or `null` when nothing usable is cached.
 */
export async function getApiCache(asin: string): Promise<ApiCacheEntry | null> {
  if (!redis) return null;
  try {
    const raw = await redis.get(getApiCacheKey(asin));
    if (!raw) return null;

    const parsed: unknown = JSON.parse(raw);
    // Valid JSON is not necessarily an entry: a stored `null`, number, string
    // or array would otherwise be handed to callers as an ApiCacheEntry and
    // fail later on property access. Treat those as a cache miss.
    if (
      typeof parsed !== "object" ||
      parsed === null ||
      Array.isArray(parsed)
    ) {
      return null;
    }
    return parsed as ApiCacheEntry;
  } catch {
    // Cache failures are non-critical; fall back to a miss.
    return null;
  }
}
/**
 * Stores the API enrichment entry for an ASIN with an expiry.
 *
 * This is a best-effort write. It does nothing when no Redis client is
 * connected, and any serialization or write error is swallowed so that
 * callers continue without caching.
 *
 * @param asin - Product identifier used to derive the cache key.
 * @param data - Entry to serialize as JSON.
 * @param ttlSeconds - Expiry passed to Redis with the `EX` option.
 */
export async function setApiCache(
  asin: string,
  data: ApiCacheEntry,
  ttlSeconds: number,
): Promise<void> {
  const client = redis;
  if (!client) return;

  try {
    const key = getApiCacheKey(asin);
    const payload = JSON.stringify(data);
    await client.set(key, payload, "EX", ttlSeconds);
  } catch {
    // Non-critical, continue without caching
  }
}
export async function disconnectCache(): Promise<void> {
if (redis) {
await redis.quit();

View File

@@ -0,0 +1,445 @@
import { test, expect, beforeAll, afterAll, beforeEach, mock } from "bun:test";
import { Database } from "bun:sqlite";
import { getDb, initDb, closeDb } from "./database.ts";
import path from "node:path";
import { rmSync, mkdirSync } from "node:fs";
/**
 * Stand-in for `fetchSellabilityBatch`: resolves to a Map keyed by ASIN.
 * "B000000003" is reported as restricted; every other ASIN is available.
 */
const fetchSellabilityBatchMock = mock(async (asins: string[]) => {
  const restrictedAsin = "B000000003";

  const entries = asins.map((asin) => {
    const sellability =
      asin === restrictedAsin
        ? {
            canSell: false,
            sellabilityStatus: "restricted" as const,
            sellabilityReason: "restricted",
          }
        : {
            canSell: true,
            sellabilityStatus: "available" as const,
            sellabilityReason: "ok",
          };
    return [asin, sellability] as const;
  });

  return new Map(entries);
});
/**
 * Stand-in for `fetchSpApiPricingAndFees`: returns fixed fee and price figures
 * and copies the sellability fields through. When a sellability field is
 * missing it falls back to `null` / "unknown" / "missing".
 */
const fetchSpApiPricingAndFeesMock = mock(
  async (_asin: string, sellability: any) => {
    const canSell = sellability?.canSell ?? null;
    const sellabilityStatus = sellability?.sellabilityStatus ?? "unknown";
    const sellabilityReason = sellability?.sellabilityReason ?? "missing";

    return {
      fbaFee: 4,
      fbmFee: 2,
      referralFeePercent: 15,
      estimatedSalePrice: 25,
      canSell,
      sellabilityStatus,
      sellabilityReason,
    };
  },
);
/**
 * Stand-in for the LLM `analyzeProducts` call: gives every product the same
 * canned "FBA" verdict, keyed by the product record's ASIN.
 */
const analyzeProductsMock = mock(async (products: any[]) => {
  const cannedVerdict = {
    verdict: "FBA",
    confidence: 90,
    reasoning: "mocked",
  };
  return products.map(({ record }) => ({
    asin: record.asin,
    ...cannedVerdict,
  }));
});
// Replace the SP-API and LLM modules with the mocks defined above. These
// registrations deliberately come before the dynamic import below, so the
// module under test is loaded only after they are in place.
mock.module("./sp-api.ts", () => ({
fetchSellabilityBatch: fetchSellabilityBatchMock,
fetchSpApiPricingAndFees: fetchSpApiPricingAndFeesMock,
}));
mock.module("./llm.ts", () => ({
analyzeProducts: analyzeProductsMock,
}));
// Dynamic import (rather than a static one) so loading is deferred until this
// point; the promise is awaited in beforeAll.
const modulePromise = import("./mid-range-sellers-by-category.ts");
// SQLite file used by this suite. Its parent directory (test_output under the
// current working directory) is removed and recreated in beforeAll and removed
// again in afterAll.
const DB_TEST_PATH = path.join(
process.cwd(),
"test_output",
"test_mid_range_analysis.sqlite",
);
// Database handle, assigned in beforeAll from getDb(DB_TEST_PATH).
let db: Database;
// Late-bound reference to the function under test, assigned in beforeAll once
// the dynamic import has resolved. The filter bounds are positional.
let processCategory: (
db: Database,
runId: number,
category: any,
perCategoryTop: number,
categoryCandidatePool: number,
minMonthlySold: number,
maxMonthlySold: number,
minPrice: number,
maxPrice: number,
minSellerCount: number,
maxSellerCount: number,
minAmazonBuyboxSharePct: number,
maxAmazonBuyboxSharePct: number,
) => Promise<any>;
// Late-bound helper that inserts a category run row; its resolved value is
// used as the runId passed to processCategory.
let insertCategoryRunSummary: (
db: Database,
summary: any,
runTimestamp: string,
) => Promise<number>;
// The real global fetch, captured in beforeAll and restored in afterAll.
let originalFetch: typeof globalThis.fetch;
// One-time setup: bind the functions under test, start from a clean test
// directory with a fresh SQLite database, and remember the real fetch.
beforeAll(async () => {
  ({ processCategory, insertCategoryRunSummary } = await modulePromise);

  const testDir = path.dirname(DB_TEST_PATH);
  rmSync(testDir, { recursive: true, force: true });
  mkdirSync(testDir, { recursive: true });

  initDb(DB_TEST_PATH);
  db = getDb(DB_TEST_PATH);

  originalFetch = globalThis.fetch;
});
// One-time teardown: restore the real fetch, close the database, and delete
// the test directory.
afterAll(() => {
  const testDir = path.dirname(DB_TEST_PATH);

  globalThis.fetch = originalFetch;
  closeDb();
  rmSync(testDir, { recursive: true, force: true });
});
/**
 * Builds a 19-slot `stats.current` array in which every slot is `null` except
 * the indices given in `filled`.
 */
function sparseCurrentStats(
  filled: Record<number, number>,
): Array<number | null> {
  const slots: Array<number | null> = Array.from({ length: 19 }, () => null);
  for (const [index, value] of Object.entries(filled)) {
    slots[Number(index)] = value;
  }
  return slots;
}

/** Wraps a payload in a 200 response with a JSON-serialized body. */
function okJsonResponse(payload: unknown): Response {
  return new Response(JSON.stringify(payload), { status: 200 });
}

/** Payload served for the `/bestsellers` path: five candidate ASINs. */
function buildBestsellersPayload() {
  return {
    bestSellersList: [
      "B000000001",
      "B000000002",
      "B000000003",
      "B000000004",
      "B000000005",
    ],
    tokensLeft: 10,
    refillRate: 1,
  };
}

/**
 * Payload served for the `/product` path: one product per candidate ASIN.
 *
 * NOTE(review): slots 3, 11 and 18 of `stats.current` presumably follow
 * Keepa's index layout (sales rank, offer count, a price in cents) — confirm
 * against the Keepa parsing code, which is not visible here.
 */
function buildProductPayload() {
  const categoryTree = () => [{ name: "Category 1" }];

  return {
    products: [
      {
        asin: "B000000001",
        title: "Product One",
        monthlySold: 600,
        isAmazonSeller: true,
        buyBoxStatsAmazon90: 40,
        stats: {
          current: sparseCurrentStats({ 3: 1000, 11: 5, 18: 2599 }),
          avg: [2400, null, null, 1200],
        },
        csv: [[1, 2599]],
        categoryTree: categoryTree(),
      },
      {
        asin: "B000000002",
        title: "Product Two",
        monthlySold: 250,
        isAmazonSeller: true,
        buyBoxStatsAmazon90: 50,
        stats: {
          current: sparseCurrentStats({ 3: 2000, 11: 3, 18: 1999 }),
          avg: [1800, null, null, 2200],
        },
        csv: [[1, 1200]],
        categoryTree: categoryTree(),
      },
      {
        asin: "B000000003",
        title: "Product Three",
        monthlySold: 800,
        isAmazonSeller: true,
        buyBoxStatsAmazon90: 50,
        stats: {
          current: sparseCurrentStats({ 3: 1500, 11: 4, 18: 2099 }),
          avg: [2000, null, null, 1800],
        },
        csv: [[1, 2099]],
        categoryTree: categoryTree(),
      },
      {
        asin: "B000000004",
        title: "Product Four",
        monthlySold: 400,
        isAmazonSeller: true,
        buyBoxStatsAmazon90: 95,
        stats: {
          current: sparseCurrentStats({ 3: 3000, 11: 4, 18: 2899 }),
          avg: [2600, null, null, 2800],
        },
        csv: [[1, 2899]],
        categoryTree: categoryTree(),
      },
      {
        // Deliberately has no buyBoxStatsAmazon90 field.
        asin: "B000000005",
        title: "Product Five",
        monthlySold: 450,
        isAmazonSeller: false,
        stats: {
          current: sparseCurrentStats({ 3: 3200, 11: 25, 18: 3500 }),
          avg: [3200, null, null, 3200],
        },
        csv: [[1, 3500]],
        categoryTree: categoryTree(),
      },
    ],
    tokensLeft: 10,
    refillRate: 1,
  };
}

// Per-test setup: clear both result tables and install a fake global fetch
// that serves canned payloads for the `/bestsellers` and `/product` paths and
// a 404 for anything else.
beforeEach(() => {
  db.run("DELETE FROM product_analysis_results");
  db.run("DELETE FROM category_analysis_runs");

  const fakeFetch = mock(async (input: string | URL | Request) => {
    let rawUrl: string;
    if (typeof input === "string") {
      rawUrl = input;
    } else if (input instanceof URL) {
      rawUrl = input.toString();
    } else {
      rawUrl = input.url;
    }

    const { pathname } = new URL(rawUrl);
    switch (pathname) {
      case "/bestsellers":
        return okJsonResponse(buildBestsellersPayload());
      case "/product":
        return okJsonResponse(buildProductPayload());
      default:
        return new Response("not found", { status: 404 });
    }
  });

  globalThis.fetch = fakeFetch as unknown as typeof globalThis.fetch;
});
// With the default-style filter bounds, two of the five fixture products are
// expected to be persisted, including the one the sellability mock reports as
// restricted.
test("processCategory keeps mid-range matches even when sellability is restricted", async () => {
  type ResultRow = {
    asin: string;
    monthly_sold: number;
    can_sell: string;
    sellability_status: string;
  };

  const category = { id: 1, label: "Category 1", parentId: 0, childCount: 0 };

  const initialSummary = {
    categoryId: category.id,
    categoryLabel: category.label,
    topAsinsChecked: 0,
    availableAsins: 0,
    fba: 0,
    fbm: 0,
    skip: 0,
    status: "running",
    error: "",
    results: [],
  };
  const startedAt = new Date().toISOString();
  const runId = await insertCategoryRunSummary(db, initialSummary, startedAt);

  const summary = await processCategory(
    db,
    runId,
    category,
    3, // perCategoryTop
    5, // categoryCandidatePool
    100, // minMonthlySold
    1000, // maxMonthlySold
    15, // minPrice
    200, // maxPrice
    3, // minSellerCount
    20, // maxSellerCount
    15, // minAmazonBuyboxSharePct
    85, // maxAmazonBuyboxSharePct
  );

  expect(summary.status).toBe("ok");
  expect(summary.topAsinsChecked).toBe(5);
  expect(summary.availableAsins).toBe(2);
  expect(summary.results?.length).toBe(2);

  const selectSql =
    "SELECT asin, monthly_sold, can_sell, sellability_status FROM product_analysis_results ORDER BY monthly_sold DESC";
  const persisted = db.query(selectSql).all() as ResultRow[];
  const rowFor = (asin: string) => persisted.find((row) => row.asin === asin);

  expect(persisted.length).toBe(2);
  expect(persisted.map((row) => row.asin)).toEqual([
    "B000000003",
    "B000000001",
  ]);

  // The restricted product is still saved, with its sellability recorded.
  const restrictedRow = rowFor("B000000003");
  expect(restrictedRow?.can_sell).toBe("no");
  expect(restrictedRow?.sellability_status).toBe("restricted");

  const sellableRow = rowFor("B000000001");
  expect(sellableRow?.can_sell).toBe("yes");
  expect(sellableRow?.sellability_status).toBe("available");
});
// With a 500–600 price window, no fixture product is expected to match, so the
// run reports "empty" and nothing is persisted.
test("processCategory returns empty when no products match mid-range criteria", async () => {
  const category = { id: 2, label: "Category 2", parentId: 0, childCount: 0 };

  const initialSummary = {
    categoryId: category.id,
    categoryLabel: category.label,
    topAsinsChecked: 0,
    availableAsins: 0,
    fba: 0,
    fbm: 0,
    skip: 0,
    status: "running",
    error: "",
    results: [],
  };
  const startedAt = new Date().toISOString();
  const runId = await insertCategoryRunSummary(db, initialSummary, startedAt);

  const summary = await processCategory(
    db,
    runId,
    category,
    3, // perCategoryTop
    5, // categoryCandidatePool
    100, // minMonthlySold
    1000, // maxMonthlySold
    500, // minPrice
    600, // maxPrice
    3, // minSellerCount
    20, // maxSellerCount
    15, // minAmazonBuyboxSharePct
    85, // maxAmazonBuyboxSharePct
  );

  expect(summary.status).toBe("empty");
  expect(summary.topAsinsChecked).toBe(5);
  expect(summary.availableAsins).toBe(0);
  expect(summary.results?.length).toBe(0);

  const countRows = db
    .query("SELECT COUNT(*) as c FROM product_analysis_results")
    .all() as Array<{ c: number }>;
  const [firstRow] = countRows;
  expect(firstRow?.c).toBe(0);
});

File diff suppressed because it is too large Load Diff