Merge branch 'searxng'

This commit is contained in:
Victor Noguera
2026-05-20 18:35:53 -04:00
6 changed files with 1273 additions and 5 deletions

View File

@@ -12,4 +12,7 @@ AWS_SECRET_ACCESS_KEY=your_aws_secret_access_key
REDIS_URL=redis://localhost:6379
LLM_URL=http://localhost:1234/v1
LLM_MODEL=default
CACHE_TTL=86400
CACHE_TTL=86400
GOOGLE_API_KEY=your_google_api_key
GOOGLE_CSE_ID=your_google_programmable_search_engine_id
SERPAPI_API_KEY=your_serpapi_api_key_for_google_shopping

View File

@@ -8,6 +8,7 @@
"monthly-sold": "bun run src/top-monthly-sold-by-category.ts",
"mid-range": "bun run src/mid-range-sellers-by-category.ts",
"stalker": "bun run src/stalker.ts",
"search-offers": "bun run src/asin-offer-search.ts",
"upc": "bun run src/upc-lookup.ts",
"upc-file": "bun run src/upc-file-analysis.ts",
"start": "bun run src/index.ts",

134
src/asin-offer-search.ts Normal file
View File

@@ -0,0 +1,134 @@
import { searchProductOffers, type SearxngOfferSearchResult } from "./searxng.ts";
/** Parsed command-line options for the offer search CLI. */
type CliArgs = {
/** Free-text query assembled from all positional (non-flag) arguments. */
query: string;
/** True when `--json` was passed; results are printed as JSON instead of a table. */
json: boolean;
/** Search backend selected with `--provider`, already normalized; undefined when the flag is absent. */
provider?: "serpapi" | "google-custom-search" | "searxng";
/** Value of `--category`, or "shopping" when only `--shopping` was given. */
categories?: string;
/** Raw value of `--engine`. */
engines?: string;
/** Value of `--limit` converted with `Number`; undefined when the flag is absent. */
limit?: number;
};
/**
 * Looks up the value of a command-line flag.
 *
 * The `--flag=value` form takes precedence wherever it appears; otherwise the
 * argument that follows the first bare `--flag` is returned.
 *
 * @param args - Raw argument list.
 * @param flag - Flag name including its leading dashes, e.g. `--limit`.
 * @returns The flag's value, or undefined when the flag (or its value) is absent.
 */
function readFlagValue(args: string[], flag: string): string | undefined {
  const prefix = `${flag}=`;
  for (const candidate of args) {
    if (candidate.startsWith(prefix)) {
      return candidate.slice(prefix.length);
    }
  }
  const position = args.findIndex((candidate) => candidate === flag);
  if (position < 0) return undefined;
  return args[position + 1];
}
/**
 * Turns the raw CLI argument list into a {@link CliArgs} object.
 *
 * Positional arguments (anything that is not a `--flag` and does not directly
 * follow a value-taking flag) are joined into the search query. The process
 * exits with status 1 when the provider is unknown, when no query remains, or
 * when `--limit` is not a positive integer.
 *
 * @param args - Arguments after the script name.
 * @returns The parsed options.
 */
function parseArgs(args: string[]): CliArgs {
  const wantsJson = args.includes("--json");
  const wantsShopping = args.includes("--shopping");
  const rawProvider = readFlagValue(args, "--provider");
  const rawEngine = readFlagValue(args, "--engine");
  const rawCategory = readFlagValue(args, "--category");
  const rawLimit = readFlagValue(args, "--limit");

  let parsedLimit: number | undefined;
  if (rawLimit != null) {
    parsedLimit = Number(rawLimit);
  }

  // An explicit --category wins; --shopping is only a shorthand default.
  let categories: string | undefined = rawCategory;
  if (categories == null && wantsShopping) {
    categories = "shopping";
  }

  // Provider validation runs before the usage check, so an invalid provider
  // is reported first.
  const provider = normalizeProvider(rawProvider);

  // Flags whose following argument is a value rather than a query word.
  const valueFlags = new Set(["--limit", "--category", "--engine", "--provider"]);
  const words: string[] = [];
  args.forEach((arg, index) => {
    if (arg.startsWith("--")) return;
    const before = args[index - 1];
    if (before !== undefined && valueFlags.has(before)) return;
    words.push(arg);
  });
  const query = words.join(" ").trim();

  if (!query) {
    console.error(
      'Usage: bun run search-offers "product search terms" [--limit 10] [--provider serpapi|google-custom-search|searxng] [--json]',
    );
    process.exit(1);
  }

  const limitIsValid =
    parsedLimit != null && Number.isInteger(parsedLimit) && parsedLimit > 0;
  if (rawLimit != null && !limitIsValid) {
    console.error("--limit must be a positive integer.");
    process.exit(1);
  }

  return {
    query,
    json: wantsJson,
    provider,
    categories,
    engines: rawEngine,
    limit: parsedLimit,
  };
}
/**
 * Prints search results as a console table, or a short notice when there are
 * none.
 *
 * @param results - Offers to display.
 */
function printTable(results: SearxngOfferSearchResult[]): void {
  if (results.length > 0) {
    const rows: Array<Record<string, unknown>> = [];
    for (const offer of results) {
      rows.push({
        Rank: offer.rank,
        Score: offer.score,
        ASIN: offer.matchedAsin ?? "",
        Price: formatPrice(offer),
        "Price Label": offer.detectedPriceLabel ?? "",
        Domain: offer.domain,
        Title: offer.title,
        URL: offer.url,
      });
    }
    console.table(rows);
    return;
  }
  console.log("No offer results found.");
}
/**
 * CLI entry point: parses arguments, runs the offer search, and prints the
 * results either as pretty-printed JSON (`--json`) or as a table.
 */
async function main(): Promise<void> {
  const { query, json, limit, provider, categories, engines } = parseArgs(
    process.argv.slice(2),
  );
  const offers = await searchProductOffers(query, {
    maxResults: limit,
    provider,
    categories,
    engines,
  });
  if (!json) {
    printTable(offers);
    return;
  }
  console.log(JSON.stringify(offers, null, 2));
}
/**
 * Validates and canonicalizes the `--provider` value.
 *
 * Matching ignores surrounding whitespace and letter case, and
 * "google-shopping" is accepted as an alias for "serpapi". Any other value
 * prints an error and exits the process with status 1.
 *
 * @param value - Raw flag value, or undefined when the flag was not given.
 * @returns The canonical provider name, or undefined when no value was given.
 */
function normalizeProvider(
  value: string | undefined,
): "serpapi" | "google-custom-search" | "searxng" | undefined {
  if (value == null) return undefined;
  switch (value.trim().toLowerCase()) {
    case "serpapi":
    case "google-shopping":
      return "serpapi";
    case "google-custom-search":
      return "google-custom-search";
    case "searxng":
      return "searxng";
    default:
      console.error("--provider must be one of: serpapi, google-custom-search, searxng");
      process.exit(1);
  }
}
/**
 * Formats a result's detected price for the table view.
 *
 * @param result - Search result to format.
 * @returns An empty string when no price was detected; otherwise the price
 *   text as found in the source, or a rendering of the numeric amount
 *   (`$<amount>` for USD, `<currency> <amount>` for anything else).
 */
function formatPrice(result: SearxngOfferSearchResult): string {
  const { detectedPrice, detectedPriceText, detectedPriceCurrency } = result;
  if (detectedPrice == null) return "";
  if (detectedPriceText) return detectedPriceText;
  const currency = detectedPriceCurrency ?? "USD";
  if (currency !== "USD") {
    return `${currency} ${detectedPrice}`;
  }
  return `$${detectedPrice}`;
}
// Run the CLI. Any rejection from main() is reported on stderr (the message
// for Error instances, the raw value otherwise) and the process exits with 1.
main().catch((err) => {
console.error(`Search failed: ${err instanceof Error ? err.message : err}`);
process.exit(1);
});

View File

@@ -18,10 +18,13 @@ function optionalBoolean(key: string, fallback: boolean): boolean {
export const config = {
keepaApiKey: required("KEEPA_API_KEY"),
redisUrl: optional("REDIS_URL", "redis://localhost:6379"),
llmUrl: optional("LLM_URL", "http://localhost:1234/v1"),
llmModel: optional("LLM_MODEL", "default"),
cacheTtl: parseInt(optional("CACHE_TTL", "86400"), 10),
spApiClientId: Bun.env.SP_API_CLIENT_ID,
llmUrl: optional("LLM_URL", "http://localhost:1234/v1"),
llmModel: optional("LLM_MODEL", "default"),
cacheTtl: parseInt(optional("CACHE_TTL", "86400"), 10),
searxngUrl: optional("SEARXNG_URL", "https://searxng.nvictor.me/"),
searxngTimeoutMs: parseInt(optional("SEARXNG_TIMEOUT_MS", "10000"), 10),
searxngMaxResults: parseInt(optional("SEARXNG_MAX_RESULTS", "10"), 10),
spApiClientId: Bun.env.SP_API_CLIENT_ID,
spApiClientSecret: Bun.env.SP_API_CLIENT_SECRET,
spApiRefreshToken: Bun.env.SP_API_REFRESH_TOKEN,
spApiRegion: optional("SP_API_REGION", "na"),

350
src/searxng.test.ts Normal file
View File

@@ -0,0 +1,350 @@
import { afterAll, beforeEach, expect, mock, test } from "bun:test";
import { normalizeAsin, searchProductOffers } from "./searxng.ts";
// Keep a reference to the real global fetch so it can be put back before each
// test and once more after the whole suite has finished.
const originalFetch = globalThis.fetch;
beforeEach(() => {
globalThis.fetch = originalFetch;
});
afterAll(() => {
globalThis.fetch = originalFetch;
});
// normalizeAsin trims and upper-cases valid input and throws "Invalid ASIN"
// for anything that is not ASIN-shaped.
test("normalizeAsin uppercases and validates ASINs", () => {
expect(normalizeAsin(" b07sn9bhvv ")).toBe("B07SN9BHVV");
expect(() => normalizeAsin("not-an-asin")).toThrow("Invalid ASIN");
});
// A bare ASIN query is expanded with purchase keywords before being sent to
// SearXNG, and the non-Amazon result carrying a price is ranked first.
test("searchProductOffers derives ASIN search behavior for ASIN-only queries", async () => {
const fetchMock = mock(async (input: string | URL | Request) => {
const url = input instanceof URL ? input : new URL(String(input));
expect(url.pathname).toBe("/search");
expect(url.searchParams.get("format")).toBe("json");
expect(url.searchParams.get("q")).toBe("B07SN9BHVV price sale offer buy online");
return Response.json({
results: [
{
title: "Amazon listing B07SN9BHVV",
url: "https://www.amazon.com/dp/B07SN9BHVV",
content: "Official marketplace listing.",
engines: ["duckduckgo"],
},
{
title: "Romand palette offer",
url: "https://example-shop.com/item",
content: "Buy product ASIN B07SN9BHVV. Offer price: $12.99 today.",
engines: ["brave"],
},
],
});
});
const results = await searchProductOffers("B07SN9BHVV", {
provider: "searxng",
baseUrl: "https://searxng.test/",
fetchImpl: fetchMock as unknown as typeof fetch,
maxResults: 10,
});
expect(results).toHaveLength(2);
expect(results[0]?.domain).toBe("example-shop.com");
expect(results[0]?.matchedAsin).toBe("B07SN9BHVV");
expect(results[0]?.detectedPrice).toBe(12.99);
expect(results[0]?.detectedPriceCurrency).toBe("USD");
expect(results[0]?.detectedPriceLabel).toBe("offer price");
expect(results[0]?.detectedPriceText).toBe("$12.99");
expect(results[0]?.engines).toEqual(["brave"]);
// Only the JSON request is made when it succeeds.
expect(fetchMock).toHaveBeenCalledTimes(1);
});
// When the JSON endpoint answers 403, the client re-requests the HTML page
// and scrapes title, URL, snippet and engine names from the result markup.
test("searchProductOffers falls back to HTML when JSON is unavailable", async () => {
const html = `
<article class="result result-default category-general">
<a class="url_header" href="https://supplier.example/products/romand"></a>
<h3><a href="https://supplier.example/products/romand">Supplier offer B07SN9BHVV</a></h3>
<p class="content">Wholesale product sale price: USD 9.50 with ASIN B07SN9BHVV.</p>
<div class="engines"><span>duckduckgo</span></div>
</article>
`;
const fetchMock = mock(async (input: string | URL | Request) => {
const url = input instanceof URL ? input : new URL(String(input));
if (url.searchParams.get("format") === "json") {
return new Response("forbidden", { status: 403 });
}
return new Response(html, {
status: 200,
headers: { "content-type": "text/html" },
});
});
const results = await searchProductOffers("B07SN9BHVV", {
provider: "searxng",
baseUrl: "https://searxng.test/",
fetchImpl: fetchMock as unknown as typeof fetch,
});
expect(results).toHaveLength(1);
expect(results[0]?.title).toBe("Supplier offer B07SN9BHVV");
expect(results[0]?.domain).toBe("supplier.example");
expect(results[0]?.detectedPrice).toBe(9.5);
expect(results[0]?.detectedPriceLabel).toBe("sale price");
expect(results[0]?.detectedPriceText).toBe("USD 9.50");
expect(results[0]?.matchedAsin).toBe("B07SN9BHVV");
expect(results[0]?.engines).toEqual(["duckduckgo"]);
// One JSON attempt plus one HTML attempt.
expect(fetchMock).toHaveBeenCalledTimes(2);
});
// Price detection: a labeled "selling price" in euros is preferred over the
// later "list price", and an unlabeled "<amount> USD" form is also recognized.
test("searchProductOffers detects common selling and sale price formats", async () => {
const fetchMock = mock(async () =>
Response.json({
results: [
{
title: "Supplier page",
url: "https://supplier.example/item",
content: "Selling price is €18.75 and list price is $24.00.",
},
{
title: "Backup page",
url: "https://backup.example/item",
content: "Available now for 22.10 USD.",
},
],
}),
);
const results = await searchProductOffers("romand palette price", {
provider: "searxng",
baseUrl: "https://searxng.test/",
fetchImpl: fetchMock as unknown as typeof fetch,
maxResults: 2,
});
expect(results[0]?.detectedPrice).toBe(18.75);
expect(results[0]?.detectedPriceCurrency).toBe("EUR");
expect(results[0]?.detectedPriceLabel).toBe("selling price");
expect(results[1]?.detectedPrice).toBe(22.1);
expect(results[1]?.detectedPriceCurrency).toBe("USD");
});
// For an ASIN-only query, results that never mention the ASIN are dropped even
// when they contain a price; the rupee-priced Amazon listing is kept.
test("searchProductOffers filters unrelated priced results for ASIN-only queries", async () => {
const fetchMock = mock(async () =>
Response.json({
results: [
{
title: "Unrelated deal",
url: "https://deals.example/phones",
content: "This price is $449 but it is for another product.",
},
{
title: "Amazon listing B07SN9BHVV",
url: "https://www.amazon.in/dp/B07SN9BHVV",
content: "1 offer from ₹550.00 · Buying options.",
},
],
}),
);
const results = await searchProductOffers("B07SN9BHVV", {
provider: "searxng",
baseUrl: "https://searxng.test/",
fetchImpl: fetchMock as unknown as typeof fetch,
});
expect(results).toHaveLength(1);
expect(results[0]?.matchedAsin).toBe("B07SN9BHVV");
expect(results[0]?.detectedPrice).toBe(550);
expect(results[0]?.detectedPriceCurrency).toBe("INR");
expect(results[0]?.detectedPriceText).toBe("₹550.00");
});
// A free-text query is forwarded unchanged and the results carry no `asin`.
test("searchProductOffers keeps arbitrary query strings generic", async () => {
const fetchMock = mock(async (input: string | URL | Request) => {
const url = input instanceof URL ? input : new URL(String(input));
expect(url.searchParams.get("q")).toBe("romand dry mango tulip price");
return Response.json({
results: [
{
title: "Generic result",
url: "https://shop.example/romand",
content: "Sale price: $14.25",
},
],
});
});
const results = await searchProductOffers("romand dry mango tulip price", {
provider: "searxng",
baseUrl: "https://searxng.test/",
fetchImpl: fetchMock as unknown as typeof fetch,
});
expect(results).toHaveLength(1);
expect(results[0]?.asin).toBeUndefined();
expect(results[0]?.detectedPrice).toBe(14.25);
});
// The `categories` option is passed through as the `categories` query parameter.
test("searchProductOffers sends configured categories", async () => {
const fetchMock = mock(async (input: string | URL | Request) => {
const url = input instanceof URL ? input : new URL(String(input));
expect(url.searchParams.get("categories")).toBe("shopping");
return Response.json({
results: [
{
title: "Shopping result",
url: "https://shop.example/item",
content: "Offer price: $10.00",
},
],
});
});
const results = await searchProductOffers("romand price", {
provider: "searxng",
baseUrl: "https://searxng.test/",
categories: "shopping",
fetchImpl: fetchMock as unknown as typeof fetch,
});
expect(results[0]?.detectedPrice).toBe(10);
});
// Selecting the single engine "google" sets the `engines` parameter and also
// prefixes the query with the "!go" bang; a singular `engine` field in the
// response is reported as the result's engine list.
test("searchProductOffers sends configured SearXNG engines", async () => {
const fetchMock = mock(async (input: string | URL | Request) => {
const url = input instanceof URL ? input : new URL(String(input));
expect(url.searchParams.get("engines")).toBe("google");
expect(url.searchParams.get("q")).toBe("!go romand price");
return Response.json({
results: [
{
title: "Google-backed result",
url: "https://shop.example/item",
content: "Offer price: $11.00",
engine: "google",
},
],
});
});
const results = await searchProductOffers("romand price", {
provider: "searxng",
baseUrl: "https://searxng.test/",
engines: "google",
fetchImpl: fetchMock as unknown as typeof fetch,
});
expect(results[0]?.detectedPrice).toBe(11);
expect(results[0]?.engines).toEqual(["google"]);
});
// Google Custom Search provider: key, cx, num and q are sent as query
// parameters, and a price found in pagemap.offer is surfaced as an "offer price".
test("searchProductOffers uses Google Custom Search API and pagemap offer prices", async () => {
const fetchMock = mock(async (input: string | URL | Request) => {
const url = input instanceof URL ? input : new URL(String(input));
expect(url.hostname).toBe("googleapis.test");
expect(url.searchParams.get("key")).toBe("test-key");
expect(url.searchParams.get("cx")).toBe("test-cx");
expect(url.searchParams.get("num")).toBe("5");
expect(url.searchParams.get("q")).toBe("romand dry mango tulip");
return Response.json({
items: [
{
title: "Romand Dry Mango Tulip",
link: "https://store.example/romand",
snippet: "Buy from Store Example.",
pagemap: {
offer: [{ price: "12.50", pricecurrency: "USD" }],
},
},
],
});
});
const results = await searchProductOffers("romand dry mango tulip", {
provider: "google-custom-search",
baseUrl: "https://googleapis.test/customsearch/v1",
googleApiKey: "test-key",
googleCx: "test-cx",
maxResults: 5,
fetchImpl: fetchMock as unknown as typeof fetch,
});
expect(results).toHaveLength(1);
expect(results[0]?.title).toContain("Romand Dry Mango Tulip");
expect(results[0]?.domain).toBe("store.example");
expect(results[0]?.detectedPrice).toBe(12.5);
expect(results[0]?.detectedPriceLabel).toBe("offer price");
expect(results[0]?.engines).toEqual(["google custom search"]);
});
// With no provider specified the SerpApi google_shopping engine is used, with
// US/English locale parameters, and the item's price string becomes the
// detected price.
test("searchProductOffers defaults to SerpApi Google Shopping results", async () => {
const fetchMock = mock(async (input: string | URL | Request) => {
const url = input instanceof URL ? input : new URL(String(input));
expect(url.hostname).toBe("serpapi.test");
expect(url.searchParams.get("engine")).toBe("google_shopping");
expect(url.searchParams.get("q")).toBe("dry mango tulip price");
expect(url.searchParams.get("api_key")).toBe("serpapi-key");
expect(url.searchParams.get("gl")).toBe("us");
expect(url.searchParams.get("hl")).toBe("en");
return Response.json({
shopping_results: [
{
position: 1,
title: "Romand Better Than Eyes Dry Mango Tulip",
source: "K-Beauty Store",
link: "https://store.example/products/romand",
price: "$13.40",
extracted_price: 13.4,
delivery: "$4.99 delivery",
rating: 4.7,
reviews: 128,
},
],
});
});
const results = await searchProductOffers("dry mango tulip price", {
baseUrl: "https://serpapi.test/search.json",
serpapiApiKey: "serpapi-key",
fetchImpl: fetchMock as unknown as typeof fetch,
});
expect(results).toHaveLength(1);
expect(results[0]?.domain).toBe("store.example");
expect(results[0]?.detectedPrice).toBe(13.4);
expect(results[0]?.detectedPriceText).toBe("$13.40");
expect(results[0]?.engines).toEqual(["serpapi google shopping"]);
});
// maxResults truncates after scoring (the priced second result outranks the
// unpriced first one), and an empty provider response yields an empty array.
test("searchProductOffers applies result limits and handles empty results", async () => {
const fetchMock = mock(async () =>
Response.json({
results: [
{ title: "One", url: "https://one.example", content: "No price" },
{ title: "Two", url: "https://two.example", content: "$20.00" },
],
}),
);
const limited = await searchProductOffers("romand palette", {
provider: "searxng",
baseUrl: "https://searxng.test/",
fetchImpl: fetchMock as unknown as typeof fetch,
maxResults: 1,
});
expect(limited).toHaveLength(1);
expect(limited[0]?.domain).toBe("two.example");
const emptyFetch = mock(async () => Response.json({ results: [] }));
const empty = await searchProductOffers("missing product", {
provider: "searxng",
baseUrl: "https://searxng.test/",
fetchImpl: emptyFetch as unknown as typeof fetch,
});
expect(empty).toEqual([]);
});

777
src/searxng.ts Normal file
View File

@@ -0,0 +1,777 @@
// Default endpoints for the three supported search providers.
const DEFAULT_SEARXNG_URL = "https://searxng.nvictor.me/";
const DEFAULT_GOOGLE_CUSTOM_SEARCH_URL =
"https://www.googleapis.com/customsearch/v1";
const DEFAULT_SERPAPI_URL = "https://serpapi.com/search.json";
// Fallbacks used when neither the caller nor the environment supplies a value.
const DEFAULT_TIMEOUT_MS = 10_000;
const DEFAULT_MAX_RESULTS = 10;
// Whole-string ASIN check: "B" followed by nine upper-case letters or digits.
const ASIN_REGEX = /^B[0-9A-Z]{9}$/;
// Finds ASIN-shaped tokens anywhere in a text (word-bounded, case-insensitive, global).
const ASIN_MATCH_REGEX = /\bB[0-9A-Z]{9}\b/gi;
// Price labels, listed with the same phrases and order as the alternation in
// LABELED_PRICE_REGEX below.
const PRICE_LABELS = [
"selling price",
"sale price",
"offer price",
"current price",
"our price",
"list price",
"price",
] as const;
// Regex alternation of recognized currency codes ("US\\$" matches the literal text "US$").
const CURRENCY_CODES = "USD|US\\$|EUR|GBP|INR|CAD|AUD";
// Recognized currency symbols; escaped via escapeForCharClass before being
// placed inside a character class.
const CURRENCY_SYMBOLS = "$€£₹";
// Matches a price label, then up to 24 characters that are neither a currency
// symbol nor a digit, then a price written either as [code] symbol amount or
// as code amount. Group 1 is the label, group 2 the price text.
const LABELED_PRICE_REGEX =
new RegExp(
`\\b(selling price|sale price|offer price|current price|our price|list price|price)\\b[^${escapeForCharClass(CURRENCY_SYMBOLS)}0-9]{0,24}((?:${CURRENCY_CODES})?\\s*[${escapeForCharClass(CURRENCY_SYMBOLS)}]\\s*[0-9]{1,5}(?:,[0-9]{3})*(?:\\.[0-9]{2})?|(?:${CURRENCY_CODES})\\s*[0-9]{1,5}(?:,[0-9]{3})*(?:\\.[0-9]{2})?)`,
"gi",
);
// Matches an unlabeled price in one of three forms: [code] symbol amount,
// code amount, or amount code. Amounts allow thousands separators and an
// optional two-digit fraction.
const PRICE_REGEX = new RegExp(
`((?:${CURRENCY_CODES})?\\s*[${escapeForCharClass(CURRENCY_SYMBOLS)}]\\s*[0-9]{1,5}(?:,[0-9]{3})*(?:\\.[0-9]{2})?|(?:${CURRENCY_CODES})\\s*[0-9]{1,5}(?:,[0-9]{3})*(?:\\.[0-9]{2})?|[0-9]{1,5}(?:,[0-9]{3})*(?:\\.[0-9]{2})?\\s*(?:${CURRENCY_CODES}))`,
"gi",
);
/** A normalized, scored search hit returned by searchProductOffers. */
export type SearxngOfferSearchResult = {
/** The ASIN inferred from the query; present only when the whole query was an ASIN. */
asin?: string;
/** The query string that was sent to the provider. */
query: string;
/** Result title after text normalization. */
title: string;
/** Result URL, normalized when it parses as a URL. */
url: string;
/** Lower-cased hostname of `url` without a leading "www."; empty if the URL does not parse. */
domain: string;
/** Result snippet after text normalization. */
snippet: string;
/** Position of the result as reported by, or derived from, the provider response. */
rank: number;
/** Relevance score computed by scoreResult; higher sorts first. */
score: number;
/** ASIN-shaped token found in the title, snippet or URL, if any. */
matchedAsin?: string;
/** Numeric amount of the price detected in the title or snippet. */
detectedPrice?: number;
/** Currency of the detected price. */
detectedPriceCurrency?: string;
/** Label attached to the detected price, when one was found. */
detectedPriceLabel?: string;
/** The price text as detected. */
detectedPriceText?: string;
/** De-duplicated names of the engines that produced the result. */
engines: string[];
};
/** Options accepted by searchProductOffers and searchAsinOffers. */
export type SearxngSearchOptions = {
/** Search backend; anything other than "searxng" or "google-custom-search" (including undefined) uses SerpApi. */
provider?: "serpapi" | "google-custom-search" | "searxng";
/** Overrides the endpoint of the selected provider. */
baseUrl?: string;
/** Google Custom Search API key; falls back to the GOOGLE_API_KEY environment variable. */
googleApiKey?: string;
/** Google search engine id; falls back to GOOGLE_CSE_ID, GOOGLE_CX, then GOOGLE_SEARCH_ENGINE_ID. */
googleCx?: string;
/** SerpApi key; falls back to the SERPAPI_API_KEY environment variable. */
serpapiApiKey?: string;
/** Per-request timeout in milliseconds; falls back to SEARXNG_TIMEOUT_MS, then 10000. */
timeoutMs?: number;
/** Maximum number of results returned; falls back to SEARXNG_MAX_RESULTS, then 10. */
maxResults?: number;
/** 1-based result page; defaults to 1. */
page?: number;
/** SearXNG `categories` parameter; defaults to "general". */
categories?: string;
/** SearXNG `engines` parameter (comma-separated engine names). */
engines?: string;
/** When true, ASIN queries keep results whose matched ASIN differs from, or is missing for, the queried ASIN. */
includeUnmatchedAsinResults?: boolean;
/** Replacement for the global fetch (used by the tests). */
fetchImpl?: typeof fetch;
};
/** Provider-independent search hit before normalization and scoring. */
type RawSearchResult = {
title: string;
url: string;
snippet: string;
engines: string[];
// Position of the hit in (or reported by) the provider response.
rank: number;
};
/** Shape of the SearXNG JSON response as far as this module reads it. */
type JsonSearchResponse = {
results?: Array<Record<string, unknown>>;
};
/** A price found in free text. */
type PriceDetection = {
amount: number;
currency: string;
// The matched price text.
text: string;
// The price label that preceded the amount, when there was one.
label?: string;
};
/**
 * Searches for offers of a single Amazon product.
 *
 * @param asin - ASIN to look up; validated and upper-cased first.
 * @param options - Provider and request options.
 * @returns Scored offer results for the ASIN.
 * @throws Error when `asin` is not a valid ASIN (surfaced as a rejected promise).
 */
export async function searchAsinOffers(
  asin: string,
  options: SearxngSearchOptions = {},
): Promise<SearxngOfferSearchResult[]> {
  const validatedAsin = normalizeAsin(asin);
  return searchProductOffers(validatedAsin, options);
}
/**
 * Runs a product-offer search against the selected provider and returns
 * normalized results, best score first.
 *
 * When the whole query is an ASIN it is expanded with purchase keywords and,
 * unless `includeUnmatchedAsinResults` is set, only results whose matched
 * ASIN equals it are kept.
 *
 * @param query - Free-text query or a bare ASIN.
 * @param options - Provider and request options; SerpApi is used unless
 *   `provider` is "searxng" or "google-custom-search".
 * @returns At most `maxResults` results sorted by score, then by rank.
 * @throws Error when the query is blank, or when the provider request fails.
 */
export async function searchProductOffers(
  query: string,
  options: SearxngSearchOptions = {},
): Promise<SearxngOfferSearchResult[]> {
  const trimmedQuery = query.trim();
  if (trimmedQuery.length === 0) {
    throw new Error("Search query is required.");
  }

  const asinFromQuery = getAsinQuery(trimmedQuery);
  let providerQuery = trimmedQuery;
  if (asinFromQuery) {
    providerQuery = `${asinFromQuery} price sale offer buy online`;
  }

  const resultLimit = positiveInteger(
    options.maxResults ?? readEnvInt("SEARXNG_MAX_RESULTS", DEFAULT_MAX_RESULTS),
    DEFAULT_MAX_RESULTS,
  );

  let fetched: RawSearchResult[];
  switch (options.provider) {
    case "searxng":
      fetched = await fetchSearxngResults(providerQuery, options);
      break;
    case "google-custom-search":
      fetched = await fetchGoogleCustomSearchResults(providerQuery, {
        ...options,
        maxResults: resultLimit,
      });
      break;
    default:
      fetched = await fetchSerpApiGoogleShoppingResults(providerQuery, {
        ...options,
        provider: "serpapi",
        maxResults: resultLimit,
      });
  }

  // The ASIN filter only applies to ASIN queries and can be switched off.
  const keepUnmatched =
    !asinFromQuery || Boolean(options.includeUnmatchedAsinResults);
  const kept: SearxngOfferSearchResult[] = [];
  for (const raw of fetched) {
    const offer = normalizeResult(raw, providerQuery, asinFromQuery);
    if (!offer.url) continue;
    if (keepUnmatched || offer.matchedAsin === asinFromQuery) {
      kept.push(offer);
    }
  }

  kept.sort((left, right) => right.score - left.score || left.rank - right.rank);
  return kept.slice(0, resultLimit);
}
/**
 * Trims and upper-cases an ASIN and verifies its shape.
 *
 * @param value - Candidate ASIN.
 * @returns The canonical upper-case ASIN.
 * @throws Error with message `Invalid ASIN: <value>` when the shape is wrong.
 */
export function normalizeAsin(value: string): string {
  const candidate = value.trim().toUpperCase();
  if (ASIN_REGEX.test(candidate)) {
    return candidate;
  }
  throw new Error(`Invalid ASIN: ${value}`);
}
/**
 * Returns the upper-cased ASIN when the entire (trimmed) input is ASIN-shaped,
 * otherwise undefined.
 */
function getAsinQuery(value: string): string | undefined {
  const candidate = value.trim().toUpperCase();
  if (!ASIN_REGEX.test(candidate)) return undefined;
  return candidate;
}
/**
 * Queries a SearXNG instance, preferring its JSON output and falling back to
 * scraping the HTML results page when JSON is not served.
 *
 * @param query - Query text to search for.
 * @param options - Instance URL, timeout, page, categories, engines and fetch override.
 * @returns Raw results in the order the instance returned them.
 * @throws Error when the HTML fallback request is not successful.
 */
async function fetchSearxngResults(
  query: string,
  options: SearxngSearchOptions,
): Promise<RawSearchResult[]> {
  const instanceUrl = normalizeBaseUrl(
    options.baseUrl ?? Bun.env.SEARXNG_URL ?? DEFAULT_SEARXNG_URL,
  );
  const timeoutMs = positiveInteger(
    options.timeoutMs ?? readEnvInt("SEARXNG_TIMEOUT_MS", DEFAULT_TIMEOUT_MS),
    DEFAULT_TIMEOUT_MS,
  );
  // Parameters shared by the JSON attempt and the HTML fallback.
  const sharedParams = {
    categories: options.categories ?? "general",
    engines: options.engines,
    page: positiveInteger(options.page ?? 1, 1),
  };
  const doFetch = options.fetchImpl ?? fetch;
  const effectiveQuery = applySearxngEngineBang(query, options.engines);

  const jsonUrl = buildSearchUrl(instanceUrl, effectiveQuery, {
    ...sharedParams,
    format: "json",
  });
  const jsonAttempt = await fetchWithTimeout(doFetch, jsonUrl, timeoutMs);
  if (isJsonResponse(jsonAttempt)) {
    const payload = (await jsonAttempt.json()) as JsonSearchResponse;
    return parseJsonResults(payload);
  }

  // JSON unavailable (non-2xx or a non-JSON content type): request the HTML page.
  const htmlUrl = buildSearchUrl(instanceUrl, effectiveQuery, sharedParams);
  const htmlAttempt = await fetchWithTimeout(doFetch, htmlUrl, timeoutMs);
  if (htmlAttempt.ok) {
    return parseHtmlResults(await htmlAttempt.text());
  }
  throw new Error(
    `SearXNG search failed: status=${htmlAttempt.status} url=${htmlUrl.toString()}`,
  );
}
/**
 * Prefixes the query with a SearXNG "!bang" shortcut when exactly one engine
 * is requested and a shortcut is known for it.
 *
 * The query is returned unchanged when no engines are given, when it already
 * starts with "!", when zero or several engines are listed, or when the
 * engine has no known shortcut.
 *
 * @param query - Query text.
 * @param engines - Comma-separated engine names, if any.
 */
function applySearxngEngineBang(query: string, engines: string | undefined): string {
  if (!engines) return query;
  if (query.trim().startsWith("!")) return query;

  const names: string[] = [];
  for (const part of engines.split(",")) {
    const name = part.trim().toLowerCase();
    if (name) names.push(name);
  }

  const [onlyEngine, ...others] = names;
  if (onlyEngine === undefined || others.length > 0) return query;

  const bang = searxngEngineShortcut(onlyEngine);
  if (!bang) return query;
  return `!${bang} ${query}`;
}
/**
 * Maps an engine name to its SearXNG bang shortcut. Only "google" ("go") is
 * known; every other name yields undefined.
 */
function searxngEngineShortcut(engine: string): string | undefined {
  switch (engine) {
    case "google":
      return "go";
    default:
      return undefined;
  }
}
/**
 * Tells whether a response is both successful and declared as JSON
 * (its content-type contains "application/json", case-insensitively).
 */
function isJsonResponse(response: Response): boolean {
  if (!response.ok) return false;
  const mediaType = (response.headers.get("content-type") ?? "").toLowerCase();
  return mediaType.includes("application/json");
}
/**
 * Performs a GET-style fetch that is aborted if no response arrives within
 * `timeoutMs`. The timer is cleared as soon as the fetch promise settles.
 *
 * @param fetchImpl - Fetch function to call.
 * @param url - Request URL.
 * @param timeoutMs - Milliseconds to wait before aborting.
 * @returns The response produced by `fetchImpl`.
 */
async function fetchWithTimeout(
  fetchImpl: typeof fetch,
  url: URL,
  timeoutMs: number,
): Promise<Response> {
  const abortController = new AbortController();
  const timer = setTimeout(() => {
    abortController.abort();
  }, timeoutMs);

  const requestHeaders = {
    accept: "application/json,text/html;q=0.9,*/*;q=0.8",
    "user-agent": "asin-check/1.0 (+https://searxng.nvictor.me/)",
  };

  try {
    const response = await fetchImpl(url, {
      signal: abortController.signal,
      headers: requestHeaders,
    });
    return response;
  } finally {
    clearTimeout(timer);
  }
}
/**
 * Builds the SearXNG `search` URL relative to `baseUrl`.
 *
 * `q`, `categories` and `pageno` are always set; `engines` and `format` are
 * added only when they are non-empty.
 *
 * @param baseUrl - Instance base URL.
 * @param query - Query text.
 * @param params - Category list, optional engines, page number and optional format.
 */
function buildSearchUrl(
  baseUrl: URL,
  query: string,
  params: { categories: string; engines?: string; page: number; format?: string },
): URL {
  const target = new URL("search", baseUrl);
  const entries: Array<[string, string | undefined]> = [
    ["q", query],
    ["categories", params.categories],
    ["engines", params.engines || undefined],
    ["pageno", String(params.page)],
    ["format", params.format || undefined],
  ];
  for (const [key, value] of entries) {
    if (value !== undefined) {
      target.searchParams.set(key, value);
    }
  }
  return target;
}
/**
 * Queries the Google Custom Search JSON API.
 *
 * @param query - Query text.
 * @param options - API key, engine id, endpoint override, timeout, page and result count.
 * @returns Raw results parsed from the response's `items`.
 * @throws Error when the API key or engine id is missing, or when the API
 *   answers with a non-2xx status (the first 300 characters of the body are included).
 */
async function fetchGoogleCustomSearchResults(
  query: string,
  options: SearxngSearchOptions,
): Promise<RawSearchResult[]> {
  const apiKey = options.googleApiKey ?? Bun.env.GOOGLE_API_KEY;
  if (!apiKey) {
    throw new Error("Missing GOOGLE_API_KEY for Google Custom Search.");
  }
  const engineId =
    options.googleCx ??
    Bun.env.GOOGLE_CSE_ID ??
    Bun.env.GOOGLE_CX ??
    Bun.env.GOOGLE_SEARCH_ENGINE_ID;
  if (!engineId) {
    throw new Error(
      "Missing Google Custom Search engine id. Set GOOGLE_CSE_ID, GOOGLE_CX, or GOOGLE_SEARCH_ENGINE_ID.",
    );
  }

  const timeoutMs = positiveInteger(
    options.timeoutMs ?? readEnvInt("SEARXNG_TIMEOUT_MS", DEFAULT_TIMEOUT_MS),
    DEFAULT_TIMEOUT_MS,
  );
  const pageNumber = positiveInteger(options.page ?? 1, 1);
  // The request asks for at most 10 results per page.
  const pageSize = Math.min(
    10,
    positiveInteger(options.maxResults ?? DEFAULT_MAX_RESULTS, DEFAULT_MAX_RESULTS),
  );

  const endpoint = new URL(options.baseUrl ?? DEFAULT_GOOGLE_CUSTOM_SEARCH_URL);
  const queryParams: Record<string, string> = {
    key: apiKey,
    cx: engineId,
    q: query,
    num: String(pageSize),
    // `start` is the 1-based index of the first result on the requested page.
    start: String((pageNumber - 1) * pageSize + 1),
  };
  for (const [name, value] of Object.entries(queryParams)) {
    endpoint.searchParams.set(name, value);
  }

  const response = await fetchWithTimeout(
    options.fetchImpl ?? fetch,
    endpoint,
    timeoutMs,
  );
  if (response.ok) {
    const payload = (await response.json()) as GoogleCustomSearchResponse;
    return parseGoogleCustomSearchResults(payload);
  }
  const errorBody = await response.text().catch(() => "");
  throw new Error(
    `Google Custom Search failed: status=${response.status} ${errorBody.slice(0, 300)}`,
  );
}
/** Subset of the Google Custom Search response that this module reads. */
type GoogleCustomSearchResponse = {
items?: GoogleCustomSearchItem[];
};
/** One Google Custom Search hit. */
type GoogleCustomSearchItem = {
title?: string;
link?: string;
snippet?: string;
displayLink?: string;
// Structured page metadata; the offer, product and metatags entries are scanned for prices.
pagemap?: Record<string, unknown>;
};
/** Subset of the SerpApi google_shopping response that this module reads. */
type SerpApiShoppingResponse = {
shopping_results?: SerpApiShoppingResult[];
inline_shopping_results?: SerpApiShoppingResult[];
categorized_shopping_results?: Array<{
shopping_results?: SerpApiShoppingResult[];
}>;
// When present, the request is treated as failed and this message is thrown.
error?: string;
};
/** One SerpApi shopping hit. */
type SerpApiShoppingResult = {
position?: number;
title?: string;
source?: string;
// link, product_link and serpapi_product_api are tried in that order as the result URL.
link?: string;
product_link?: string;
serpapi_product_api?: string;
price?: string;
extracted_price?: number;
old_price?: string;
extracted_old_price?: number;
delivery?: string;
rating?: number;
reviews?: number;
snippet?: string;
};
/**
 * Queries SerpApi's `google_shopping` engine (google.com, US, English).
 *
 * @param query - Query text.
 * @param options - API key, endpoint override, timeout, page and fetch override.
 * @returns Raw results parsed from the shopping result lists.
 * @throws Error when the API key is missing, when the HTTP status is not 2xx,
 *   or when the response body carries an `error` field.
 */
async function fetchSerpApiGoogleShoppingResults(
  query: string,
  options: SearxngSearchOptions,
): Promise<RawSearchResult[]> {
  const apiKey = options.serpapiApiKey ?? Bun.env.SERPAPI_API_KEY;
  if (!apiKey) {
    throw new Error(
      "Missing SERPAPI_API_KEY. Google does not provide an official public Shopping-tab search API; use SerpApi's google_shopping API or another SERP provider.",
    );
  }

  const timeoutMs = positiveInteger(
    options.timeoutMs ?? readEnvInt("SEARXNG_TIMEOUT_MS", DEFAULT_TIMEOUT_MS),
    DEFAULT_TIMEOUT_MS,
  );
  const pageNumber = positiveInteger(options.page ?? 1, 1);

  const endpoint = new URL(options.baseUrl ?? DEFAULT_SERPAPI_URL);
  const queryParams: Record<string, string> = {
    engine: "google_shopping",
    q: query,
    api_key: apiKey,
    google_domain: "google.com",
    gl: "us",
    hl: "en",
    // Result offset: pages advance in steps of 60.
    start: String((pageNumber - 1) * 60),
  };
  for (const [name, value] of Object.entries(queryParams)) {
    endpoint.searchParams.set(name, value);
  }

  const response = await fetchWithTimeout(
    options.fetchImpl ?? fetch,
    endpoint,
    timeoutMs,
  );
  if (!response.ok) {
    const errorBody = await response.text().catch(() => "");
    throw new Error(
      `SerpApi Google Shopping failed: status=${response.status} ${errorBody.slice(0, 300)}`,
    );
  }

  const payload = (await response.json()) as SerpApiShoppingResponse;
  if (payload.error) {
    throw new Error(`SerpApi Google Shopping failed: ${payload.error}`);
  }
  return parseSerpApiShoppingResults(payload);
}
/**
 * Flattens the three SerpApi shopping result lists into raw search results.
 *
 * Items without any usable URL are skipped. Price, old price, merchant,
 * delivery, snippet, rating and review count are folded into a synthetic
 * snippet ("offer price: …", "list price: …", …) for later price detection.
 *
 * @param json - Parsed SerpApi response.
 */
function parseSerpApiShoppingResults(
  json: SerpApiShoppingResponse,
): RawSearchResult[] {
  const categorized = (json.categorized_shopping_results ?? []).flatMap(
    (group) => group.shopping_results ?? [],
  );
  const merged = (json.shopping_results ?? []).concat(
    json.inline_shopping_results ?? [],
    categorized,
  );

  const parsed: RawSearchResult[] = [];
  merged.forEach((item, index) => {
    const url =
      optionalString(item.link) ??
      optionalString(item.product_link) ??
      optionalString(item.serpapi_product_api);
    if (!url) return;

    const parts: string[] = [];
    const addPart = (text: string | undefined): void => {
      if (text) parts.push(text);
    };

    const priceText = optionalString(item.price);
    if (priceText) parts.push(`offer price: ${priceText}`);
    if (optionalString(item.old_price)) parts.push(`list price: ${item.old_price}`);
    if (optionalString(item.source)) parts.push(`merchant: ${item.source}`);
    addPart(optionalString(item.delivery));
    addPart(optionalString(item.snippet));
    if (typeof item.rating === "number") parts.push(`rating: ${item.rating}`);
    if (typeof item.reviews === "number") parts.push(`reviews: ${item.reviews}`);

    parsed.push({
      title: optionalString(item.title) ?? "",
      url,
      snippet: parts.join(" "),
      engines: ["serpapi google shopping"],
      // Fall back to the position in the merged list when SerpApi gives none.
      rank: item.position ?? index + 1,
    });
  });
  return parsed;
}
/**
 * Converts Google Custom Search items into raw search results.
 *
 * Items without a link are skipped. The snippet is the item's own snippet
 * followed by any price text extracted from its pagemap metadata.
 *
 * @param json - Parsed Google Custom Search response.
 */
function parseGoogleCustomSearchResults(
  json: GoogleCustomSearchResponse,
): RawSearchResult[] {
  const parsed: RawSearchResult[] = [];
  (json.items ?? []).forEach((item, index) => {
    const url = optionalString(item.link);
    if (!url) return;

    const metadataText = extractGoogleCustomSearchMetadataText(item);
    const snippetParts: string[] = [];
    const ownSnippet = optionalString(item.snippet);
    if (ownSnippet) snippetParts.push(ownSnippet);
    if (metadataText) snippetParts.push(metadataText);

    parsed.push({
      title: optionalString(item.title) ?? "",
      url,
      snippet: snippetParts.join(" "),
      engines: ["google custom search"],
      rank: index + 1,
    });
  });
  return parsed;
}
/**
 * Collects price statements from an item's pagemap, scanning the `offer`,
 * `product` and `metatags` sections in that order.
 *
 * @param item - Google Custom Search item.
 * @returns The statements joined by single spaces; empty when none were found.
 */
function extractGoogleCustomSearchMetadataText(
  item: GoogleCustomSearchItem,
): string {
  const pagemap = item.pagemap ?? {};
  const chunks: string[] = [];
  for (const section of ["offer", "product", "metatags"]) {
    for (const entry of readPagemapObjects(pagemap[section])) {
      appendPriceMetadata(chunks, entry);
    }
  }
  return chunks.join(" ");
}
/**
 * Appends an "offer price: …" statement to `chunks` when the metadata object
 * carries a price under one of the known keys. The currency, when found, is
 * placed before the amount.
 *
 * @param chunks - Output list, mutated in place.
 * @param value - One pagemap metadata object.
 */
function appendPriceMetadata(chunks: string[], value: Record<string, unknown>): void {
  // Returns the result for the first key whose lookup is not null/undefined.
  const firstPresent = (keys: readonly string[]) => {
    for (const key of keys) {
      const found = optionalString(value[key]);
      if (found != null) return found;
    }
    return undefined;
  };

  const price = firstPresent([
    "price",
    "lowprice",
    "highprice",
    "product:price:amount",
    "og:price:amount",
    "twitter:data1",
  ]);
  if (!price) return;

  const currency = firstPresent([
    "pricecurrency",
    "priceCurrency",
    "product:price:currency",
    "og:price:currency",
  ]);
  if (currency) {
    chunks.push(`offer price: ${currency} ${price}`);
  } else {
    chunks.push(`offer price: ${price}`);
  }
}
/**
 * Returns the plain-object entries of a pagemap section.
 *
 * @param value - Section value of unknown shape.
 * @returns The non-null, non-array object elements when `value` is an array;
 *   an empty array otherwise.
 */
function readPagemapObjects(value: unknown): Array<Record<string, unknown>> {
  const objects: Array<Record<string, unknown>> = [];
  if (Array.isArray(value)) {
    for (const entry of value) {
      if (entry === null || entry === undefined) continue;
      if (typeof entry !== "object" || Array.isArray(entry)) continue;
      objects.push(entry as Record<string, unknown>);
    }
  }
  return objects;
}
/**
 * Converts a SearXNG JSON response into raw search results.
 *
 * Entries without a URL are skipped; `rank` is the entry's 1-based position
 * in the response. Engine names come from `engines`, or from `engine` when
 * `engines` is absent.
 *
 * @param json - Parsed SearXNG response.
 */
function parseJsonResults(json: JsonSearchResponse): RawSearchResult[] {
  const parsed: RawSearchResult[] = [];
  (json.results ?? []).forEach((entry, position) => {
    const url = optionalString(entry.url);
    if (!url) return;
    parsed.push({
      title: optionalString(entry.title) ?? "",
      url,
      snippet: optionalString(entry.content) ?? "",
      engines: normalizeEngines(entry.engines ?? entry.engine),
      rank: position + 1,
    });
  });
  return parsed;
}
/**
 * Scrapes search results from a SearXNG HTML results page with HTMLRewriter.
 *
 * Each `article.result` element becomes one result: the URL comes from
 * `a.url_header` (or, failing that, the title link), the title from `h3 a`,
 * the snippet from `p.content`, and engine names from `.engines span`.
 * Articles without a URL are dropped. `rank` is the 1-based position among
 * the kept results.
 *
 * Each text handler names its own destination, so title text that follows
 * nested markup inside the link is collected just like snippet text. A
 * pending result is also flushed when the next article starts and after the
 * document has been consumed, so results are not lost when the element object
 * does not provide `onEndTag`.
 *
 * @param html - Raw HTML of the results page.
 * @returns Results in document order.
 */
async function parseHtmlResults(html: string): Promise<RawSearchResult[]> {
  type Draft = {
    title: string;
    url: string;
    snippet: string;
    engines: string[];
  };
  type TextTarget = "title" | "snippet" | "engine";

  const results: RawSearchResult[] = [];
  let current: Draft | null = null;

  // Stores the article being built (if it has a URL) and resets the state.
  // Safe to call repeatedly: it does nothing once `current` has been cleared.
  const flushCurrent = (): void => {
    if (current?.url) {
      results.push({ ...current, rank: results.length + 1 });
    }
    current = null;
  };

  // Adds a whitespace-normalized text chunk to the given part of the current article.
  const appendText = (target: TextTarget, text: string): void => {
    if (!current) return;
    const normalized = text.replace(/\s+/g, " ").trim();
    if (!normalized) return;
    if (target === "engine") {
      current.engines.push(normalized);
      return;
    }
    current[target] = appendWithSpace(current[target], normalized);
  };

  const rewritten = new HTMLRewriter()
    .on("article.result", {
      element(element) {
        // Covers runtimes without onEndTag: close the previous article first.
        flushCurrent();
        current = { title: "", url: "", snippet: "", engines: [] };
        const onEndTag = (element as unknown as {
          onEndTag?: (handler: () => void) => void;
        }).onEndTag;
        onEndTag?.call(element, () => {
          flushCurrent();
        });
      },
    })
    .on("article.result a.url_header", {
      element(element) {
        if (current && !current.url) {
          current.url = element.getAttribute("href") ?? "";
        }
      },
    })
    .on("article.result h3 a", {
      element(element) {
        // The title link doubles as the URL source when no url_header was seen.
        if (current && !current.url) {
          current.url = element.getAttribute("href") ?? "";
        }
      },
      text(text) {
        appendText("title", text.text);
      },
    })
    .on("article.result p.content", {
      text(text) {
        appendText("snippet", text.text);
      },
    })
    .on("article.result .engines span", {
      text(text) {
        appendText("engine", text.text);
      },
    })
    .transform(new Response(html));

  // Handlers only run while the transformed body is being read.
  await rewritten.text();
  flushCurrent();
  return results;
}
/**
 * Turns a raw provider hit into a scored {@link SearxngOfferSearchResult}.
 *
 * ASIN matching looks at the title, snippet and URL. When an `asin` is being
 * searched for and it occurs anywhere in that text, it is reported as the
 * matched ASIN even if another ASIN-shaped token (for example a ten-letter
 * word starting with "b", such as "Bestseller") appears earlier; otherwise
 * the first ASIN-shaped token is used. Price detection looks at the title and
 * snippet only.
 *
 * @param raw - Hit as returned by a provider parser.
 * @param query - Query string that produced the hit.
 * @param asin - Upper-case ASIN inferred from the query, if the query was an ASIN.
 * @returns The normalized result; optional fields are omitted when not detected.
 */
function normalizeResult(
  raw: RawSearchResult,
  query: string,
  asin?: string,
): SearxngOfferSearchResult {
  const url = normalizeUrl(raw.url);
  const domain = extractDomain(url);
  const title = normalizeText(raw.title);
  const snippet = normalizeText(raw.snippet);

  const asinHaystack = `${title} ${snippet} ${url}`;
  // Every ASIN-shaped token in the text, upper-cased; used to check whether
  // the requested ASIN is mentioned at all, not merely mentioned first.
  const mentionedAsins: string[] =
    asinHaystack.toUpperCase().match(ASIN_MATCH_REGEX) ?? [];
  const matchedAsin =
    asin != null && mentionedAsins.includes(asin)
      ? asin
      : findMatchedAsin(asinHaystack);

  const detectedPrice = detectPrice(`${title} ${snippet}`);
  const score = scoreResult({
    asin,
    matchedAsin,
    detectedPrice: detectedPrice?.amount,
    domain,
    rank: raw.rank,
  });
  return {
    ...(asin ? { asin } : {}),
    query,
    title,
    url,
    domain,
    snippet,
    rank: raw.rank,
    score,
    ...(matchedAsin ? { matchedAsin } : {}),
    ...(detectedPrice
      ? {
          detectedPrice: detectedPrice.amount,
          detectedPriceCurrency: detectedPrice.currency,
          ...(detectedPrice.label
            ? { detectedPriceLabel: detectedPrice.label }
            : {}),
          detectedPriceText: detectedPrice.text,
        }
      : {}),
    engines: dedupe(raw.engines.map(normalizeText).filter(Boolean)),
  };
}
/**
 * Computes a relevance score for a search result.
 *
 * Starts from `100 - rank` and then applies:
 * - +80 when an ASIN was requested and the matched ASIN equals it,
 * - +40 when no ASIN was requested but one was matched,
 * - +30 when a price was detected,
 * - +20 for a known, non-Amazon domain,
 * - -15 for an Amazon domain.
 */
function scoreResult(input: {
  asin?: string;
  matchedAsin?: string;
  detectedPrice?: number;
  domain: string;
  rank: number;
}): number {
  const { asin, matchedAsin, detectedPrice, domain, rank } = input;
  const onAmazon = isAmazonDomain(domain);

  let score = 100 - rank;
  if (asin) {
    if (matchedAsin === asin) score += 80;
  } else if (matchedAsin) {
    score += 40;
  }
  if (detectedPrice != null) score += 30;
  if (domain && !onAmazon) score += 20;
  if (onAmazon) score -= 15;
  return score;
}
/**
 * Parses a base URL and guarantees its path ends with a slash.
 *
 * @throws TypeError when `value` is not a valid absolute URL.
 */
function normalizeBaseUrl(value: string): URL {
  const base = new URL(value);
  const path = base.pathname;
  if (path.slice(-1) !== "/") {
    base.pathname = path + "/";
  }
  return base;
}
/**
 * Returns the canonical serialization of `value` when it parses as an
 * absolute URL; otherwise returns the input with surrounding whitespace
 * removed.
 */
function normalizeUrl(value: string): string {
  let parsed: URL;
  try {
    parsed = new URL(value);
  } catch {
    return value.trim();
  }
  return parsed.toString();
}
/**
 * Returns the lower-cased hostname of `value` without a leading `www.`,
 * or an empty string when `value` is not a parseable absolute URL.
 */
function extractDomain(value: string): string {
  let host: string;
  try {
    host = new URL(value).hostname;
  } catch {
    return "";
  }
  const lowered = host.toLowerCase();
  const prefix = "www.";
  return lowered.startsWith(prefix) ? lowered.slice(prefix.length) : lowered;
}
/**
 * Reports whether `domain` contains an `amazon` label followed by a dot
 * (case-insensitive), e.g. `amazon.com` or `www.amazon.co.uk`.
 */
function isAmazonDomain(domain: string): boolean {
  const amazonLabel = /(^|\.)amazon\./i;
  return amazonLabel.test(domain);
}
/**
 * Returns the first substring of the upper-cased `value` that matches
 * `ASIN_MATCH_REGEX`, or `undefined` when nothing matches.
 */
function findMatchedAsin(value: string): string | undefined {
  const [first] = value.toUpperCase().match(ASIN_MATCH_REGEX) ?? [];
  return first;
}
/**
 * Finds the most relevant price mentioned in `value`.
 *
 * Labeled prices (matches of `LABELED_PRICE_REGEX`) take precedence; among
 * them the one whose label ranks best wins, with ties going to the earliest
 * match. Without any labeled price, the first parseable match of
 * `PRICE_REGEX` is returned.
 */
function detectPrice(value: string): PriceDetection | undefined {
  let bestLabeled: PriceDetection | undefined;
  for (const match of value.matchAll(LABELED_PRICE_REGEX)) {
    const candidate = parsePriceMatch(match[2], match[1]);
    if (!candidate) continue;
    // Strictly-better only, so the earliest candidate wins a tie.
    if (!bestLabeled || comparePriceDetections(candidate, bestLabeled) < 0) {
      bestLabeled = candidate;
    }
  }
  if (bestLabeled) return bestLabeled;

  for (const match of value.matchAll(PRICE_REGEX)) {
    const candidate = parsePriceMatch(match[1]);
    if (candidate) return candidate;
  }
  return undefined;
}
/**
 * Turns a matched price fragment into a `PriceDetection`.
 *
 * @param rawPrice - Matched price text; a missing or empty value yields `undefined`.
 * @param rawLabel - Optional label that preceded the price; stored lower-cased.
 * @returns The detection, or `undefined` when no positive finite amount can
 *   be read from the text.
 */
function parsePriceMatch(
  rawPrice: string | undefined,
  rawLabel?: string,
): PriceDetection | undefined {
  if (!rawPrice) return undefined;

  const text = normalizeText(rawPrice);
  const currency = detectCurrency(text);

  // Up to five leading digits, optional thousands groups, optional cents.
  const digits = /[0-9]{1,5}(?:,[0-9]{3})*(?:\.[0-9]{2})?/.exec(text)?.[0];
  if (!digits) return undefined;

  const amount = Number(digits.split(",").join(""));
  if (!(Number.isFinite(amount) && amount > 0)) return undefined;

  let label: string | undefined;
  if (rawLabel) {
    label = normalizeText(rawLabel).toLowerCase();
  }

  return {
    amount,
    currency,
    text,
    ...(label ? { label } : {}),
  };
}
/**
 * Sort comparator that orders price detections by the rank of their label,
 * lowest rank first.
 */
function comparePriceDetections(a: PriceDetection, b: PriceDetection): number {
  const rankA = priceLabelRank(a.label);
  const rankB = priceLabelRank(b.label);
  return rankA - rankB;
}
/**
 * Returns the position of `label` in `PRICE_LABELS`. Missing or unknown
 * labels rank after every known label (`PRICE_LABELS.length`).
 */
function priceLabelRank(label: string | undefined): number {
  const unranked = PRICE_LABELS.length;
  if (!label) return unranked;
  const position = PRICE_LABELS.indexOf(
    label as (typeof PRICE_LABELS)[number],
  );
  return position < 0 ? unranked : position;
}
/**
 * Guesses the ISO currency code of a price string from currency codes or
 * symbols found in it. Rules are checked in order and the first hit wins;
 * `"USD"` is returned when none applies.
 */
function detectCurrency(value: string): string {
  const rules: ReadonlyArray<readonly [RegExp, string]> = [
    [/\b(EUR)\b|€/i, "EUR"],
    [/\b(GBP)\b|£/i, "GBP"],
    [/\b(INR)\b|₹/i, "INR"],
    [/\b(CAD)\b/i, "CAD"],
    [/\b(AUD)\b/i, "AUD"],
  ];
  for (const [pattern, code] of rules) {
    if (pattern.test(value)) return code;
  }
  return "USD";
}
/**
 * Backslash-escapes the characters that are special inside a regular
 * expression character class: `-`, `\`, `]` and `^`.
 */
function escapeForCharClass(value: string): string {
  return value.replace(/[-\\\]^]/g, (special) => `\\${special}`);
}
/**
 * Coerces an engine field of unknown shape into a list of engine names.
 *
 * Accepts either a single value or an array of values. Every entry goes
 * through `optionalString`, so `null`/`undefined` entries and blank strings
 * are dropped (rather than becoming the text "null"/"undefined") and the
 * remaining names are trimmed.
 */
function normalizeEngines(value: unknown): string[] {
  const entries: unknown[] = Array.isArray(value) ? value : [value];
  return entries.flatMap((entry) => {
    const engine = optionalString(entry);
    return engine ? [engine] : [];
  });
}
/**
 * Stringifies and trims `value`. Returns `undefined` for `null`,
 * `undefined`, and anything whose trimmed string form is empty.
 */
function optionalString(value: unknown): string | undefined {
  if (value === null || value === undefined) return undefined;
  const trimmed = String(value).trim();
  return trimmed.length > 0 ? trimmed : undefined;
}
/**
 * Decodes HTML entities, collapses every whitespace run into a single
 * space, and trims the ends.
 */
function normalizeText(value: string): string {
  const decoded = decodeHtmlEntities(value);
  const collapsed = decoded.replace(/\s+/g, " ");
  return collapsed.trim();
}
/**
 * Joins `left` and `right` with a single space; when `left` is empty the
 * result is just `right`.
 */
function appendWithSpace(left: string, right: string): string {
  if (!left) return right;
  return left + " " + right;
}
/**
 * Decodes the HTML character references that commonly occur in escaped
 * text: `&amp;`, `&quot;`, `&lt;`, `&gt;`, `&nbsp;` and numeric references
 * in decimal (`&#39;`) or hexadecimal (`&#x27;`) form.
 *
 * Decoding happens in a single pass, so already-escaped text is unescaped
 * exactly once: `&amp;lt;` becomes `&lt;`, not `<`. Unknown names and
 * numeric references outside the valid code point range are left as-is.
 */
function decodeHtmlEntities(value: string): string {
  const named: Record<string, string> = {
    amp: "&",
    quot: '"',
    lt: "<",
    gt: ">",
    nbsp: " ",
  };
  return value.replace(
    /&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|(amp|quot|lt|gt|nbsp));/g,
    (entity: string, decimal?: string, hex?: string, name?: string) => {
      if (name !== undefined) return named[name] ?? entity;

      const codePoint =
        decimal !== undefined
          ? Number.parseInt(decimal, 10)
          : Number.parseInt(hex ?? "", 16);
      const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
      if (!(codePoint > 0 && codePoint <= 0x10ffff) || isSurrogate) {
        return entity;
      }
      // Keep numeric no-break spaces consistent with `&nbsp;`.
      if (codePoint === 0xa0) return " ";
      return String.fromCodePoint(codePoint);
    },
  );
}
/**
 * Returns the distinct strings of `values`, keeping the order of first
 * occurrence.
 */
function dedupe(values: string[]): string[] {
  return [...new Set(values)];
}
/**
 * Reads a numeric setting from the environment (`Bun.env`).
 *
 * @param key - Name of the environment variable.
 * @param fallback - Value returned when the variable is unset, blank, or
 *   does not parse to a finite number.
 */
function readEnvInt(key: string, fallback: number): number {
  const raw = Bun.env[key]?.trim();
  // `Number("")` is 0, so a declared-but-empty variable must be treated as
  // missing rather than as an explicit zero.
  if (!raw) return fallback;
  const parsed = Number(raw);
  return Number.isFinite(parsed) ? parsed : fallback;
}
/**
 * Returns `value` when it is an integer greater than zero, otherwise
 * `fallback`.
 */
function positiveInteger(value: number, fallback: number): number {
  const isValid = Number.isInteger(value) && value > 0;
  if (!isValid) return fallback;
  return value;
}