feat: add ASIN offer search functionality
- Introduced a new script `asin-offer-search.ts` for searching product offers by ASIN.
- Updated `package.json` to include a new command for the ASIN offer search.
- Enhanced configuration in `config.ts` to support SearXNG URL and timeout settings.
- Added comprehensive tests for the new search functionality in `searxng.test.ts`.
- Implemented the core search logic in `searxng.ts`, supporting multiple providers and price detection.
This commit is contained in:
@@ -12,4 +12,7 @@ AWS_SECRET_ACCESS_KEY=your_aws_secret_access_key
|
|||||||
REDIS_URL=redis://localhost:6379
|
REDIS_URL=redis://localhost:6379
|
||||||
LLM_URL=http://localhost:1234/v1
|
LLM_URL=http://localhost:1234/v1
|
||||||
LLM_MODEL=default
|
LLM_MODEL=default
|
||||||
CACHE_TTL=86400
|
CACHE_TTL=86400
|
||||||
|
GOOGLE_API_KEY=your_google_api_key
|
||||||
|
GOOGLE_CSE_ID=your_google_programmable_search_engine_id
|
||||||
|
SERPAPI_API_KEY=your_serpapi_api_key_for_google_shopping
|
||||||
|
|||||||
@@ -8,6 +8,7 @@
|
|||||||
"monthly-sold": "bun run src/top-monthly-sold-by-category.ts",
|
"monthly-sold": "bun run src/top-monthly-sold-by-category.ts",
|
||||||
"mid-range": "bun run src/mid-range-sellers-by-category.ts",
|
"mid-range": "bun run src/mid-range-sellers-by-category.ts",
|
||||||
"stalker": "bun run src/stalker.ts",
|
"stalker": "bun run src/stalker.ts",
|
||||||
|
"search-offers": "bun run src/asin-offer-search.ts",
|
||||||
"upc": "bun run src/upc-lookup.ts",
|
"upc": "bun run src/upc-lookup.ts",
|
||||||
"upc-file": "bun run src/upc-file-analysis.ts",
|
"upc-file": "bun run src/upc-file-analysis.ts",
|
||||||
"start": "bun run src/index.ts",
|
"start": "bun run src/index.ts",
|
||||||
|
|||||||
134
src/asin-offer-search.ts
Normal file
134
src/asin-offer-search.ts
Normal file
@@ -0,0 +1,134 @@
|
|||||||
|
import { searchProductOffers, type SearxngOfferSearchResult } from "./searxng.ts";
|
||||||
|
|
||||||
|
/** Options for the offer-search CLI, as produced by `parseArgs`. */
type CliArgs = {
  /** Search terms: every positional argument joined with single spaces. */
  query: string;
  /** True when `--json` was passed; results are then printed as JSON instead of a table. */
  json: boolean;
  /** Search backend selected with `--provider`, if any. */
  provider?: "serpapi" | "google-custom-search" | "searxng";
  /** Value of `--category`, or "shopping" when only `--shopping` was given. */
  categories?: string;
  /** Raw value of `--engine`. */
  engines?: string;
  /** Value of `--limit`, validated as a positive integer. */
  limit?: number;
};
|
||||||
|
|
||||||
|
/**
 * Looks up the value supplied for a command-line flag.
 *
 * The `--flag=value` spelling is checked first and wins over `--flag value`
 * when both appear in `args`.
 *
 * @param args Raw command-line arguments.
 * @param flag Flag name including its leading dashes, e.g. `--limit`.
 * @returns The text after `=`, or the argument that follows the bare flag;
 *   `undefined` when the flag is absent or is the last argument.
 */
function readFlagValue(args: string[], flag: string): string | undefined {
  const inlinePrefix = `${flag}=`;
  for (const candidate of args) {
    if (candidate.startsWith(inlinePrefix)) {
      return candidate.slice(inlinePrefix.length);
    }
  }

  const position = args.indexOf(flag);
  if (position === -1) return undefined;
  return args[position + 1];
}
|
||||||
|
|
||||||
|
/**
 * Parses the CLI arguments (everything after the script name) into `CliArgs`.
 *
 * Positional arguments are joined into the search query. Arguments that
 * start with `--`, and the argument following a space-separated value flag,
 * are not part of the query.
 *
 * Prints a message to stderr and exits the process with status 1 when:
 * - a value flag is written as `--flag` without a value after it,
 * - the provider is not recognised (reported by `normalizeProvider`),
 * - no query terms remain,
 * - `--limit` is not a positive integer.
 *
 * @param args Raw arguments, typically `process.argv.slice(2)`.
 * @returns The parsed options.
 */
function parseArgs(args: string[]): CliArgs {
  const valueFlags = ["--limit", "--category", "--engine", "--provider"];

  // A bare value flag must be followed by an actual value. Without this check
  // a trailing `--limit` is silently ignored and `--category --json` would use
  // "--json" as the category.
  for (const flag of valueFlags) {
    const position = args.indexOf(flag);
    if (position === -1) continue;
    const following = args[position + 1];
    if (following === undefined || following.startsWith("--")) {
      console.error(`${flag} requires a value.`);
      process.exit(1);
    }
  }

  const json = args.includes("--json");
  const shopping = args.includes("--shopping");
  const providerRaw = readFlagValue(args, "--provider");
  const engineRaw = readFlagValue(args, "--engine");
  const categoryRaw = readFlagValue(args, "--category");
  const limitRaw = readFlagValue(args, "--limit");
  const limit = limitRaw == null ? undefined : Number(limitRaw);
  // An explicit --category wins over the --shopping shorthand.
  const categories = categoryRaw ?? (shopping ? "shopping" : undefined);
  const provider = normalizeProvider(providerRaw);

  const queryParts = args.filter((arg, index) => {
    if (arg.startsWith("--")) return false;
    // Skip the value that belongs to a preceding `--flag value` pair.
    const previous = args[index - 1];
    return previous === undefined || !valueFlags.includes(previous);
  });
  const query = queryParts.join(" ").trim();

  if (!query) {
    console.error(
      'Usage: bun run search-offers "product search terms" [--limit 10] [--provider serpapi|google-custom-search|searxng] [--category shopping | --shopping] [--engine google] [--json]',
    );
    process.exit(1);
  }

  if (limit !== undefined && (!Number.isInteger(limit) || limit <= 0)) {
    console.error("--limit must be a positive integer.");
    process.exit(1);
  }

  return {
    query,
    json,
    provider,
    categories,
    engines: engineRaw,
    limit,
  };
}
|
||||||
|
|
||||||
|
/**
 * Prints offer results to stdout as a console table, one row per result.
 * Prints "No offer results found." instead when the list is empty.
 *
 * @param results Offers to display, in the order they should appear.
 */
function printTable(results: SearxngOfferSearchResult[]): void {
  if (!results.length) {
    console.log("No offer results found.");
    return;
  }

  const rows: Array<Record<string, string | number>> = [];
  for (const offer of results) {
    rows.push({
      Rank: offer.rank,
      Score: offer.score,
      // Missing optional fields are shown as empty cells.
      ASIN: offer.matchedAsin ?? "",
      Price: formatPrice(offer),
      "Price Label": offer.detectedPriceLabel ?? "",
      Domain: offer.domain,
      Title: offer.title,
      URL: offer.url,
    });
  }
  console.table(rows);
}
|
||||||
|
|
||||||
|
/**
 * CLI entry point: parses the process arguments, runs the offer search and
 * prints the results as pretty-printed JSON (`--json`) or as a table.
 */
async function main(): Promise<void> {
  const { query, json, limit, provider, categories, engines } = parseArgs(
    process.argv.slice(2),
  );

  const results = await searchProductOffers(query, {
    maxResults: limit,
    provider,
    categories,
    engines,
  });

  if (!json) {
    printTable(results);
    return;
  }

  console.log(JSON.stringify(results, null, 2));
}
|
||||||
|
|
||||||
|
/**
 * Maps the raw `--provider` value onto a supported provider id.
 *
 * Matching ignores surrounding whitespace and letter case, and
 * "google-shopping" is accepted as an alias for "serpapi". Any other value
 * prints an error to stderr and exits the process with status 1.
 *
 * @param value Raw flag value, or `undefined` when the flag was not given.
 * @returns The provider id, or `undefined` when no value was given.
 */
function normalizeProvider(
  value: string | undefined,
): "serpapi" | "google-custom-search" | "searxng" | undefined {
  if (value == null) return undefined;

  switch (value.trim().toLowerCase()) {
    case "serpapi":
    case "google-shopping":
      return "serpapi";
    case "google-custom-search":
      return "google-custom-search";
    case "searxng":
      return "searxng";
    default:
      console.error("--provider must be one of: serpapi, google-custom-search, searxng");
      process.exit(1);
  }
}
|
||||||
|
|
||||||
|
/**
 * Builds the text shown in the "Price" column for one result.
 *
 * @param result Offer whose detected price should be displayed.
 * @returns An empty string when no price was detected; otherwise the price
 *   text as it was detected, or, when that text is missing, the numeric price
 *   with two decimals: `$` prefix for USD (also the default when no currency
 *   is set), `<CODE> <amount>` for any other currency.
 */
function formatPrice(result: SearxngOfferSearchResult): string {
  const { detectedPrice, detectedPriceText, detectedPriceCurrency } = result;
  if (detectedPrice == null) return "";
  if (detectedPriceText) return detectedPriceText;

  // Two fixed decimals so 12.5 prints as "12.50" rather than "12.5";
  // non-finite values are passed through unchanged.
  const amount = Number.isFinite(detectedPrice)
    ? detectedPrice.toFixed(2)
    : String(detectedPrice);
  const currency = detectedPriceCurrency ?? "USD";
  return currency === "USD" ? `$${amount}` : `${currency} ${amount}`;
}
|
||||||
|
|
||||||
|
// Run the CLI; any failure becomes a one-line message on stderr and exit status 1.
main().catch((error) => {
  const reason = error instanceof Error ? error.message : error;
  console.error(`Search failed: ${reason}`);
  process.exit(1);
});
|
||||||
@@ -18,10 +18,13 @@ function optionalBoolean(key: string, fallback: boolean): boolean {
|
|||||||
export const config = {
|
export const config = {
|
||||||
keepaApiKey: required("KEEPA_API_KEY"),
|
keepaApiKey: required("KEEPA_API_KEY"),
|
||||||
redisUrl: optional("REDIS_URL", "redis://localhost:6379"),
|
redisUrl: optional("REDIS_URL", "redis://localhost:6379"),
|
||||||
llmUrl: optional("LLM_URL", "http://localhost:1234/v1"),
|
llmUrl: optional("LLM_URL", "http://localhost:1234/v1"),
|
||||||
llmModel: optional("LLM_MODEL", "default"),
|
llmModel: optional("LLM_MODEL", "default"),
|
||||||
cacheTtl: parseInt(optional("CACHE_TTL", "86400"), 10),
|
cacheTtl: parseInt(optional("CACHE_TTL", "86400"), 10),
|
||||||
spApiClientId: Bun.env.SP_API_CLIENT_ID,
|
searxngUrl: optional("SEARXNG_URL", "https://searxng.nvictor.me/"),
|
||||||
|
searxngTimeoutMs: parseInt(optional("SEARXNG_TIMEOUT_MS", "10000"), 10),
|
||||||
|
searxngMaxResults: parseInt(optional("SEARXNG_MAX_RESULTS", "10"), 10),
|
||||||
|
spApiClientId: Bun.env.SP_API_CLIENT_ID,
|
||||||
spApiClientSecret: Bun.env.SP_API_CLIENT_SECRET,
|
spApiClientSecret: Bun.env.SP_API_CLIENT_SECRET,
|
||||||
spApiRefreshToken: Bun.env.SP_API_REFRESH_TOKEN,
|
spApiRefreshToken: Bun.env.SP_API_REFRESH_TOKEN,
|
||||||
spApiRegion: optional("SP_API_REGION", "na"),
|
spApiRegion: optional("SP_API_REGION", "na"),
|
||||||
|
|||||||
350
src/searxng.test.ts
Normal file
350
src/searxng.test.ts
Normal file
@@ -0,0 +1,350 @@
|
|||||||
|
import { afterAll, beforeEach, expect, mock, test } from "bun:test";
|
||||||
|
import { normalizeAsin, searchProductOffers } from "./searxng.ts";
|
||||||
|
|
||||||
|
// Snapshot of the global fetch taken when this test module loads.
const originalFetch = globalThis.fetch;

// Put the snapshot back before every test and once more after the whole file.
const restoreGlobalFetch = (): void => {
  globalThis.fetch = originalFetch;
};

beforeEach(restoreGlobalFetch);

afterAll(restoreGlobalFetch);
|
||||||
|
|
||||||
|
// normalizeAsin trims and uppercases valid input and throws on anything else.
test("normalizeAsin uppercases and validates ASINs", () => {
  const normalized = normalizeAsin(" b07sn9bhvv ");
  expect(normalized).toBe("B07SN9BHVV");

  const normalizeInvalid = () => normalizeAsin("not-an-asin");
  expect(normalizeInvalid).toThrow("Invalid ASIN");
});
|
||||||
|
|
||||||
|
// A bare ASIN is expanded into an offer-oriented query, and the priced
// third-party result is expected ahead of the plain Amazon listing.
test("searchProductOffers derives ASIN search behavior for ASIN-only queries", async () => {
  const payload = {
    results: [
      {
        title: "Amazon listing B07SN9BHVV",
        url: "https://www.amazon.com/dp/B07SN9BHVV",
        content: "Official marketplace listing.",
        engines: ["duckduckgo"],
      },
      {
        title: "Romand palette offer",
        url: "https://example-shop.com/item",
        content: "Buy product ASIN B07SN9BHVV. Offer price: $12.99 today.",
        engines: ["brave"],
      },
    ],
  };
  const fetchMock = mock(async (input: string | URL | Request) => {
    const requested = input instanceof URL ? input : new URL(String(input));
    expect(requested.pathname).toBe("/search");
    expect(requested.searchParams.get("format")).toBe("json");
    expect(requested.searchParams.get("q")).toBe("B07SN9BHVV price sale offer buy online");

    return Response.json(payload);
  });

  const offers = await searchProductOffers("B07SN9BHVV", {
    provider: "searxng",
    baseUrl: "https://searxng.test/",
    fetchImpl: fetchMock as unknown as typeof fetch,
    maxResults: 10,
  });

  expect(offers).toHaveLength(2);
  const top = offers[0];
  expect(top?.domain).toBe("example-shop.com");
  expect(top?.matchedAsin).toBe("B07SN9BHVV");
  expect(top?.detectedPrice).toBe(12.99);
  expect(top?.detectedPriceCurrency).toBe("USD");
  expect(top?.detectedPriceLabel).toBe("offer price");
  expect(top?.detectedPriceText).toBe("$12.99");
  expect(top?.engines).toEqual(["brave"]);
  expect(fetchMock).toHaveBeenCalledTimes(1);
});
|
||||||
|
|
||||||
|
// When the JSON endpoint answers 403, a second request without `format=json`
// is made and the HTML result page is parsed instead.
test("searchProductOffers falls back to HTML when JSON is unavailable", async () => {
  const html = `
    <article class="result result-default category-general">
      <a class="url_header" href="https://supplier.example/products/romand"></a>
      <h3><a href="https://supplier.example/products/romand">Supplier offer B07SN9BHVV</a></h3>
      <p class="content">Wholesale product sale price: USD 9.50 with ASIN B07SN9BHVV.</p>
      <div class="engines"><span>duckduckgo</span></div>
    </article>
  `;
  const fetchMock = mock(async (input: string | URL | Request) => {
    const requested = input instanceof URL ? input : new URL(String(input));
    const wantsJson = requested.searchParams.get("format") === "json";
    if (wantsJson) {
      return new Response("forbidden", { status: 403 });
    }
    return new Response(html, {
      status: 200,
      headers: { "content-type": "text/html" },
    });
  });

  const offers = await searchProductOffers("B07SN9BHVV", {
    provider: "searxng",
    baseUrl: "https://searxng.test/",
    fetchImpl: fetchMock as unknown as typeof fetch,
  });

  expect(offers).toHaveLength(1);
  const only = offers[0];
  expect(only?.title).toBe("Supplier offer B07SN9BHVV");
  expect(only?.domain).toBe("supplier.example");
  expect(only?.detectedPrice).toBe(9.5);
  expect(only?.detectedPriceLabel).toBe("sale price");
  expect(only?.detectedPriceText).toBe("USD 9.50");
  expect(only?.matchedAsin).toBe("B07SN9BHVV");
  expect(only?.engines).toEqual(["duckduckgo"]);
  // One JSON attempt plus one HTML attempt.
  expect(fetchMock).toHaveBeenCalledTimes(2);
});
|
||||||
|
|
||||||
|
// Covers a labelled price with a currency symbol and an unlabelled amount
// followed by a currency code.
test("searchProductOffers detects common selling and sale price formats", async () => {
  const payload = {
    results: [
      {
        title: "Supplier page",
        url: "https://supplier.example/item",
        content: "Selling price is €18.75 and list price is $24.00.",
      },
      {
        title: "Backup page",
        url: "https://backup.example/item",
        content: "Available now for 22.10 USD.",
      },
    ],
  };
  const fetchMock = mock(async () => Response.json(payload));

  const offers = await searchProductOffers("romand palette price", {
    provider: "searxng",
    baseUrl: "https://searxng.test/",
    fetchImpl: fetchMock as unknown as typeof fetch,
    maxResults: 2,
  });

  const [first, second] = offers;
  expect(first?.detectedPrice).toBe(18.75);
  expect(first?.detectedPriceCurrency).toBe("EUR");
  expect(first?.detectedPriceLabel).toBe("selling price");
  expect(second?.detectedPrice).toBe(22.1);
  expect(second?.detectedPriceCurrency).toBe("USD");
});
|
||||||
|
|
||||||
|
// For an ASIN-only query, a priced result that never mentions the ASIN is dropped.
test("searchProductOffers filters unrelated priced results for ASIN-only queries", async () => {
  const payload = {
    results: [
      {
        title: "Unrelated deal",
        url: "https://deals.example/phones",
        content: "This price is $449 but it is for another product.",
      },
      {
        title: "Amazon listing B07SN9BHVV",
        url: "https://www.amazon.in/dp/B07SN9BHVV",
        content: "1 offer from ₹550.00 · Buying options.",
      },
    ],
  };
  const fetchMock = mock(async () => Response.json(payload));

  const offers = await searchProductOffers("B07SN9BHVV", {
    provider: "searxng",
    baseUrl: "https://searxng.test/",
    fetchImpl: fetchMock as unknown as typeof fetch,
  });

  expect(offers).toHaveLength(1);
  const kept = offers[0];
  expect(kept?.matchedAsin).toBe("B07SN9BHVV");
  expect(kept?.detectedPrice).toBe(550);
  expect(kept?.detectedPriceCurrency).toBe("INR");
  expect(kept?.detectedPriceText).toBe("₹550.00");
});
|
||||||
|
|
||||||
|
// A free-text query is sent unchanged and produces no `asin` on the result.
test("searchProductOffers keeps arbitrary query strings generic", async () => {
  const searchTerms = "romand dry mango tulip price";
  const fetchMock = mock(async (input: string | URL | Request) => {
    const requested = input instanceof URL ? input : new URL(String(input));
    expect(requested.searchParams.get("q")).toBe("romand dry mango tulip price");

    return Response.json({
      results: [
        {
          title: "Generic result",
          url: "https://shop.example/romand",
          content: "Sale price: $14.25",
        },
      ],
    });
  });

  const offers = await searchProductOffers(searchTerms, {
    provider: "searxng",
    baseUrl: "https://searxng.test/",
    fetchImpl: fetchMock as unknown as typeof fetch,
  });

  expect(offers).toHaveLength(1);
  expect(offers[0]?.asin).toBeUndefined();
  expect(offers[0]?.detectedPrice).toBe(14.25);
});
|
||||||
|
|
||||||
|
// The `categories` option is forwarded as the `categories` query parameter.
test("searchProductOffers sends configured categories", async () => {
  const fetchMock = mock(async (input: string | URL | Request) => {
    const requested = input instanceof URL ? input : new URL(String(input));
    expect(requested.searchParams.get("categories")).toBe("shopping");

    return Response.json({
      results: [
        {
          title: "Shopping result",
          url: "https://shop.example/item",
          content: "Offer price: $10.00",
        },
      ],
    });
  });

  const offers = await searchProductOffers("romand price", {
    provider: "searxng",
    baseUrl: "https://searxng.test/",
    categories: "shopping",
    fetchImpl: fetchMock as unknown as typeof fetch,
  });

  expect(offers[0]?.detectedPrice).toBe(10);
});
|
||||||
|
|
||||||
|
// Selecting the single engine "google" sets `engines=google` and prefixes the
// query with the `!go` bang.
test("searchProductOffers sends configured SearXNG engines", async () => {
  const fetchMock = mock(async (input: string | URL | Request) => {
    const requested = input instanceof URL ? input : new URL(String(input));
    expect(requested.searchParams.get("engines")).toBe("google");
    expect(requested.searchParams.get("q")).toBe("!go romand price");

    return Response.json({
      results: [
        {
          title: "Google-backed result",
          url: "https://shop.example/item",
          content: "Offer price: $11.00",
          engine: "google",
        },
      ],
    });
  });

  const offers = await searchProductOffers("romand price", {
    provider: "searxng",
    baseUrl: "https://searxng.test/",
    engines: "google",
    fetchImpl: fetchMock as unknown as typeof fetch,
  });

  const first = offers[0];
  expect(first?.detectedPrice).toBe(11);
  expect(first?.engines).toEqual(["google"]);
});
|
||||||
|
|
||||||
|
// Google Custom Search: credentials and `num` are sent as query parameters and
// the price comes from the item's `pagemap.offer` entry.
test("searchProductOffers uses Google Custom Search API and pagemap offer prices", async () => {
  const payload = {
    items: [
      {
        title: "Romand Dry Mango Tulip",
        link: "https://store.example/romand",
        snippet: "Buy from Store Example.",
        pagemap: {
          offer: [{ price: "12.50", pricecurrency: "USD" }],
        },
      },
    ],
  };
  const fetchMock = mock(async (input: string | URL | Request) => {
    const requested = input instanceof URL ? input : new URL(String(input));
    const params = requested.searchParams;
    expect(requested.hostname).toBe("googleapis.test");
    expect(params.get("key")).toBe("test-key");
    expect(params.get("cx")).toBe("test-cx");
    expect(params.get("num")).toBe("5");
    expect(params.get("q")).toBe("romand dry mango tulip");

    return Response.json(payload);
  });

  const offers = await searchProductOffers("romand dry mango tulip", {
    provider: "google-custom-search",
    baseUrl: "https://googleapis.test/customsearch/v1",
    googleApiKey: "test-key",
    googleCx: "test-cx",
    maxResults: 5,
    fetchImpl: fetchMock as unknown as typeof fetch,
  });

  expect(offers).toHaveLength(1);
  const only = offers[0];
  expect(only?.title).toContain("Romand Dry Mango Tulip");
  expect(only?.domain).toBe("store.example");
  expect(only?.detectedPrice).toBe(12.5);
  expect(only?.detectedPriceLabel).toBe("offer price");
  expect(only?.engines).toEqual(["google custom search"]);
});
|
||||||
|
|
||||||
|
// With no `provider` option the request goes to SerpApi's Google Shopping engine.
test("searchProductOffers defaults to SerpApi Google Shopping results", async () => {
  const payload = {
    shopping_results: [
      {
        position: 1,
        title: "Romand Better Than Eyes Dry Mango Tulip",
        source: "K-Beauty Store",
        link: "https://store.example/products/romand",
        price: "$13.40",
        extracted_price: 13.4,
        delivery: "$4.99 delivery",
        rating: 4.7,
        reviews: 128,
      },
    ],
  };
  const fetchMock = mock(async (input: string | URL | Request) => {
    const requested = input instanceof URL ? input : new URL(String(input));
    const params = requested.searchParams;
    expect(requested.hostname).toBe("serpapi.test");
    expect(params.get("engine")).toBe("google_shopping");
    expect(params.get("q")).toBe("dry mango tulip price");
    expect(params.get("api_key")).toBe("serpapi-key");
    expect(params.get("gl")).toBe("us");
    expect(params.get("hl")).toBe("en");

    return Response.json(payload);
  });

  const offers = await searchProductOffers("dry mango tulip price", {
    baseUrl: "https://serpapi.test/search.json",
    serpapiApiKey: "serpapi-key",
    fetchImpl: fetchMock as unknown as typeof fetch,
  });

  expect(offers).toHaveLength(1);
  const only = offers[0];
  expect(only?.domain).toBe("store.example");
  expect(only?.detectedPrice).toBe(13.4);
  expect(only?.detectedPriceText).toBe("$13.40");
  expect(only?.engines).toEqual(["serpapi google shopping"]);
});
|
||||||
|
|
||||||
|
// `maxResults` caps the output after ranking (the priced result is the one
// kept), and an empty upstream result list yields an empty array.
test("searchProductOffers applies result limits and handles empty results", async () => {
  const twoResults = {
    results: [
      { title: "One", url: "https://one.example", content: "No price" },
      { title: "Two", url: "https://two.example", content: "$20.00" },
    ],
  };
  const fetchMock = mock(async () => Response.json(twoResults));

  const limited = await searchProductOffers("romand palette", {
    provider: "searxng",
    baseUrl: "https://searxng.test/",
    fetchImpl: fetchMock as unknown as typeof fetch,
    maxResults: 1,
  });
  expect(limited).toHaveLength(1);
  expect(limited[0]?.domain).toBe("two.example");

  const emptyFetch = mock(async () => Response.json({ results: [] }));
  const empty = await searchProductOffers("missing product", {
    provider: "searxng",
    baseUrl: "https://searxng.test/",
    fetchImpl: emptyFetch as unknown as typeof fetch,
  });
  expect(empty).toEqual([]);
});
|
||||||
777
src/searxng.ts
Normal file
777
src/searxng.ts
Normal file
@@ -0,0 +1,777 @@
|
|||||||
|
// Fallback SearXNG instance when neither `options.baseUrl` nor SEARXNG_URL is set.
const DEFAULT_SEARXNG_URL = "https://searxng.nvictor.me/";
// Fallback Google Custom Search endpoint when `options.baseUrl` is not set.
const DEFAULT_GOOGLE_CUSTOM_SEARCH_URL = "https://www.googleapis.com/customsearch/v1";
// NOTE(review): presumably the SerpApi fallback endpoint; its use is outside this view.
const DEFAULT_SERPAPI_URL = "https://serpapi.com/search.json";
// Fallbacks for the request timeout (milliseconds) and the result cap.
const DEFAULT_TIMEOUT_MS = 10_000;
const DEFAULT_MAX_RESULTS = 10;

// A whole string that is "B" followed by nine digits or uppercase letters.
const ASIN_REGEX = /^B[0-9A-Z]{9}$/;
// The same shape anywhere inside a longer text, case-insensitive, all matches.
const ASIN_MATCH_REGEX = /\bB[0-9A-Z]{9}\b/gi;

// Price labels, listed in the same order as the alternation in LABELED_PRICE_REGEX.
const PRICE_LABELS = [
  "selling price",
  "sale price",
  "offer price",
  "current price",
  "our price",
  "list price",
  "price",
] as const;

// Regex alternation of accepted currency codes, and the accepted currency symbols.
const CURRENCY_CODES = "USD|US\\$|EUR|GBP|INR|CAD|AUD";
const CURRENCY_SYMBOLS = "$€£₹";

// A price label, then up to 24 characters that are neither digits nor currency
// symbols, then a price. Group 1 is the label, group 2 the price text. The
// price is either an optional code plus a symbol plus an amount, or a code plus
// an amount. An amount is 1-5 digits, optional ",ddd" groups, optional ".dd".
const LABELED_PRICE_REGEX = new RegExp(
  `\\b(selling price|sale price|offer price|current price|our price|list price|price)\\b[^${escapeForCharClass(CURRENCY_SYMBOLS)}0-9]{0,24}((?:${CURRENCY_CODES})?\\s*[${escapeForCharClass(CURRENCY_SYMBOLS)}]\\s*[0-9]{1,5}(?:,[0-9]{3})*(?:\\.[0-9]{2})?|(?:${CURRENCY_CODES})\\s*[0-9]{1,5}(?:,[0-9]{3})*(?:\\.[0-9]{2})?)`,
  "gi",
);

// Any price without a label: the two shapes above, or an amount followed by a
// currency code.
const PRICE_REGEX = new RegExp(
  `((?:${CURRENCY_CODES})?\\s*[${escapeForCharClass(CURRENCY_SYMBOLS)}]\\s*[0-9]{1,5}(?:,[0-9]{3})*(?:\\.[0-9]{2})?|(?:${CURRENCY_CODES})\\s*[0-9]{1,5}(?:,[0-9]{3})*(?:\\.[0-9]{2})?|[0-9]{1,5}(?:,[0-9]{3})*(?:\\.[0-9]{2})?\\s*(?:${CURRENCY_CODES}))`,
  "gi",
);
|
||||||
|
|
||||||
|
/** One ranked offer returned by `searchProductOffers`. */
export type SearxngOfferSearchResult = {
  /** NOTE(review): presumably the ASIN inferred from an ASIN-only query; confirm in normalizeResult. */
  asin?: string;
  /** NOTE(review): presumably the query text sent to the provider; confirm in normalizeResult. */
  query: string;
  title: string;
  url: string;
  domain: string;
  snippet: string;
  /** Tie-breaker when scores are equal: the lower rank is listed first. */
  rank: number;
  /** Primary sort key: higher scores are listed first. */
  score: number;
  /** Compared with the queried ASIN to filter results of ASIN-only searches. */
  matchedAsin?: string;
  /** Numeric price, when one was detected. */
  detectedPrice?: number;
  /** Currency of the detected price. */
  detectedPriceCurrency?: string;
  /** Label associated with the detected price, e.g. "sale price". */
  detectedPriceLabel?: string;
  /** Price text as detected, e.g. "$12.99". */
  detectedPriceText?: string;
  /** Names of the engines or provider that produced this result. */
  engines: string[];
};
|
||||||
|
|
||||||
|
/** Options accepted by `searchProductOffers` and `searchAsinOffers`. */
export type SearxngSearchOptions = {
  /**
   * Search backend. "searxng" and "google-custom-search" select those
   * providers; any other value, including `undefined`, selects SerpApi
   * Google Shopping.
   */
  provider?: "serpapi" | "google-custom-search" | "searxng";
  /** Endpoint override for the selected provider. */
  baseUrl?: string;
  /** Google API key; falls back to the GOOGLE_API_KEY environment variable. */
  googleApiKey?: string;
  /**
   * Google Custom Search engine id; falls back to GOOGLE_CSE_ID, GOOGLE_CX,
   * then GOOGLE_SEARCH_ENGINE_ID.
   */
  googleCx?: string;
  /** NOTE(review): presumably the SerpApi key; its use is outside this view. */
  serpapiApiKey?: string;
  /** Request timeout in milliseconds; falls back to SEARXNG_TIMEOUT_MS, then 10000. */
  timeoutMs?: number;
  /** Cap on the number of results returned; falls back to SEARXNG_MAX_RESULTS, then 10. */
  maxResults?: number;
  /** Result page to request; defaults to 1. */
  page?: number;
  /** SearXNG `categories` parameter; defaults to "general". */
  categories?: string;
  /**
   * SearXNG `engines` parameter. When it names exactly one engine and that
   * engine is "google", the query is also prefixed with the `!go` bang.
   */
  engines?: string;
  /** Keep results that do not match the ASIN when the query is a bare ASIN. */
  includeUnmatchedAsinResults?: boolean;
  /** Fetch implementation to use; defaults to the global `fetch`. */
  fetchImpl?: typeof fetch;
};
|
||||||
|
|
||||||
|
/** Provider-independent search hit, as returned by the provider fetchers before normalization. */
type RawSearchResult = {
  title: string;
  url: string;
  snippet: string;
  /** Names of the engines or provider that produced the hit. */
  engines: string[];
  /** NOTE(review): presumably the position in the provider's own ordering; confirm in the parsers. */
  rank: number;
};
|
||||||
|
|
||||||
|
/** Body of a SearXNG `format=json` response; only `results` is declared. */
type JsonSearchResponse = {
  /** Result entries, left loosely typed. */
  results?: Array<Record<string, unknown>>;
};
|
||||||
|
|
||||||
|
/** A price detected for a search result. */
type PriceDetection = {
  /** Numeric value of the price. */
  amount: number;
  /** Currency of the price. */
  currency: string;
  /** The price text as it was detected. */
  text: string;
  /** Price label associated with the price, when there is one. */
  label?: string;
};
|
||||||
|
|
||||||
|
/**
 * Searches for offers of a single product identified by its ASIN.
 *
 * @param asin ASIN to search for; it is validated and uppercased first.
 * @param options Search options, passed through to `searchProductOffers`.
 * @returns The ranked offers found for the ASIN.
 * @throws Error (as a rejected promise) when `asin` is not a valid ASIN.
 */
export async function searchAsinOffers(
  asin: string,
  options: SearxngSearchOptions = {},
): Promise<SearxngOfferSearchResult[]> {
  const validatedAsin = normalizeAsin(asin);
  return searchProductOffers(validatedAsin, options);
}
|
||||||
|
|
||||||
|
/**
 * Searches the selected provider for product offers and returns them ranked.
 *
 * When the trimmed query is a bare ASIN, the provider is queried with
 * "<ASIN> price sale offer buy online", and results whose matched ASIN
 * differs are dropped unless `options.includeUnmatchedAsinResults` is set.
 * Results without a URL are always dropped. The rest are sorted by
 * descending score, then ascending rank, and cut to the result limit.
 *
 * @param query Free-text search terms or a bare ASIN.
 * @param options Provider selection, credentials and limits.
 * @returns At most `maxResults` normalized offers.
 * @throws Error when the query is empty after trimming.
 */
export async function searchProductOffers(
  query: string,
  options: SearxngSearchOptions = {},
): Promise<SearxngOfferSearchResult[]> {
  const normalizedQuery = query.trim();
  if (!normalizedQuery) {
    throw new Error("Search query is required.");
  }

  const inferredAsin = getAsinQuery(normalizedQuery);
  const providerQuery = inferredAsin
    ? `${inferredAsin} price sale offer buy online`
    : normalizedQuery;

  // Explicit option first, then the environment, then the built-in default.
  const requestedLimit =
    options.maxResults ?? readEnvInt("SEARXNG_MAX_RESULTS", DEFAULT_MAX_RESULTS);
  const maxResults = positiveInteger(requestedLimit, DEFAULT_MAX_RESULTS);

  let rawResults: RawSearchResult[];
  if (options.provider === "searxng") {
    rawResults = await fetchSearxngResults(providerQuery, options);
  } else if (options.provider === "google-custom-search") {
    rawResults = await fetchGoogleCustomSearchResults(providerQuery, {
      ...options,
      maxResults,
    });
  } else {
    // Any other provider value, including none at all, uses SerpApi.
    rawResults = await fetchSerpApiGoogleShoppingResults(providerQuery, {
      ...options,
      provider: "serpapi",
      maxResults,
    });
  }

  const keepResult = (result: SearxngOfferSearchResult): boolean => {
    if (!result.url) return false;
    if (!inferredAsin || options.includeUnmatchedAsinResults) return true;
    return result.matchedAsin === inferredAsin;
  };

  return rawResults
    .map((result) => normalizeResult(result, providerQuery, inferredAsin))
    .filter(keepResult)
    .sort((a, b) => b.score - a.score || a.rank - b.rank)
    .slice(0, maxResults);
}
|
||||||
|
|
||||||
|
/**
 * Validates an ASIN and returns it in canonical form.
 *
 * @param value Candidate ASIN; surrounding whitespace and letter case are ignored.
 * @returns The trimmed, uppercased ASIN.
 * @throws Error with the message `Invalid ASIN: <value>` when the input does
 *   not match `ASIN_REGEX`.
 */
export function normalizeAsin(value: string): string {
  const candidate = value.trim().toUpperCase();
  if (ASIN_REGEX.test(candidate)) {
    return candidate;
  }
  throw new Error(`Invalid ASIN: ${value}`);
}
|
||||||
|
|
||||||
|
/**
 * Detects whether a search query consists of nothing but an ASIN.
 *
 * @param value Query text.
 * @returns The trimmed, uppercased ASIN when the whole query matches
 *   `ASIN_REGEX`; otherwise `undefined`.
 */
function getAsinQuery(value: string): string | undefined {
  const candidate = value.trim().toUpperCase();
  if (!ASIN_REGEX.test(candidate)) return undefined;
  return candidate;
}
|
||||||
|
|
||||||
|
/**
 * Queries a SearXNG instance and returns its raw results.
 *
 * The JSON API (`format=json`) is tried first. If that response is not a
 * successful JSON response, the same search is requested again without a
 * format and the HTML result page is parsed instead.
 *
 * @param query Search terms; a single "google" engine also adds the `!go` bang.
 * @param options Base URL, timeout, page, categories, engines and fetch override.
 * @returns Parsed results from whichever response format was usable.
 * @throws Error when the HTML fallback request does not succeed either.
 */
async function fetchSearxngResults(
  query: string,
  options: SearxngSearchOptions,
): Promise<RawSearchResult[]> {
  const baseUrl = normalizeBaseUrl(
    options.baseUrl ?? Bun.env.SEARXNG_URL ?? DEFAULT_SEARXNG_URL,
  );
  const requestedTimeout =
    options.timeoutMs ?? readEnvInt("SEARXNG_TIMEOUT_MS", DEFAULT_TIMEOUT_MS);
  const timeoutMs = positiveInteger(requestedTimeout, DEFAULT_TIMEOUT_MS);
  const fetchImpl = options.fetchImpl ?? fetch;
  const requestQuery = applySearxngEngineBang(query, options.engines);

  // Parameters shared by the JSON attempt and the HTML fallback.
  const sharedParams = {
    categories: options.categories ?? "general",
    engines: options.engines,
    page: positiveInteger(options.page ?? 1, 1),
  };

  const jsonUrl = buildSearchUrl(baseUrl, requestQuery, {
    ...sharedParams,
    format: "json",
  });
  const jsonResponse = await fetchWithTimeout(fetchImpl, jsonUrl, timeoutMs);
  if (isJsonResponse(jsonResponse)) {
    const payload = (await jsonResponse.json()) as JsonSearchResponse;
    return parseJsonResults(payload);
  }

  // JSON was refused or unavailable: request the HTML page instead.
  const htmlUrl = buildSearchUrl(baseUrl, requestQuery, sharedParams);
  const htmlResponse = await fetchWithTimeout(fetchImpl, htmlUrl, timeoutMs);
  if (htmlResponse.ok) {
    return parseHtmlResults(await htmlResponse.text());
  }

  throw new Error(
    `SearXNG search failed: status=${htmlResponse.status} url=${htmlUrl.toString()}`,
  );
}
|
||||||
|
|
||||||
|
/**
 * Prefixes the query with a SearXNG "bang" shortcut when exactly one engine
 * is requested and a shortcut is known for it.
 *
 * @param query Search terms; returned unchanged when they already start with `!`.
 * @param engines Comma-separated engine names, or `undefined`.
 * @returns `!<shortcut> <query>` when a shortcut applies, otherwise `query` as given.
 */
function applySearxngEngineBang(query: string, engines: string | undefined): string {
  if (!engines) return query;
  if (query.trim().startsWith("!")) return query;

  const requested: string[] = [];
  for (const part of engines.split(",")) {
    const name = part.trim().toLowerCase();
    if (name) requested.push(name);
  }

  // A bang selects a single engine, so it only applies to a one-engine list.
  const onlyEngine = requested[0];
  if (requested.length !== 1 || onlyEngine === undefined) return query;

  const shortcut = searxngEngineShortcut(onlyEngine);
  if (!shortcut) return query;
  return `!${shortcut} ${query}`;
}
|
||||||
|
|
||||||
|
/**
 * Returns the SearXNG bang shortcut for an engine name.
 *
 * @param engine Engine name, compared exactly (callers pass it lowercased).
 * @returns "go" for "google"; `undefined` for every other name.
 */
function searxngEngineShortcut(engine: string): string | undefined {
  return engine === "google" ? "go" : undefined;
}
|
||||||
|
|
||||||
|
/**
 * Tells whether a response can be read as JSON search results.
 *
 * @param response Response to inspect; its body is not read.
 * @returns True when the status is OK and the content type, compared
 *   case-insensitively, contains "application/json".
 */
function isJsonResponse(response: Response): boolean {
  if (!response.ok) return false;
  const mediaType = (response.headers.get("content-type") ?? "").toLowerCase();
  return mediaType.includes("application/json");
}
|
||||||
|
|
||||||
|
/**
 * Issues a GET request that is aborted if no response arrives in time.
 *
 * The timer is cleared as soon as `fetchImpl` settles, so it covers the wait
 * for the response but not reading the body afterwards.
 *
 * @param fetchImpl Fetch implementation to call.
 * @param url Request URL.
 * @param timeoutMs Milliseconds to wait before aborting the request.
 * @returns The response produced by `fetchImpl`.
 */
async function fetchWithTimeout(
  fetchImpl: typeof fetch,
  url: URL,
  timeoutMs: number,
): Promise<Response> {
  const aborter = new AbortController();
  const abortTimer = setTimeout(() => aborter.abort(), timeoutMs);

  try {
    const response = await fetchImpl(url, {
      signal: aborter.signal,
      headers: {
        accept: "application/json,text/html;q=0.9,*/*;q=0.8",
        "user-agent": "asin-check/1.0 (+https://searxng.nvictor.me/)",
      },
    });
    return response;
  } finally {
    // Always release the timer, whether the request succeeded or failed.
    clearTimeout(abortTimer);
  }
}
|
||||||
|
|
||||||
|
/**
 * Builds the URL of a SearXNG search request.
 *
 * The path `search` is resolved relative to `baseUrl`, and the query
 * parameters are set in the order q, categories, engines, pageno, format.
 * `engines` and `format` are left out when empty or missing.
 *
 * @param baseUrl Base URL of the SearXNG instance; not modified.
 * @param query Search terms for the `q` parameter.
 * @param params Categories, optional engines, page number and optional format.
 * @returns A new URL for the request.
 */
function buildSearchUrl(
  baseUrl: URL,
  query: string,
  params: { categories: string; engines?: string; page: number; format?: string },
): URL {
  const { categories, engines, page, format } = params;
  const target = new URL("search", baseUrl);

  // `undefined` marks an optional parameter to leave out of the URL.
  const entries: Array<[string, string | undefined]> = [
    ["q", query],
    ["categories", categories],
    ["engines", engines || undefined],
    ["pageno", String(page)],
    ["format", format || undefined],
  ];
  for (const [key, value] of entries) {
    if (value !== undefined) {
      target.searchParams.set(key, value);
    }
  }

  return target;
}
|
||||||
|
|
||||||
|
/**
 * Queries the Google Custom Search endpoint and converts the response items
 * into raw search results.
 *
 * Credentials come from `options` first and fall back to environment
 * variables (`GOOGLE_API_KEY`; `GOOGLE_CSE_ID`, `GOOGLE_CX` or
 * `GOOGLE_SEARCH_ENGINE_ID`). At most 10 results are requested per page.
 *
 * @param query - The search query, sent as `q`.
 * @param options - Search options (credentials, paging, timeout, fetch override, base URL).
 * @returns The parsed results.
 * @throws Error when the API key or engine id is missing, or when the HTTP
 *   response is not OK (the message includes the status and up to 300
 *   characters of the response body).
 */
async function fetchGoogleCustomSearchResults(
  query: string,
  options: SearxngSearchOptions,
): Promise<RawSearchResult[]> {
  const apiKey = options.googleApiKey ?? Bun.env.GOOGLE_API_KEY;
  if (!apiKey) {
    throw new Error("Missing GOOGLE_API_KEY for Google Custom Search.");
  }

  const cx =
    options.googleCx ??
    Bun.env.GOOGLE_CSE_ID ??
    Bun.env.GOOGLE_CX ??
    Bun.env.GOOGLE_SEARCH_ENGINE_ID;
  if (!cx) {
    throw new Error(
      "Missing Google Custom Search engine id. Set GOOGLE_CSE_ID, GOOGLE_CX, or GOOGLE_SEARCH_ENGINE_ID.",
    );
  }

  const requestedTimeout =
    options.timeoutMs ?? readEnvInt("SEARXNG_TIMEOUT_MS", DEFAULT_TIMEOUT_MS);
  const timeoutMs = positiveInteger(requestedTimeout, DEFAULT_TIMEOUT_MS);
  const pageNumber = positiveInteger(options.page ?? 1, 1);
  const requestedSize = positiveInteger(
    options.maxResults ?? DEFAULT_MAX_RESULTS,
    DEFAULT_MAX_RESULTS,
  );
  const pageSize = Math.min(10, requestedSize);
  // `start` is 1-based: page 1 begins at result 1, page 2 at pageSize + 1, ...
  const firstResult = (pageNumber - 1) * pageSize + 1;

  const endpoint = new URL(options.baseUrl ?? DEFAULT_GOOGLE_CUSTOM_SEARCH_URL);
  const queryParams: Array<[string, string]> = [
    ["key", apiKey],
    ["cx", cx],
    ["q", query],
    ["num", String(pageSize)],
    ["start", String(firstResult)],
  ];
  for (const [name, value] of queryParams) {
    endpoint.searchParams.set(name, value);
  }

  const response = await fetchWithTimeout(options.fetchImpl ?? fetch, endpoint, timeoutMs);
  if (response.ok) {
    const payload = (await response.json()) as GoogleCustomSearchResponse;
    return parseGoogleCustomSearchResults(payload);
  }

  // Reading the error body is best-effort; a failed read yields an empty detail.
  const detail = await response.text().catch(() => "");
  throw new Error(
    `Google Custom Search failed: status=${response.status} ${detail.slice(0, 300)}`,
  );
}
|
||||||
|
|
||||||
|
/** The part of a Google Custom Search JSON response that this module reads. */
type GoogleCustomSearchResponse = {
  /** Result entries; the parser treats a missing list as empty. */
  items?: Array<GoogleCustomSearchItem>;
};
|
||||||
|
|
||||||
|
/** One entry of `items` in a Google Custom Search response. All fields are optional. */
type GoogleCustomSearchItem = {
  /** Result title. */
  title?: string;
  /** Result URL; entries without it are skipped by the parser. */
  link?: string;
  /** Text excerpt for the result. */
  snippet?: string;
  displayLink?: string;
  /** Structured page metadata; the `offer`, `product` and `metatags` entries are scanned for prices. */
  pagemap?: Record<string, unknown>;
};
|
||||||
|
|
||||||
|
/** The part of a SerpApi `google_shopping` response that this module reads. */
type SerpApiShoppingResponse = {
  /** Primary result list. */
  shopping_results?: Array<SerpApiShoppingResult>;
  /** Additional inline results; merged after `shopping_results`. */
  inline_shopping_results?: Array<SerpApiShoppingResult>;
  /** Results grouped by category; each group's list is merged last. */
  categorized_shopping_results?: {
    shopping_results?: Array<SerpApiShoppingResult>;
  }[];
  /** Error message; when set, the fetcher throws instead of parsing results. */
  error?: string;
};
|
||||||
|
|
||||||
|
/** One shopping result entry in a SerpApi response. All fields are optional. */
type SerpApiShoppingResult = {
  /** Position reported by the API; used as the rank when present. */
  position?: number;
  title?: string;
  /** Merchant name; emitted as `merchant: …` in the snippet. */
  source?: string;

  // URL candidates, tried by the parser in this order.
  link?: string;
  product_link?: string;
  serpapi_product_api?: string;

  /** Display price; emitted as `offer price: …` in the snippet. */
  price?: string;
  extracted_price?: number;
  /** Previous price; emitted as `list price: …` in the snippet. */
  old_price?: string;
  extracted_old_price?: number;

  delivery?: string;
  rating?: number;
  reviews?: number;
  snippet?: string;
};
|
||||||
|
|
||||||
|
/**
 * Queries SerpApi's `google_shopping` engine and converts the response into
 * raw search results.
 *
 * The API key comes from `options.serpapiApiKey` or `SERPAPI_API_KEY`. The
 * request is pinned to `google.com`, `gl=us`, `hl=en`, and pages advance the
 * `start` offset in steps of 60.
 *
 * @param query - The search query, sent as `q`.
 * @param options - Search options (API key, paging, timeout, fetch override, base URL).
 * @returns The parsed results.
 * @throws Error when the API key is missing, when the HTTP response is not OK,
 *   or when the JSON payload carries an `error` field.
 */
async function fetchSerpApiGoogleShoppingResults(
  query: string,
  options: SearxngSearchOptions,
): Promise<RawSearchResult[]> {
  const apiKey = options.serpapiApiKey ?? Bun.env.SERPAPI_API_KEY;
  if (!apiKey) {
    throw new Error(
      "Missing SERPAPI_API_KEY. Google does not provide an official public Shopping-tab search API; use SerpApi's google_shopping API or another SERP provider.",
    );
  }

  const requestedTimeout =
    options.timeoutMs ?? readEnvInt("SEARXNG_TIMEOUT_MS", DEFAULT_TIMEOUT_MS);
  const timeoutMs = positiveInteger(requestedTimeout, DEFAULT_TIMEOUT_MS);
  const pageNumber = positiveInteger(options.page ?? 1, 1);

  const endpoint = new URL(options.baseUrl ?? DEFAULT_SERPAPI_URL);
  const queryParams: Array<[string, string]> = [
    ["engine", "google_shopping"],
    ["q", query],
    ["api_key", apiKey],
    ["google_domain", "google.com"],
    ["gl", "us"],
    ["hl", "en"],
    ["start", String((pageNumber - 1) * 60)],
  ];
  for (const [name, value] of queryParams) {
    endpoint.searchParams.set(name, value);
  }

  const response = await fetchWithTimeout(options.fetchImpl ?? fetch, endpoint, timeoutMs);
  if (!response.ok) {
    // Reading the error body is best-effort; a failed read yields an empty detail.
    const detail = await response.text().catch(() => "");
    throw new Error(
      `SerpApi Google Shopping failed: status=${response.status} ${detail.slice(0, 300)}`,
    );
  }

  const payload = (await response.json()) as SerpApiShoppingResponse;
  // A 2xx response can still carry an error field in the payload.
  if (payload.error) {
    throw new Error(`SerpApi Google Shopping failed: ${payload.error}`);
  }
  return parseSerpApiShoppingResults(payload);
}
|
||||||
|
|
||||||
|
/**
 * Flattens the result lists of a SerpApi shopping response into raw search
 * results.
 *
 * Lists are merged in the order `shopping_results`, `inline_shopping_results`,
 * then each category's `shopping_results`. Entries without any usable URL are
 * dropped. The snippet is a space-joined summary of price, list price,
 * merchant, delivery, snippet, rating and reviews (whichever are present).
 *
 * @param json - The decoded SerpApi response.
 * @returns One raw result per entry that has a URL.
 */
function parseSerpApiShoppingResults(
  json: SerpApiShoppingResponse,
): RawSearchResult[] {
  const entries: SerpApiShoppingResult[] = [];
  entries.push(...(json.shopping_results ?? []));
  entries.push(...(json.inline_shopping_results ?? []));
  for (const category of json.categorized_shopping_results ?? []) {
    entries.push(...(category.shopping_results ?? []));
  }

  const parsed: RawSearchResult[] = [];
  entries.forEach((item, index) => {
    const url =
      optionalString(item.link) ??
      optionalString(item.product_link) ??
      optionalString(item.serpapi_product_api);
    if (!url) return;

    const parts: string[] = [];
    const addPart = (part: string | undefined): void => {
      if (part) parts.push(part);
    };

    const priceText = optionalString(item.price);
    if (priceText) addPart(`offer price: ${priceText}`);
    if (optionalString(item.old_price)) addPart(`list price: ${item.old_price}`);
    if (optionalString(item.source)) addPart(`merchant: ${item.source}`);
    addPart(optionalString(item.delivery));
    addPart(optionalString(item.snippet));
    if (typeof item.rating === "number") addPart(`rating: ${item.rating}`);
    if (typeof item.reviews === "number") addPart(`reviews: ${item.reviews}`);

    parsed.push({
      title: optionalString(item.title) ?? "",
      url,
      snippet: parts.join(" "),
      engines: ["serpapi google shopping"],
      // Fall back to the 1-based index within the merged list.
      rank: item.position ?? index + 1,
    });
  });

  return parsed;
}
|
||||||
|
|
||||||
|
/**
 * Converts Google Custom Search items into raw search results.
 *
 * Items without a link are dropped. The snippet is the item's own snippet
 * followed by any price text extracted from its pagemap, joined by a space.
 * Rank is the item's 1-based position in the response.
 *
 * @param json - The decoded Custom Search response.
 * @returns One raw result per item that has a link.
 */
function parseGoogleCustomSearchResults(
  json: GoogleCustomSearchResponse,
): RawSearchResult[] {
  const parsed: RawSearchResult[] = [];

  for (const [index, item] of (json.items ?? []).entries()) {
    const url = optionalString(item.link);
    if (!url) continue;

    const snippetParts: string[] = [];
    const ownSnippet = optionalString(item.snippet);
    if (ownSnippet) snippetParts.push(ownSnippet);
    const metadataText = extractGoogleCustomSearchMetadataText(item);
    if (metadataText) snippetParts.push(metadataText);

    parsed.push({
      title: optionalString(item.title) ?? "",
      url,
      snippet: snippetParts.join(" "),
      engines: ["google custom search"],
      rank: index + 1,
    });
  }

  return parsed;
}
|
||||||
|
|
||||||
|
/**
 * Collects price text from an item's pagemap.
 *
 * The `offer`, `product` and `metatags` pagemap sections are scanned in that
 * order, and every object entry contributes at most one `offer price: …`
 * chunk.
 *
 * @param item - A Custom Search result item.
 * @returns The chunks joined by a space, or an empty string when none were found.
 */
function extractGoogleCustomSearchMetadataText(
  item: GoogleCustomSearchItem,
): string {
  const pagemap = item.pagemap ?? {};
  const priceChunks: string[] = [];

  const sections = [pagemap.offer, pagemap.product, pagemap.metatags];
  for (const section of sections) {
    for (const entry of readPagemapObjects(section)) {
      appendPriceMetadata(priceChunks, entry);
    }
  }

  return priceChunks.join(" ");
}
|
||||||
|
|
||||||
|
/**
 * Appends an `offer price: …` chunk to `chunks` when `value` carries a price.
 *
 * The first non-blank value among the known price keys wins; the same applies
 * to the currency keys. Nothing is appended when no price is found.
 *
 * @param chunks - Output list, mutated in place.
 * @param value - One pagemap object (offer, product or metatag entry).
 */
function appendPriceMetadata(chunks: string[], value: Record<string, unknown>): void {
  // Returns the first key whose value is a non-blank string.
  const firstText = (keys: readonly string[]): string | undefined => {
    for (const key of keys) {
      const text = optionalString(value[key]);
      if (text !== undefined) return text;
    }
    return undefined;
  };

  const price = firstText([
    "price",
    "lowprice",
    "highprice",
    "product:price:amount",
    "og:price:amount",
    "twitter:data1",
  ]);
  if (!price) return;

  const currency = firstText([
    "pricecurrency",
    "priceCurrency",
    "product:price:currency",
    "og:price:currency",
  ]);

  if (currency) {
    chunks.push(`offer price: ${currency} ${price}`);
  } else {
    chunks.push(`offer price: ${price}`);
  }
}
|
||||||
|
|
||||||
|
/**
 * Returns the plain-object entries of a pagemap section.
 *
 * @param value - A pagemap section of unknown shape.
 * @returns The entries that are non-null, non-array objects, in their original
 *   order; an empty array when `value` is not an array.
 */
function readPagemapObjects(value: unknown): Array<Record<string, unknown>> {
  const objects: Array<Record<string, unknown>> = [];
  if (!Array.isArray(value)) return objects;

  value.forEach((entry) => {
    const isPlainObject =
      typeof entry === "object" && entry !== null && !Array.isArray(entry);
    if (isPlainObject) objects.push(entry);
  });
  return objects;
}
|
||||||
|
|
||||||
|
/**
 * Converts the `results` array of a JSON search response into raw search
 * results.
 *
 * Entries without a URL are dropped. Rank is the entry's 1-based position in
 * the response, counting dropped entries.
 *
 * @param json - The decoded JSON search response.
 * @returns One raw result per entry that has a URL.
 */
function parseJsonResults(json: JsonSearchResponse): RawSearchResult[] {
  const parsed: RawSearchResult[] = [];

  (json.results ?? []).forEach((result, index) => {
    const url = optionalString(result.url);
    if (!url) return;

    const title = optionalString(result.title) ?? "";
    const snippet = optionalString(result.content) ?? "";
    // Prefer the `engines` field and fall back to the singular `engine`.
    const engines = normalizeEngines(result.engines ?? result.engine);

    parsed.push({ title, url, snippet, engines, rank: index + 1 });
  });

  return parsed;
}
|
||||||
|
|
||||||
|
/**
 * Extracts search results from an HTML results page by streaming it through
 * `HTMLRewriter`.
 *
 * Each `article.result` element becomes one result:
 * - URL: `href` of the first `a.url_header` or `h3 a` found in the article;
 * - title: text inside `h3 a`;
 * - snippet: text inside `p.content`;
 * - engines: one entry per text chunk inside `.engines span`.
 *
 * Articles without a URL are dropped. Rank is the 1-based position among the
 * results that were kept.
 *
 * @param html - The HTML document to parse.
 * @returns The results in document order.
 */
async function parseHtmlResults(html: string): Promise<RawSearchResult[]> {
  type Draft = {
    title: string;
    url: string;
    snippet: string;
    engines: string[];
  };

  const results: RawSearchResult[] = [];
  let current: Draft | null = null;

  // Emits the article being built (if it has a URL) and resets the state.
  // Safe to call repeatedly: it is a no-op when nothing is pending.
  const flushCurrent = (): void => {
    if (current?.url) {
      results.push({ ...current, rank: results.length + 1 });
    }
    current = null;
  };

  // Appends a whitespace-normalized text chunk to the given field of the
  // article being built. The target is passed explicitly by each handler so
  // that every text node of an element is captured, including text that
  // follows a nested element inside a title anchor.
  const appendText = (target: "title" | "snippet" | "engine", text: string): void => {
    if (!current) return;
    const normalized = text.replace(/\s+/g, " ").trim();
    if (!normalized) return;

    if (target === "engine") {
      current.engines.push(normalized);
      return;
    }

    current[target] = appendWithSpace(current[target], normalized);
  };

  const response = new HTMLRewriter()
    .on("article.result", {
      element(element) {
        // If the end-tag hook below is unavailable, the previous article is
        // still pending here; emit it before starting the next one.
        flushCurrent();
        current = { title: "", url: "", snippet: "", engines: [] };
        const onEndTag = (element as unknown as {
          onEndTag?: (handler: () => void) => void;
        }).onEndTag;
        onEndTag?.call(element, flushCurrent);
      },
    })
    .on("article.result a.url_header", {
      element(element) {
        if (current && !current.url) {
          current.url = element.getAttribute("href") ?? "";
        }
      },
    })
    .on("article.result h3 a", {
      element(element) {
        if (current && !current.url) {
          current.url = element.getAttribute("href") ?? "";
        }
      },
      text(text) {
        appendText("title", text.text);
      },
    })
    .on("article.result p.content", {
      text(text) {
        appendText("snippet", text.text);
      },
    })
    .on("article.result .engines span", {
      text(text) {
        appendText("engine", text.text);
      },
    })
    .transform(new Response(html));

  // Consuming the transformed body is what drives the handlers above.
  await response.text();
  // Emit the last article when no end-tag hook fired for it.
  flushCurrent();
  return results;
}
|
||||||
|
|
||||||
|
/**
 * Turns a raw search result into the normalized, scored result shape.
 *
 * The URL, title and snippet are normalized; an ASIN-like token is looked up
 * in title, snippet and URL; a price is looked up in title and snippet; and a
 * score is computed from those signals.
 *
 * @param raw - The raw result from one of the providers.
 * @param query - The query that produced the result; copied to the output.
 * @param asin - The ASIN being searched for, when known.
 * @returns The normalized result. Optional fields (`asin`, `matchedAsin`, the
 *   `detectedPrice*` group) are only present when a value exists.
 */
function normalizeResult(
  raw: RawSearchResult,
  query: string,
  asin?: string,
): SearxngOfferSearchResult {
  const url = normalizeUrl(raw.url);
  const domain = extractDomain(url);
  const title = normalizeText(raw.title);
  const snippet = normalizeText(raw.snippet);

  const matchedAsin = findMatchedAsin(`${title} ${snippet} ${url}`);
  const price = detectPrice(`${title} ${snippet}`);
  const score = scoreResult({
    asin,
    matchedAsin,
    detectedPrice: price?.amount,
    domain,
    rank: raw.rank,
  });

  // Optional groups are built separately and spread in, so absent values do
  // not appear as keys in the output.
  const asinFields = asin ? { asin } : {};
  const matchFields = matchedAsin ? { matchedAsin } : {};
  const priceFields = price
    ? {
        detectedPrice: price.amount,
        detectedPriceCurrency: price.currency,
        ...(price.label ? { detectedPriceLabel: price.label } : {}),
        detectedPriceText: price.text,
      }
    : {};

  const engines = dedupe(
    raw.engines
      .map((engine) => normalizeText(engine))
      .filter((engine) => engine.length > 0),
  );

  return {
    ...asinFields,
    query,
    title,
    url,
    domain,
    snippet,
    rank: raw.rank,
    score,
    ...matchFields,
    ...priceFields,
    engines,
  };
}
|
||||||
|
|
||||||
|
/**
 * Computes a relevance score for a result; higher is better.
 *
 * Starting from `100 - rank`:
 * - +80 when a target ASIN is given and the matched ASIN equals it;
 * - +40 when no target ASIN is given but some ASIN was matched;
 * - +30 when a price was detected;
 * - +20 for a known, non-Amazon domain; -15 for an Amazon domain.
 *
 * @param input - The signals extracted from the result.
 * @returns The score.
 */
function scoreResult(input: {
  asin?: string;
  matchedAsin?: string;
  detectedPrice?: number;
  domain: string;
  rank: number;
}): number {
  const { asin, matchedAsin, detectedPrice, domain, rank } = input;
  let total = 100 - rank;

  if (asin) {
    if (matchedAsin === asin) total += 80;
  } else if (matchedAsin) {
    total += 40;
  }

  if (detectedPrice != null) total += 30;

  if (isAmazonDomain(domain)) {
    total -= 15;
  } else if (domain) {
    total += 20;
  }

  return total;
}
|
||||||
|
|
||||||
|
/**
 * Parses a base URL and makes sure its path ends with `/`, so that relative
 * paths resolved against it are appended instead of replacing the last
 * segment.
 *
 * @param value - The base URL string.
 * @returns The parsed URL with a trailing-slash path.
 * @throws TypeError when `value` is not a valid absolute URL.
 */
function normalizeBaseUrl(value: string): URL {
  const parsed = new URL(value);
  const path = parsed.pathname;
  if (path.slice(-1) !== "/") {
    parsed.pathname = path + "/";
  }
  return parsed;
}
|
||||||
|
|
||||||
|
/**
 * Returns the canonical serialization of a URL string.
 *
 * @param value - The URL as found in a search result.
 * @returns The serialized URL, or the trimmed input when it cannot be parsed
 *   as an absolute URL.
 */
function normalizeUrl(value: string): string {
  let parsed: URL;
  try {
    parsed = new URL(value);
  } catch {
    return value.trim();
  }
  return parsed.toString();
}
|
||||||
|
|
||||||
|
/**
 * Extracts the lower-cased hostname of a URL, without a leading `www.`.
 *
 * @param value - An absolute URL string.
 * @returns The hostname, or an empty string when `value` cannot be parsed.
 */
function extractDomain(value: string): string {
  let host: string;
  try {
    host = new URL(value).hostname;
  } catch {
    return "";
  }
  return host.replace(/^www\./i, "").toLowerCase();
}
|
||||||
|
|
||||||
|
/**
 * Tells whether a hostname has an `amazon.` label, either at the start or
 * after a dot (case-insensitive).
 *
 * @param domain - A hostname such as `amazon.com` or `smile.amazon.co.uk`.
 * @returns `true` when the hostname contains such a label.
 */
function isAmazonDomain(domain: string): boolean {
  const amazonLabel = /(^|\.)amazon\./i;
  return amazonLabel.test(domain);
}
|
||||||
|
|
||||||
|
/**
 * Finds the first ASIN-like token in a piece of text.
 *
 * The text is upper-cased before matching, so the returned token is always
 * upper-case.
 *
 * @param value - Text to scan (for example title, snippet and URL combined).
 * @returns The first match of `ASIN_MATCH_REGEX`, or `undefined` when there is none.
 */
function findMatchedAsin(value: string): string | undefined {
  const hits = value.toUpperCase().match(ASIN_MATCH_REGEX);
  if (hits === null) return undefined;
  return hits[0];
}
|
||||||
|
|
||||||
|
/**
 * Detects a price in free text.
 *
 * Labeled prices (matches of `LABELED_PRICE_REGEX`) take precedence; among
 * them, the one whose label ranks best wins, with ties resolved by position in
 * the text. If no labeled price parses, the first parsable match of
 * `PRICE_REGEX` is used.
 *
 * @param value - Text to scan, typically title plus snippet.
 * @returns The detected price, or `undefined` when none parses.
 */
function detectPrice(value: string): PriceDetection | undefined {
  const labeled: PriceDetection[] = [];
  for (const match of value.matchAll(LABELED_PRICE_REGEX)) {
    // Group 1 is the label, group 2 the price text.
    const detection = parsePriceMatch(match[2], match[1]);
    if (detection) labeled.push(detection);
  }
  if (labeled.length > 0) {
    labeled.sort(comparePriceDetections);
    return labeled[0];
  }

  for (const match of value.matchAll(PRICE_REGEX)) {
    const detection = parsePriceMatch(match[1]);
    if (detection) return detection;
  }
  return undefined;
}
|
||||||
|
|
||||||
|
/**
 * Parses the price text captured by one of the price regexes.
 *
 * The amount is the first number in the text, with optional thousands
 * separators (`,`) and an optional two-digit decimal part.
 *
 * @param rawPrice - The captured price text; may be `undefined` when the group did not match.
 * @param rawLabel - The captured label, if any; stored normalized and lower-cased.
 * @returns The parsed price, or `undefined` when there is no text, no number,
 *   or the number is not a positive finite value.
 */
function parsePriceMatch(
  rawPrice: string | undefined,
  rawLabel?: string,
): PriceDetection | undefined {
  if (!rawPrice) return undefined;

  const text = normalizeText(rawPrice);
  const currency = detectCurrency(text);

  const digits = /[0-9]{1,5}(?:,[0-9]{3})*(?:\.[0-9]{2})?/.exec(text)?.[0];
  if (!digits) return undefined;

  // Drop thousands separators before converting to a number.
  const amount = Number(digits.split(",").join(""));
  if (!(Number.isFinite(amount) && amount > 0)) return undefined;

  const label = rawLabel ? normalizeText(rawLabel).toLowerCase() : undefined;
  const labelField = label ? { label } : {};

  return { amount, currency, text, ...labelField };
}
|
||||||
|
|
||||||
|
/**
 * Sort comparator that orders price detections by the rank of their label,
 * best-ranked label first.
 *
 * @returns A negative number when `a` sorts first, positive when `b` does, 0 on a tie.
 */
function comparePriceDetections(a: PriceDetection, b: PriceDetection): number {
  const rankA = priceLabelRank(a.label);
  const rankB = priceLabelRank(b.label);
  return rankA - rankB;
}
|
||||||
|
|
||||||
|
/**
 * Ranks a price label by its position in `PRICE_LABELS`; lower is better.
 *
 * @param label - The label to rank.
 * @returns The label's index in `PRICE_LABELS`, or `PRICE_LABELS.length` for
 *   a missing or unknown label so that it sorts last.
 */
function priceLabelRank(label: string | undefined): number {
  const unranked = PRICE_LABELS.length;
  if (!label) return unranked;

  const position = PRICE_LABELS.indexOf(label as (typeof PRICE_LABELS)[number]);
  return position >= 0 ? position : unranked;
}
|
||||||
|
|
||||||
|
/**
 * Guesses the currency of a price text from currency codes or symbols.
 *
 * Rules are checked in order (EUR, GBP, INR, CAD, AUD); the first match wins.
 * Codes are matched as whole words, case-insensitively.
 *
 * @param value - The price text.
 * @returns The ISO currency code, defaulting to `"USD"` when nothing matches.
 */
function detectCurrency(value: string): string {
  const rules: Array<[RegExp, string]> = [
    [/\b(EUR)\b|€/i, "EUR"],
    [/\b(GBP)\b|£/i, "GBP"],
    [/\b(INR)\b|₹/i, "INR"],
    [/\b(CAD)\b/i, "CAD"],
    [/\b(AUD)\b/i, "AUD"],
  ];

  for (const [pattern, code] of rules) {
    if (pattern.test(value)) return code;
  }
  return "USD";
}
|
||||||
|
|
||||||
|
/**
 * Escapes the characters that are special inside a regular-expression
 * character class (`-`, `\`, `]`, `^`) by prefixing each with a backslash.
 *
 * @param value - Text to embed between `[` and `]` in a pattern.
 * @returns The escaped text.
 */
function escapeForCharClass(value: string): string {
  return value.replace(/[-\\\]^]/g, (special) => "\\" + special);
}
|
||||||
|
|
||||||
|
/**
 * Normalizes the engine field of a JSON result into a list of names.
 *
 * @param value - Either an array of engine names or a single name.
 * @returns For an array, its entries converted to strings with empty ones
 *   removed; for anything else, a one-element list with the trimmed string
 *   form, or an empty list when the value is missing or blank.
 */
function normalizeEngines(value: unknown): string[] {
  if (!Array.isArray(value)) {
    const single = optionalString(value);
    return single ? [single] : [];
  }

  const names: string[] = [];
  value.forEach((entry) => {
    const name = String(entry);
    if (name) names.push(name);
  });
  return names;
}
|
||||||
|
|
||||||
|
/**
 * Converts a loosely-typed value into a trimmed, non-empty string.
 *
 * Only primitive values are converted. Objects, arrays and functions yield
 * `undefined`, because their string form (for example `"[object Object]"`) is
 * never a meaningful title, URL or price.
 *
 * @param value - A value of unknown shape, typically from decoded JSON.
 * @returns The trimmed string, or `undefined` when the value is `null`,
 *   `undefined`, non-primitive, or blank after trimming.
 */
function optionalString(value: unknown): string | undefined {
  if (value == null) return undefined;
  if (typeof value === "object" || typeof value === "function") return undefined;

  const text = String(value).trim();
  return text ? text : undefined;
}
|
||||||
|
|
||||||
|
/**
 * Cleans up text taken from search results: HTML entities are decoded, runs
 * of whitespace collapse to a single space, and the ends are trimmed.
 *
 * @param value - The raw text.
 * @returns The cleaned text.
 */
function normalizeText(value: string): string {
  const decoded = decodeHtmlEntities(value);
  const trimmed = decoded.trim();
  return trimmed.replace(/\s+/g, " ");
}
|
||||||
|
|
||||||
|
/**
 * Appends `right` to `left`, separated by a single space.
 *
 * @param left - Text accumulated so far; may be empty.
 * @param right - Text to append.
 * @returns `right` alone when `left` is empty, otherwise `left + " " + right`.
 */
function appendWithSpace(left: string, right: string): string {
  if (left === "") return right;
  return [left, right].join(" ");
}
|
||||||
|
|
||||||
|
/**
 * Decodes the HTML character references commonly found in search-result text.
 *
 * Supported: the named references `&amp;`, `&quot;`, `&apos;`, `&lt;`, `&gt;`
 * and `&nbsp;`, plus decimal (`&#39;`) and hexadecimal (`&#x27;`) numeric
 * references. A non-breaking space decodes to a plain space. Unknown or
 * invalid references are left as they are.
 *
 * Decoding happens in a single pass, so already-escaped text is unescaped by
 * exactly one level (`&amp;lt;` becomes `&lt;`, not `<`).
 *
 * @param value - Text that may contain character references.
 * @returns The decoded text.
 */
function decodeHtmlEntities(value: string): string {
  // A Map (rather than an object literal) avoids accidental hits on inherited
  // keys such as "constructor".
  const named = new Map<string, string>([
    ["amp", "&"],
    ["quot", '"'],
    ["apos", "'"],
    ["lt", "<"],
    ["gt", ">"],
    ["nbsp", " "],
  ]);

  return value.replace(
    /&(?:#([0-9]+)|#[xX]([0-9a-fA-F]+)|([a-zA-Z]+));/g,
    (entity: string, decimal?: string, hex?: string, name?: string): string => {
      if (name !== undefined) {
        return named.get(name) ?? entity;
      }

      const codePoint =
        decimal !== undefined ? Number.parseInt(decimal, 10) : Number.parseInt(hex ?? "", 16);
      const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
      if (!Number.isInteger(codePoint) || codePoint <= 0 || codePoint > 0x10ffff || isSurrogate) {
        return entity;
      }
      // Keep numeric and named non-breaking spaces consistent.
      return codePoint === 0xa0 ? " " : String.fromCodePoint(codePoint);
    },
  );
}
|
||||||
|
|
||||||
|
/**
 * Removes duplicate strings while keeping the first occurrence of each, in
 * original order.
 *
 * @param values - The input list; not modified.
 * @returns A new list without duplicates.
 */
function dedupe(values: string[]): string[] {
  const unique = new Set<string>(values);
  return [...unique];
}
|
||||||
|
|
||||||
|
/**
 * Reads a numeric setting from the environment.
 *
 * A variable that is unset, empty or whitespace-only yields the fallback.
 * Without that check a blank value would be read as `0`, because
 * `Number("")` is `0`.
 *
 * @param key - Name of the environment variable.
 * @param fallback - Value to use when the variable is unset, blank or not a finite number.
 * @returns The parsed number, or `fallback`.
 */
function readEnvInt(key: string, fallback: number): number {
  const raw = Bun.env[key];
  if (raw === undefined || raw.trim() === "") return fallback;

  const parsed = Number(raw);
  return Number.isFinite(parsed) ? parsed : fallback;
}
|
||||||
|
|
||||||
|
/**
 * Validates that a number is a positive integer.
 *
 * @param value - The candidate value.
 * @param fallback - Returned when `value` is not an integer greater than zero.
 * @returns `value` when it is a positive integer, otherwise `fallback`.
 */
function positiveInteger(value: number, fallback: number): number {
  if (!Number.isInteger(value)) return fallback;
  return value > 0 ? value : fallback;
}
|
||||||
Reference in New Issue
Block a user