feat: add supplier scoring and UPC file analysis functionality
- Implemented supplier scoring logic in `supplier-scoring.ts` with functions to compute demand score, competition penalty, and overall supplier product score. - Created unit tests for supplier scoring in `supplier-scoring.test.ts` to validate scoring logic against various scenarios. - Developed UPC file analysis tool in `upc-file-analysis.ts` to process UPCs in batches, fetch product data from Keepa and SP-API, and generate supplier results. - Added UPC input reading functionality in `upc-file-reader.ts` to handle XLSX and XLS files, including validation for UPC formats. - Introduced a command-line tool in `upc-lookup.ts` for looking up UPCs and displaying detailed results or mappings to ASINs. - Enhanced error handling and logging throughout the new modules for better traceability and user feedback.
This commit is contained in:
777
src/integrations/searxng.ts
Normal file
777
src/integrations/searxng.ts
Normal file
@@ -0,0 +1,777 @@
|
||||
/** SearXNG instance used when neither `options.baseUrl` nor `SEARXNG_URL` is set. */
const DEFAULT_SEARXNG_URL = "https://searxng.nvictor.me/";
/** Endpoint used for the "google-custom-search" provider unless `options.baseUrl` overrides it. */
const DEFAULT_GOOGLE_CUSTOM_SEARCH_URL =
  "https://www.googleapis.com/customsearch/v1";
/** Endpoint used for the "serpapi" provider unless `options.baseUrl` overrides it. */
const DEFAULT_SERPAPI_URL = "https://serpapi.com/search.json";
/** Fallback request timeout in milliseconds. */
const DEFAULT_TIMEOUT_MS = 10_000;
/** Fallback cap on the number of results returned to the caller. */
const DEFAULT_MAX_RESULTS = 10;

/** Matches a string that is, in its entirety, an upper-case ASIN-shaped token ("B" + 9 alphanumerics). */
const ASIN_REGEX = /^B[0-9A-Z]{9}$/;
/** Finds every ASIN-shaped token inside free text (case-insensitive, word-bounded). */
const ASIN_MATCH_REGEX = /\bB[0-9A-Z]{9}\b/gi;

/**
 * Price labels recognised in snippets. The array order is significant:
 * `priceLabelRank` uses the index as the priority (lower index wins).
 */
const PRICE_LABELS = [
  "selling price",
  "sale price",
  "offer price",
  "current price",
  "our price",
  "list price",
  "price",
] as const;

/** Regex alternation of the currency codes accepted in front of or behind an amount. */
const CURRENCY_CODES = "USD|US\\$|EUR|GBP|INR|CAD|AUD";
/** Currency symbols accepted directly in front of an amount. */
const CURRENCY_SYMBOLS = "$€£₹";

// Shared regex fragments. They are composed below so both price patterns stay in sync.
const PRICE_AMOUNT_PATTERN = "[0-9]{1,5}(?:,[0-9]{3})*(?:\\.[0-9]{2})?";
const CURRENCY_SYMBOLS_ESCAPED = escapeForCharClass(CURRENCY_SYMBOLS);
const SYMBOL_PRICE_PATTERN = `(?:${CURRENCY_CODES})?\\s*[${CURRENCY_SYMBOLS_ESCAPED}]\\s*${PRICE_AMOUNT_PATTERN}`;
const CODE_PRICE_PATTERN = `(?:${CURRENCY_CODES})\\s*${PRICE_AMOUNT_PATTERN}`;

/**
 * Matches "<label> ... <price>" where at most 24 non-digit, non-symbol
 * characters separate the label from the price.
 * Group 1 is the label, group 2 the price text.
 */
const LABELED_PRICE_REGEX = new RegExp(
  `\\b(${PRICE_LABELS.join("|")})\\b[^${CURRENCY_SYMBOLS_ESCAPED}0-9]{0,24}(${SYMBOL_PRICE_PATTERN}|${CODE_PRICE_PATTERN})`,
  "gi",
);

/**
 * Matches a bare price: symbol-prefixed, code-prefixed, or code-suffixed.
 * Group 1 is the whole price text.
 */
const PRICE_REGEX = new RegExp(
  `(${SYMBOL_PRICE_PATTERN}|${CODE_PRICE_PATTERN}|${PRICE_AMOUNT_PATTERN}\\s*(?:${CURRENCY_CODES}))`,
  "gi",
);
|
||||
|
||||
/**
 * One normalized, scored search hit returned by `searchProductOffers` /
 * `searchAsinOffers`.
 */
export type SearxngOfferSearchResult = {
  /** The ASIN the search was run for; present only when the query itself was an ASIN. */
  asin?: string;
  /** The query string that was actually sent to the search provider. */
  query: string;
  /** Result title with HTML entities decoded and whitespace collapsed. */
  title: string;
  /** Normalized result URL (or the trimmed raw value when it cannot be parsed). */
  url: string;
  /** Lower-cased hostname of `url` without a leading "www."; empty when `url` is unparseable. */
  domain: string;
  /** Result snippet with HTML entities decoded and whitespace collapsed. */
  snippet: string;
  /** Position reported by the provider or derived from the response order. */
  rank: number;
  /** Relevance score computed by `scoreResult`; higher sorts first. */
  score: number;
  /** ASIN-shaped token found in the title, snippet, or URL. */
  matchedAsin?: string;
  /** Numeric amount of the price detected in title/snippet. */
  detectedPrice?: number;
  /** Currency code inferred for `detectedPrice`. */
  detectedPriceCurrency?: string;
  /** Lower-cased label (e.g. "sale price") that preceded the detected price, if any. */
  detectedPriceLabel?: string;
  /** The text fragment the price was parsed from. */
  detectedPriceText?: string;
  /** De-duplicated names of the engines that produced this hit. */
  engines: string[];
};
|
||||
|
||||
/** Options accepted by `searchProductOffers` and `searchAsinOffers`. */
export type SearxngSearchOptions = {
  /**
   * Search backend. Any value other than "searxng" or
   * "google-custom-search" (including `undefined`) selects SerpApi.
   */
  provider?: "serpapi" | "google-custom-search" | "searxng";
  /** Overrides the endpoint / instance URL of the selected provider. */
  baseUrl?: string;
  /** Google API key; falls back to `GOOGLE_API_KEY`. */
  googleApiKey?: string;
  /** Google search engine id; falls back to `GOOGLE_CSE_ID`, `GOOGLE_CX`, `GOOGLE_SEARCH_ENGINE_ID`. */
  googleCx?: string;
  /** SerpApi key; falls back to `SERPAPI_API_KEY`. */
  serpapiApiKey?: string;
  /** Request timeout in milliseconds; falls back to `SEARXNG_TIMEOUT_MS`, then 10 000. */
  timeoutMs?: number;
  /** Maximum number of results to return; falls back to `SEARXNG_MAX_RESULTS`, then 10. */
  maxResults?: number;
  /** 1-based result page; invalid values are treated as 1. */
  page?: number;
  /** SearXNG `categories` parameter; defaults to "general". */
  categories?: string;
  /** SearXNG `engines` parameter (comma-separated engine names). */
  engines?: string;
  /** When the query is an ASIN, keep results that do not mention that ASIN. */
  includeUnmatchedAsinResults?: boolean;
  /** Custom `fetch` implementation (useful for tests). */
  fetchImpl?: typeof fetch;
};
|
||||
|
||||
/** Provider-agnostic search hit, before normalization and scoring. */
type RawSearchResult = {
  title: string;
  url: string;
  snippet: string;
  /** Names of the engines/providers that reported the hit. */
  engines: string[];
  /** 1-based position of the hit. */
  rank: number;
};

/** The part of a SearXNG JSON response that this module reads. */
type JsonSearchResponse = {
  results?: Array<Record<string, unknown>>;
};

/** A price parsed out of free text by `parsePriceMatch`. */
type PriceDetection = {
  /** Parsed numeric amount (always > 0). */
  amount: number;
  /** Currency code inferred from `text`. */
  currency: string;
  /** The normalized text the amount was parsed from. */
  text: string;
  /** Lower-cased price label that preceded the price, when one was matched. */
  label?: string;
};
|
||||
|
||||
/**
 * Searches for offers of a single product identified by its ASIN.
 *
 * @param asin - ASIN to look up; surrounding whitespace and letter case are ignored.
 * @param options - Provider selection and request tuning.
 * @returns Scored results, best first.
 * @throws Error when `asin` is not ASIN-shaped (the returned promise rejects).
 */
export async function searchAsinOffers(
  asin: string,
  options: SearxngSearchOptions = {},
): Promise<SearxngOfferSearchResult[]> {
  const canonicalAsin = normalizeAsin(asin);
  return searchProductOffers(canonicalAsin, options);
}
|
||||
|
||||
/**
 * Runs a product search against the configured provider and returns
 * normalized, scored results.
 *
 * When the query is exactly an ASIN, shopping keywords are appended to it and,
 * unless `options.includeUnmatchedAsinResults` is set, only results that
 * mention that ASIN are kept.
 *
 * @param query - Free-text query or an ASIN.
 * @param options - Provider selection and request tuning.
 * @returns At most `maxResults` results, ordered by descending score, then ascending rank.
 * @throws Error when the query is blank or the provider request fails.
 */
export async function searchProductOffers(
  query: string,
  options: SearxngSearchOptions = {},
): Promise<SearxngOfferSearchResult[]> {
  const trimmedQuery = query.trim();
  if (trimmedQuery === "") {
    throw new Error("Search query is required.");
  }

  const asinFromQuery = getAsinQuery(trimmedQuery);
  const providerQuery = asinFromQuery
    ? `${asinFromQuery} price sale offer buy online`
    : trimmedQuery;
  const limit = positiveInteger(
    options.maxResults ?? readEnvInt("SEARXNG_MAX_RESULTS", DEFAULT_MAX_RESULTS),
    DEFAULT_MAX_RESULTS,
  );

  // SerpApi is the fallback when no (or an unknown) provider is requested.
  let fetched: RawSearchResult[];
  if (options.provider === "searxng") {
    fetched = await fetchSearxngResults(providerQuery, options);
  } else if (options.provider === "google-custom-search") {
    fetched = await fetchGoogleCustomSearchResults(providerQuery, {
      ...options,
      maxResults: limit,
    });
  } else {
    fetched = await fetchSerpApiGoogleShoppingResults(providerQuery, {
      ...options,
      provider: "serpapi",
      maxResults: limit,
    });
  }

  const keepUnmatched = !asinFromQuery || options.includeUnmatchedAsinResults;
  const kept: SearxngOfferSearchResult[] = [];
  for (const raw of fetched) {
    const normalized = normalizeResult(raw, providerQuery, asinFromQuery);
    if (!normalized.url) continue;
    if (keepUnmatched || normalized.matchedAsin === asinFromQuery) {
      kept.push(normalized);
    }
  }

  kept.sort((left, right) => right.score - left.score || left.rank - right.rank);
  return kept.slice(0, limit);
}
|
||||
|
||||
/**
 * Trims and upper-cases an ASIN and validates its shape.
 *
 * @param value - Candidate ASIN.
 * @returns The canonical (upper-case) ASIN.
 * @throws Error when the value does not match `ASIN_REGEX`.
 */
export function normalizeAsin(value: string): string {
  const candidate = value.trim().toUpperCase();
  if (ASIN_REGEX.test(candidate)) {
    return candidate;
  }
  throw new Error(`Invalid ASIN: ${value}`);
}
|
||||
|
||||
/**
 * Returns the canonical ASIN when the whole query is ASIN-shaped,
 * otherwise `undefined`. Never throws.
 */
function getAsinQuery(value: string): string | undefined {
  const candidate = value.trim().toUpperCase();
  if (!ASIN_REGEX.test(candidate)) {
    return undefined;
  }
  return candidate;
}
|
||||
|
||||
/**
 * Queries a SearXNG instance, preferring its JSON API and falling back to
 * scraping the HTML results page when the JSON request is not answered with
 * an OK `application/json` response.
 *
 * @param query - Query text; a single-engine bang may be prepended.
 * @param options - `baseUrl`, `timeoutMs`, `page`, `categories`, `engines` and `fetchImpl` are read.
 * @returns Raw results in response order.
 * @throws Error when the HTML fallback request is not OK, or when a request fails or times out.
 */
async function fetchSearxngResults(
  query: string,
  options: SearxngSearchOptions,
): Promise<RawSearchResult[]> {
  const baseUrl = normalizeBaseUrl(
    options.baseUrl ?? Bun.env.SEARXNG_URL ?? DEFAULT_SEARXNG_URL,
  );
  const timeoutMs = positiveInteger(
    options.timeoutMs ?? readEnvInt("SEARXNG_TIMEOUT_MS", DEFAULT_TIMEOUT_MS),
    DEFAULT_TIMEOUT_MS,
  );
  const page = positiveInteger(options.page ?? 1, 1);
  const categories = options.categories ?? "general";
  const fetchImpl = options.fetchImpl ?? fetch;
  const requestQuery = applySearxngEngineBang(query, options.engines);

  const jsonUrl = buildSearchUrl(baseUrl, requestQuery, {
    categories,
    engines: options.engines,
    page,
    format: "json",
  });
  const jsonResponse = await fetchWithTimeout(fetchImpl, jsonUrl, timeoutMs);
  if (isJsonResponse(jsonResponse)) {
    const json = (await jsonResponse.json()) as JsonSearchResponse;
    return parseJsonResults(json);
  }

  // The JSON attempt was rejected (error status or non-JSON body). Discard its
  // unread body before retrying so the stream/connection is released; a
  // failure to cancel is irrelevant to the fallback and deliberately ignored.
  await jsonResponse.body?.cancel().catch(() => undefined);

  const htmlUrl = buildSearchUrl(baseUrl, requestQuery, {
    categories,
    engines: options.engines,
    page,
  });
  const htmlResponse = await fetchWithTimeout(fetchImpl, htmlUrl, timeoutMs);
  if (!htmlResponse.ok) {
    throw new Error(
      `SearXNG search failed: status=${htmlResponse.status} url=${htmlUrl.toString()}`,
    );
  }

  return parseHtmlResults(await htmlResponse.text());
}
|
||||
|
||||
/**
 * Prefixes the query with a SearXNG "!bang" when exactly one engine is
 * requested and a shortcut for it is known. Queries that already start with
 * "!" and multi-engine requests are returned unchanged.
 *
 * @param query - The search query.
 * @param engines - Comma-separated engine names, or `undefined`.
 */
function applySearxngEngineBang(query: string, engines: string | undefined): string {
  if (!engines) return query;
  if (query.trim().startsWith("!")) return query;

  const names: string[] = [];
  for (const part of engines.split(",")) {
    const name = part.trim().toLowerCase();
    if (name) names.push(name);
  }

  const [onlyEngine, ...others] = names;
  if (onlyEngine === undefined || others.length > 0) return query;

  const bang = searxngEngineShortcut(onlyEngine);
  return bang ? `!${bang} ${query}` : query;
}
|
||||
|
||||
/**
 * Maps a lower-case engine name to its SearXNG bang shortcut.
 * Only "google" (→ "go") is known; anything else yields `undefined`.
 */
function searxngEngineShortcut(engine: string): string | undefined {
  switch (engine) {
    case "google":
      return "go";
    default:
      return undefined;
  }
}
|
||||
|
||||
/**
 * True when the response has an OK status and its `content-type` header
 * mentions `application/json` (case-insensitive).
 */
function isJsonResponse(response: Response): boolean {
  if (!response.ok) return false;
  const header = response.headers.get("content-type");
  return header !== null && header.toLowerCase().includes("application/json");
}
|
||||
|
||||
/**
 * Performs a GET request that is aborted if no response arrives within
 * `timeoutMs`. The timer is cleared as soon as `fetchImpl` settles, so
 * reading the response body afterwards is not covered by the timeout.
 *
 * @param fetchImpl - The `fetch` implementation to call.
 * @param url - Request URL.
 * @param timeoutMs - Milliseconds to wait before aborting.
 * @returns The response as produced by `fetchImpl`.
 */
async function fetchWithTimeout(
  fetchImpl: typeof fetch,
  url: URL,
  timeoutMs: number,
): Promise<Response> {
  const abortController = new AbortController();
  const requestHeaders = {
    accept: "application/json,text/html;q=0.9,*/*;q=0.8",
    "user-agent": "asin-check/1.0 (+https://searxng.nvictor.me/)",
  };
  const timer = setTimeout(() => {
    abortController.abort();
  }, timeoutMs);

  try {
    const response = await fetchImpl(url, {
      signal: abortController.signal,
      headers: requestHeaders,
    });
    return response;
  } finally {
    clearTimeout(timer);
  }
}
|
||||
|
||||
/**
 * Builds the `<baseUrl>search` URL for a SearXNG instance.
 *
 * Query parameters are set in this order: `q`, `categories`, `engines`
 * (only when non-empty), `pageno`, `format` (only when non-empty).
 *
 * @param baseUrl - Instance base URL; "search" is resolved relative to it.
 * @param query - The search query.
 * @param params - Categories, optional engines, page number and optional response format.
 */
function buildSearchUrl(
  baseUrl: URL,
  query: string,
  params: { categories: string; engines?: string; page: number; format?: string },
): URL {
  const target = new URL("search", baseUrl);
  const { searchParams } = target;
  const { categories, engines, page, format } = params;

  searchParams.set("q", query);
  searchParams.set("categories", categories);
  if (engines) searchParams.set("engines", engines);
  searchParams.set("pageno", String(page));
  if (format) searchParams.set("format", format);

  return target;
}
|
||||
|
||||
/**
 * Queries the Google Custom Search JSON API.
 *
 * Credentials come from `options` first, then from the environment. At most
 * 10 results are requested per call; `start` is derived from the page number.
 *
 * @param query - The search query.
 * @param options - `googleApiKey`, `googleCx`, `timeoutMs`, `page`, `maxResults`, `baseUrl` and `fetchImpl` are read.
 * @returns Raw results in response order.
 * @throws Error when the API key or engine id is missing, or the response is not OK.
 */
async function fetchGoogleCustomSearchResults(
  query: string,
  options: SearxngSearchOptions,
): Promise<RawSearchResult[]> {
  const env = Bun.env;
  const apiKey = options.googleApiKey ?? env.GOOGLE_API_KEY;
  const cx =
    options.googleCx ??
    env.GOOGLE_CSE_ID ??
    env.GOOGLE_CX ??
    env.GOOGLE_SEARCH_ENGINE_ID;
  if (!apiKey) {
    throw new Error("Missing GOOGLE_API_KEY for Google Custom Search.");
  }
  if (!cx) {
    throw new Error(
      "Missing Google Custom Search engine id. Set GOOGLE_CSE_ID, GOOGLE_CX, or GOOGLE_SEARCH_ENGINE_ID.",
    );
  }

  const timeoutMs = positiveInteger(
    options.timeoutMs ?? readEnvInt("SEARXNG_TIMEOUT_MS", DEFAULT_TIMEOUT_MS),
    DEFAULT_TIMEOUT_MS,
  );
  const page = positiveInteger(options.page ?? 1, 1);
  const requested = positiveInteger(
    options.maxResults ?? DEFAULT_MAX_RESULTS,
    DEFAULT_MAX_RESULTS,
  );
  const num = Math.min(10, requested);
  const fetchImpl = options.fetchImpl ?? fetch;

  const url = new URL(options.baseUrl ?? DEFAULT_GOOGLE_CUSTOM_SEARCH_URL);
  const queryParams: Array<[string, string]> = [
    ["key", apiKey],
    ["cx", cx],
    ["q", query],
    ["num", String(num)],
    // `start` is 1-based: page 1 starts at 1, page 2 at num + 1, and so on.
    ["start", String((page - 1) * num + 1)],
  ];
  for (const [name, value] of queryParams) {
    url.searchParams.set(name, value);
  }

  const response = await fetchWithTimeout(fetchImpl, url, timeoutMs);
  if (response.ok) {
    const json = (await response.json()) as GoogleCustomSearchResponse;
    return parseGoogleCustomSearchResults(json);
  }

  // Include a bounded excerpt of the error body to aid debugging.
  const body = await response.text().catch(() => "");
  throw new Error(
    `Google Custom Search failed: status=${response.status} ${body.slice(0, 300)}`,
  );
}
|
||||
|
||||
/** The part of a Google Custom Search response that this module reads. */
type GoogleCustomSearchResponse = {
  items?: GoogleCustomSearchItem[];
};

/** One entry of `GoogleCustomSearchResponse.items`. */
type GoogleCustomSearchItem = {
  title?: string;
  /** Used as the result URL; items without it are skipped. */
  link?: string;
  snippet?: string;
  displayLink?: string;
  /** Structured page metadata; `offer`, `product` and `metatags` are scanned for prices. */
  pagemap?: Record<string, unknown>;
};
|
||||
|
||||
/** The part of a SerpApi Google Shopping response that this module reads. */
type SerpApiShoppingResponse = {
  shopping_results?: SerpApiShoppingResult[];
  inline_shopping_results?: SerpApiShoppingResult[];
  categorized_shopping_results?: Array<{
    shopping_results?: SerpApiShoppingResult[];
  }>;
  /** When present, the request is treated as failed. */
  error?: string;
};

/** One shopping entry from any of the result lists in `SerpApiShoppingResponse`. */
type SerpApiShoppingResult = {
  /** Used as the result rank when present. */
  position?: number;
  title?: string;
  /** Emitted into the snippet as "merchant: …". */
  source?: string;
  /** First choice for the result URL. */
  link?: string;
  /** Second choice for the result URL. */
  product_link?: string;
  /** Last-resort choice for the result URL. */
  serpapi_product_api?: string;
  /** Emitted into the snippet as "offer price: …". */
  price?: string;
  extracted_price?: number;
  /** Emitted into the snippet as "list price: …". */
  old_price?: string;
  extracted_old_price?: number;
  delivery?: string;
  rating?: number;
  reviews?: number;
  snippet?: string;
};
|
||||
|
||||
/**
 * Queries SerpApi's `google_shopping` engine (google.com, `gl=us`, `hl=en`).
 *
 * @param query - The search query.
 * @param options - `serpapiApiKey`, `timeoutMs`, `page`, `baseUrl` and `fetchImpl` are read.
 * @returns Raw shopping results.
 * @throws Error when the API key is missing, the response is not OK, or the
 *   response body carries an `error` field.
 */
async function fetchSerpApiGoogleShoppingResults(
  query: string,
  options: SearxngSearchOptions,
): Promise<RawSearchResult[]> {
  const apiKey = options.serpapiApiKey ?? Bun.env.SERPAPI_API_KEY;
  if (!apiKey) {
    throw new Error(
      "Missing SERPAPI_API_KEY. Google does not provide an official public Shopping-tab search API; use SerpApi's google_shopping API or another SERP provider.",
    );
  }

  const timeoutMs = positiveInteger(
    options.timeoutMs ?? readEnvInt("SEARXNG_TIMEOUT_MS", DEFAULT_TIMEOUT_MS),
    DEFAULT_TIMEOUT_MS,
  );
  const page = positiveInteger(options.page ?? 1, 1);
  const fetchImpl = options.fetchImpl ?? fetch;

  const url = new URL(options.baseUrl ?? DEFAULT_SERPAPI_URL);
  const queryParams: Array<[string, string]> = [
    ["engine", "google_shopping"],
    ["q", query],
    ["api_key", apiKey],
    ["google_domain", "google.com"],
    ["gl", "us"],
    ["hl", "en"],
    // Offset of the first result: pages advance in steps of 60.
    ["start", String((page - 1) * 60)],
  ];
  for (const [name, value] of queryParams) {
    url.searchParams.set(name, value);
  }

  const response = await fetchWithTimeout(fetchImpl, url, timeoutMs);
  if (!response.ok) {
    // Include a bounded excerpt of the error body to aid debugging.
    const body = await response.text().catch(() => "");
    throw new Error(
      `SerpApi Google Shopping failed: status=${response.status} ${body.slice(0, 300)}`,
    );
  }

  const payload = (await response.json()) as SerpApiShoppingResponse;
  if (payload.error) {
    throw new Error(`SerpApi Google Shopping failed: ${payload.error}`);
  }
  return parseSerpApiShoppingResults(payload);
}
|
||||
|
||||
/**
 * Flattens all shopping result lists of a SerpApi response into raw results.
 *
 * Prices, merchant, delivery, rating and review count are folded into the
 * snippet as labeled text so the generic price detection can pick them up.
 * Entries without any usable URL are dropped.
 *
 * @param json - Parsed SerpApi response.
 * @returns Raw results; `rank` is the item's `position`, or its 1-based index
 *   in the combined list when no position is given.
 */
function parseSerpApiShoppingResults(
  json: SerpApiShoppingResponse,
): RawSearchResult[] {
  const combined: SerpApiShoppingResult[] = [];
  combined.push(...(json.shopping_results ?? []));
  combined.push(...(json.inline_shopping_results ?? []));
  for (const category of json.categorized_shopping_results ?? []) {
    combined.push(...(category.shopping_results ?? []));
  }

  const parsed: RawSearchResult[] = [];
  combined.forEach((item, index) => {
    const url =
      optionalString(item.link) ??
      optionalString(item.product_link) ??
      optionalString(item.serpapi_product_api);
    if (!url) return;

    const parts: string[] = [];
    const priceText = optionalString(item.price);
    if (priceText) parts.push(`offer price: ${priceText}`);
    if (optionalString(item.old_price)) parts.push(`list price: ${item.old_price}`);
    if (optionalString(item.source)) parts.push(`merchant: ${item.source}`);
    const delivery = optionalString(item.delivery);
    if (delivery) parts.push(delivery);
    const description = optionalString(item.snippet);
    if (description) parts.push(description);
    if (typeof item.rating === "number") parts.push(`rating: ${item.rating}`);
    if (typeof item.reviews === "number") parts.push(`reviews: ${item.reviews}`);

    parsed.push({
      title: optionalString(item.title) ?? "",
      url,
      snippet: parts.join(" "),
      engines: ["serpapi google shopping"],
      rank: item.position ?? index + 1,
    });
  });
  return parsed;
}
|
||||
|
||||
/**
 * Converts Google Custom Search items into raw results.
 *
 * Price metadata extracted from the item's pagemap is appended to the
 * snippet. Items without a link are dropped.
 *
 * @param json - Parsed Google Custom Search response.
 * @returns Raw results; `rank` is the item's 1-based index in `items`.
 */
function parseGoogleCustomSearchResults(
  json: GoogleCustomSearchResponse,
): RawSearchResult[] {
  const parsed: RawSearchResult[] = [];
  for (const [index, item] of (json.items ?? []).entries()) {
    const url = optionalString(item.link);
    if (!url) continue;

    const snippetParts: string[] = [];
    const snippet = optionalString(item.snippet);
    if (snippet) snippetParts.push(snippet);
    const metadataText = extractGoogleCustomSearchMetadataText(item);
    if (metadataText) snippetParts.push(metadataText);

    parsed.push({
      title: optionalString(item.title) ?? "",
      url,
      snippet: snippetParts.join(" "),
      engines: ["google custom search"],
      rank: index + 1,
    });
  }
  return parsed;
}
|
||||
|
||||
/**
 * Collects "offer price: …" fragments from an item's pagemap, scanning the
 * `offer`, `product` and `metatags` sections in that order.
 *
 * @param item - A Google Custom Search item.
 * @returns The fragments joined by single spaces, or "" when none were found.
 */
function extractGoogleCustomSearchMetadataText(
  item: GoogleCustomSearchItem,
): string {
  const pagemap = item.pagemap ?? {};
  const fragments: string[] = [];

  const sections = [pagemap.offer, pagemap.product, pagemap.metatags];
  for (const section of sections) {
    for (const entry of readPagemapObjects(section)) {
      appendPriceMetadata(fragments, entry);
    }
  }

  return fragments.join(" ");
}
|
||||
|
||||
/**
 * Appends an "offer price: [currency ]<price>" fragment to `chunks` when the
 * metadata object carries a price under any of the known keys. Does nothing
 * when no price is found.
 *
 * @param chunks - Output list; mutated in place.
 * @param value - One pagemap object (offer, product, or metatags entry).
 */
function appendPriceMetadata(chunks: string[], value: Record<string, unknown>): void {
  // Returns the first key (in the given order) that holds a non-blank value.
  const firstPresent = (keys: readonly string[]): string | undefined => {
    for (const key of keys) {
      const text = optionalString(value[key]);
      if (text !== undefined) return text;
    }
    return undefined;
  };

  const price = firstPresent([
    "price",
    "lowprice",
    "highprice",
    "product:price:amount",
    "og:price:amount",
    "twitter:data1",
  ]);
  if (!price) return;

  const currency = firstPresent([
    "pricecurrency",
    "priceCurrency",
    "product:price:currency",
    "og:price:currency",
  ]);
  if (currency) {
    chunks.push(`offer price: ${currency} ${price}`);
  } else {
    chunks.push(`offer price: ${price}`);
  }
}
|
||||
|
||||
/**
 * Returns the plain-object entries of a pagemap section.
 *
 * @param value - Expected to be an array; anything else yields `[]`.
 * @returns The entries that are non-null, non-array objects, in original order.
 */
function readPagemapObjects(value: unknown): Array<Record<string, unknown>> {
  if (!Array.isArray(value)) return [];

  const objects: Array<Record<string, unknown>> = [];
  for (const entry of value as unknown[]) {
    if (entry == null) continue;
    if (typeof entry !== "object" || Array.isArray(entry)) continue;
    objects.push(entry as Record<string, unknown>);
  }
  return objects;
}
|
||||
|
||||
/**
 * Converts a SearXNG JSON response into raw results.
 *
 * Entries without a URL are dropped. Engines are read from `engines`, or
 * from `engine` when `engines` is null or undefined.
 *
 * @param json - Parsed SearXNG response.
 * @returns Raw results; `rank` is the entry's 1-based index in `results`.
 */
function parseJsonResults(json: JsonSearchResponse): RawSearchResult[] {
  const parsed: RawSearchResult[] = [];
  for (const [index, entry] of (json.results ?? []).entries()) {
    const url = optionalString(entry.url);
    if (!url) continue;

    parsed.push({
      title: optionalString(entry.title) ?? "",
      url,
      snippet: optionalString(entry.content) ?? "",
      engines: normalizeEngines(entry.engines ?? entry.engine),
      rank: index + 1,
    });
  }
  return parsed;
}
|
||||
|
||||
/**
 * Scrapes results from a SearXNG HTML results page using `HTMLRewriter`.
 *
 * Each `article.result` element becomes one result:
 * - URL: `href` of `a.url_header`, or of the `h3 a` link if no URL was set yet
 * - title: text inside `h3 a`
 * - snippet: text inside `p.content`
 * - engines: one entry per non-blank text chunk inside `.engines span`
 *
 * Every text chunk is routed to its field by the handler that receives it.
 * Titles therefore stay complete when the link contains nested markup (for
 * example highlight spans), which yields several text nodes for one element.
 * Articles are collected as they open, so no end-tag hook is required.
 *
 * @param html - The HTML document to scan.
 * @returns Results that have a URL, ranked 1..n in document order.
 */
async function parseHtmlResults(html: string): Promise<RawSearchResult[]> {
  type Draft = {
    title: string;
    url: string;
    snippet: string;
    engines: string[];
  };

  const drafts: Draft[] = [];
  // The article currently being read. All selectors below are scoped to
  // `article.result`, so handlers only fire after this has been assigned.
  let current: Draft | undefined;

  const collapse = (text: string): string => text.replace(/\s+/g, " ").trim();

  // Keeps the first URL seen for the article; later links never override it.
  const setUrlOnce = (href: string | null): void => {
    if (current && !current.url) {
      current.url = href ?? "";
    }
  };

  const appendField = (field: "title" | "snippet", chunk: string): void => {
    if (!current) return;
    const text = collapse(chunk);
    if (!text) return;
    current[field] = appendWithSpace(current[field], text);
  };

  const appendEngine = (chunk: string): void => {
    if (!current) return;
    const text = collapse(chunk);
    if (text) current.engines.push(text);
  };

  const response = new HTMLRewriter()
    .on("article.result", {
      element() {
        current = { title: "", url: "", snippet: "", engines: [] };
        drafts.push(current);
      },
    })
    .on("article.result a.url_header", {
      element(element) {
        setUrlOnce(element.getAttribute("href"));
      },
    })
    .on("article.result h3 a", {
      element(element) {
        setUrlOnce(element.getAttribute("href"));
      },
      text(text) {
        appendField("title", text.text);
      },
    })
    .on("article.result p.content", {
      text(text) {
        appendField("snippet", text.text);
      },
    })
    .on("article.result .engines span", {
      text(text) {
        appendEngine(text.text);
      },
    })
    .transform(new Response(html));

  // Draining the transformed body is what drives the handlers above.
  await response.text();

  return drafts
    .filter((draft) => draft.url)
    .map((draft, index) => ({ ...draft, rank: index + 1 }));
}
|
||||
|
||||
/**
 * Turns a raw provider result into a scored `SearxngOfferSearchResult`.
 *
 * ASIN matching prefers the requested ASIN: if `asin` occurs anywhere in the
 * title, snippet or URL, it is reported as `matchedAsin` even when another
 * ASIN-shaped token appears earlier. Any 10-character alphanumeric word
 * starting with "B" (e.g. "BLACKBERRY") is ASIN-shaped, so taking the first
 * token would wrongly mark such results as unmatched. Without a requested
 * ASIN, or when it is not mentioned, the first ASIN-shaped token is used.
 *
 * @param raw - Result as parsed from the provider response.
 * @param query - The query that was sent to the provider.
 * @param asin - The upper-case ASIN being searched for, if the query was an ASIN.
 * @returns The normalized result; optional fields are omitted rather than set to `undefined`.
 */
function normalizeResult(
  raw: RawSearchResult,
  query: string,
  asin?: string,
): SearxngOfferSearchResult {
  const url = normalizeUrl(raw.url);
  const domain = extractDomain(url);
  const title = normalizeText(raw.title);
  const snippet = normalizeText(raw.snippet);

  const haystack = `${title} ${snippet} ${url}`;
  const asinTokens = haystack.toUpperCase().match(ASIN_MATCH_REGEX) ?? [];
  const matchedAsin =
    asin !== undefined && asinTokens.includes(asin)
      ? asin
      : findMatchedAsin(haystack);

  const detectedPrice = detectPrice(`${title} ${snippet}`);
  const score = scoreResult({
    asin,
    matchedAsin,
    detectedPrice: detectedPrice?.amount,
    domain,
    rank: raw.rank,
  });

  return {
    ...(asin ? { asin } : {}),
    query,
    title,
    url,
    domain,
    snippet,
    rank: raw.rank,
    score,
    ...(matchedAsin ? { matchedAsin } : {}),
    ...(detectedPrice
      ? {
          detectedPrice: detectedPrice.amount,
          detectedPriceCurrency: detectedPrice.currency,
          ...(detectedPrice.label
            ? { detectedPriceLabel: detectedPrice.label }
            : {}),
          detectedPriceText: detectedPrice.text,
        }
      : {}),
    engines: dedupe(raw.engines.map(normalizeText).filter(Boolean)),
  };
}
|
||||
|
||||
/**
 * Computes the relevance score of a result.
 *
 * Starts at `100 - rank`, then:
 * - +80 when an ASIN was requested and the result matches it
 * - +40 when no ASIN was requested but the result mentions one
 * - +30 when a price was detected
 * - +20 for a non-empty, non-Amazon domain
 * - −15 for an Amazon domain
 */
function scoreResult(input: {
  asin?: string;
  matchedAsin?: string;
  detectedPrice?: number;
  domain: string;
  rank: number;
}): number {
  const { asin, matchedAsin, detectedPrice, domain, rank } = input;
  const onAmazon = isAmazonDomain(domain);

  let total = 100 - rank;
  if (asin) {
    if (matchedAsin === asin) total += 80;
  } else if (matchedAsin) {
    total += 40;
  }
  if (detectedPrice != null) total += 30;
  if (domain && !onAmazon) total += 20;
  if (onAmazon) total -= 15;
  return total;
}
|
||||
|
||||
/**
 * Parses a base URL and guarantees that its path ends with "/", so relative
 * paths resolve underneath it instead of replacing its last segment.
 *
 * @throws TypeError when `value` is not a valid URL.
 */
function normalizeBaseUrl(value: string): URL {
  const parsed = new URL(value);
  const path = parsed.pathname;
  if (path.endsWith("/")) {
    return parsed;
  }
  parsed.pathname = `${path}/`;
  return parsed;
}
|
||||
|
||||
/**
 * Returns the canonical serialization of a URL, or the trimmed input when it
 * cannot be parsed as an absolute URL.
 */
function normalizeUrl(value: string): string {
  let parsed: URL;
  try {
    parsed = new URL(value);
  } catch {
    return value.trim();
  }
  return parsed.toString();
}
|
||||
|
||||
/**
 * Extracts the lower-case hostname of a URL without a leading "www.".
 * Returns "" when the value is not a parseable absolute URL.
 */
function extractDomain(value: string): string {
  let host: string;
  try {
    host = new URL(value).hostname;
  } catch {
    return "";
  }
  return host.replace(/^www\./i, "").toLowerCase();
}
|
||||
|
||||
/**
 * True when the domain has a label that is exactly "amazon"
 * (case-insensitive) and is followed by at least one more label, e.g.
 * "amazon.com" or "smile.amazon.co.uk".
 */
function isAmazonDomain(domain: string): boolean {
  const labels = domain.toLowerCase().split(".");
  // The final label is excluded: "amazon" must be followed by a dot.
  return labels.slice(0, -1).includes("amazon");
}
|
||||
|
||||
/**
 * Returns the first ASIN-shaped token in the text, upper-cased, or
 * `undefined` when there is none.
 */
function findMatchedAsin(value: string): string | undefined {
  const hits = value.toUpperCase().match(ASIN_MATCH_REGEX);
  if (hits === null) {
    return undefined;
  }
  return hits[0];
}
|
||||
|
||||
/**
 * Detects the most relevant price in a piece of text.
 *
 * Labeled prices ("sale price: $9.99") win over bare prices. Among labeled
 * prices the label with the best rank wins, and ties go to the earliest
 * occurrence. If no labeled price parses, the first bare price is used.
 *
 * @param value - Text to scan (typically title plus snippet).
 * @returns The detected price, or `undefined` when none is found.
 */
function detectPrice(value: string): PriceDetection | undefined {
  let bestLabeled: PriceDetection | undefined;
  for (const match of value.matchAll(LABELED_PRICE_REGEX)) {
    const candidate = parsePriceMatch(match[2], match[1]);
    if (!candidate) continue;
    // Strictly-better only, so the earliest candidate wins ties.
    if (!bestLabeled || comparePriceDetections(candidate, bestLabeled) < 0) {
      bestLabeled = candidate;
    }
  }
  if (bestLabeled) return bestLabeled;

  for (const match of value.matchAll(PRICE_REGEX)) {
    const candidate = parsePriceMatch(match[1]);
    if (candidate) return candidate;
  }
  return undefined;
}
|
||||
|
||||
/**
 * Parses the price text captured by one of the price regexes.
 *
 * @param rawPrice - Captured price text, e.g. "USD 1,299.00".
 * @param rawLabel - Captured label, e.g. "Sale Price"; stored lower-cased.
 * @returns The parsed price, or `undefined` when the text is missing, has no
 *   amount, or the amount is not a finite number greater than zero.
 */
function parsePriceMatch(
  rawPrice: string | undefined,
  rawLabel?: string,
): PriceDetection | undefined {
  if (!rawPrice) return undefined;

  const text = normalizeText(rawPrice);
  const currency = detectCurrency(text);

  const digits = text.match(/[0-9]{1,5}(?:,[0-9]{3})*(?:\.[0-9]{2})?/)?.[0];
  if (!digits) return undefined;

  // Thousands separators are dropped before the numeric conversion.
  const amount = Number(digits.replace(/,/g, ""));
  if (!Number.isFinite(amount) || amount <= 0) return undefined;

  const detection: PriceDetection = { amount, currency, text };
  const label = rawLabel ? normalizeText(rawLabel).toLowerCase() : undefined;
  if (label) {
    detection.label = label;
  }
  return detection;
}
|
||||
|
||||
/**
 * Comparator that orders price detections by label priority
 * (negative when `a` has the better-ranked label).
 */
function comparePriceDetections(a: PriceDetection, b: PriceDetection): number {
  const leftRank = priceLabelRank(a.label);
  const rightRank = priceLabelRank(b.label);
  return leftRank - rightRank;
}
|
||||
|
||||
/**
 * Returns the priority of a price label: its index in `PRICE_LABELS`
 * (0 is best), or `PRICE_LABELS.length` for a missing or unknown label.
 */
function priceLabelRank(label: string | undefined): number {
  const worstRank = PRICE_LABELS.length;
  if (!label) return worstRank;

  const position = PRICE_LABELS.findIndex((known) => known === label);
  return position < 0 ? worstRank : position;
}
|
||||
|
||||
/**
 * Infers the currency of a price text from its code or symbol.
 *
 * Checks run in a fixed order (EUR, GBP, INR, CAD, AUD) and the first hit
 * wins; anything else, including a bare "$", is reported as "USD".
 */
function detectCurrency(value: string): string {
  const rules: ReadonlyArray<readonly [RegExp, string]> = [
    [/\b(EUR)\b|€/i, "EUR"],
    [/\b(GBP)\b|£/i, "GBP"],
    [/\b(INR)\b|₹/i, "INR"],
    [/\b(CAD)\b/i, "CAD"],
    [/\b(AUD)\b/i, "AUD"],
  ];
  for (const [pattern, code] of rules) {
    if (pattern.test(value)) return code;
  }
  return "USD";
}
|
||||
|
||||
/**
 * Escapes the characters that are special inside a regex character class
 * (`-`, `\`, `]`, `^`) by prefixing each with a backslash.
 */
function escapeForCharClass(value: string): string {
  return value.replace(/[-\\\]^]/g, (special) => `\\${special}`);
}
|
||||
|
||||
/**
 * Normalizes the engine field of a search result into a list of names.
 *
 * An array is stringified element by element, with empty strings dropped.
 * Any other value yields a one-element list, or `[]` when it is blank or missing.
 */
function normalizeEngines(value: unknown): string[] {
  if (!Array.isArray(value)) {
    const single = optionalString(value);
    return single ? [single] : [];
  }

  const names: string[] = [];
  for (const entry of value as unknown[]) {
    const name = String(entry);
    if (name) names.push(name);
  }
  return names;
}
|
||||
|
||||
/**
 * Converts a loosely-typed scalar into a trimmed, non-empty string.
 *
 * Only strings, numbers, booleans and bigints are converted. Objects,
 * arrays, functions, symbols, `null` and `undefined` yield `undefined`, so
 * malformed payloads can never leak "[object Object]" into URLs or prices.
 *
 * @param value - Value taken from an untyped response payload.
 * @returns The trimmed text, or `undefined` when there is nothing usable.
 */
function optionalString(value: unknown): string | undefined {
  switch (typeof value) {
    case "string":
    case "number":
    case "boolean":
    case "bigint": {
      const text = String(value).trim();
      return text ? text : undefined;
    }
    default:
      return undefined;
  }
}
|
||||
|
||||
/**
 * Decodes HTML entities, collapses every whitespace run into one space, and
 * trims the result.
 */
function normalizeText(value: string): string {
  const decoded = decodeHtmlEntities(value);
  const collapsed = decoded.replace(/\s+/g, " ");
  return collapsed.trim();
}
|
||||
|
||||
/**
 * Joins two fragments with a single space, or returns `right` unchanged
 * when `left` is empty.
 */
function appendWithSpace(left: string, right: string): string {
  if (!left) {
    return right;
  }
  return `${left} ${right}`;
}
|
||||
|
||||
/**
 * Decodes the HTML character references that commonly appear in search
 * result titles and snippets.
 *
 * Supported: `&amp;`, `&quot;`, `&apos;`, `&lt;`, `&gt;`, `&nbsp;` (decoded
 * to a regular space) and numeric references such as `&#39;` or `&#x27;`.
 * Unknown or out-of-range references are left untouched.
 *
 * Decoding happens in a single pass, so already-escaped text is decoded
 * exactly once: `&amp;lt;` becomes `&lt;`, not `<`.
 *
 * @param value - Text that may contain character references.
 * @returns The decoded text.
 */
function decodeHtmlEntities(value: string): string {
  const namedEntities = new Map<string, string>([
    ["amp", "&"],
    ["quot", '"'],
    ["apos", "'"],
    ["lt", "<"],
    ["gt", ">"],
    ["nbsp", " "],
  ]);

  return value.replace(
    /&(#x[0-9a-f]+|#[0-9]+|[a-z]+);/gi,
    (reference: string, body: string): string => {
      if (!body.startsWith("#")) {
        return namedEntities.get(body.toLowerCase()) ?? reference;
      }
      const isHex = body.charAt(1).toLowerCase() === "x";
      const codePoint = isHex
        ? Number.parseInt(body.slice(2), 16)
        : Number.parseInt(body.slice(1), 10);
      // Leave references alone when they cannot name a valid code point.
      if (!Number.isInteger(codePoint) || codePoint <= 0 || codePoint > 0x10ffff) {
        return reference;
      }
      return String.fromCodePoint(codePoint);
    },
  );
}
|
||||
|
||||
/**
 * Removes duplicate strings while keeping the first occurrence of each, in
 * original order. The input array is not modified.
 */
function dedupe(values: string[]): string[] {
  const seen = new Set<string>();
  const unique: string[] = [];
  for (const entry of values) {
    if (seen.has(entry)) continue;
    seen.add(entry);
    unique.push(entry);
  }
  return unique;
}
|
||||
|
||||
/**
 * Reads a numeric setting from the environment (`Bun.env`).
 *
 * A variable that is unset, empty, or whitespace-only counts as "not
 * configured" and yields `fallback`. Without this check `Number("")` would
 * silently turn an empty value into 0. Values that do not parse to a finite
 * number also yield `fallback`.
 *
 * @param key - Environment variable name.
 * @param fallback - Value returned when the variable is absent or unusable.
 * @returns The parsed number (not necessarily an integer) or `fallback`.
 */
function readEnvInt(key: string, fallback: number): number {
  const raw = Bun.env[key];
  if (raw === undefined || raw.trim() === "") {
    return fallback;
  }
  const parsed = Number(raw);
  return Number.isFinite(parsed) ? parsed : fallback;
}
|
||||
|
||||
/**
 * Returns `value` when it is an integer greater than zero, otherwise
 * `fallback` (which is returned as-is, without validation).
 */
function positiveInteger(value: number, fallback: number): number {
  if (!Number.isInteger(value)) return fallback;
  if (value <= 0) return fallback;
  return value;
}
|
||||
Reference in New Issue
Block a user