- Implemented supplier scoring logic in `supplier-scoring.ts` with functions to compute demand score, competition penalty, and overall supplier product score.
- Created unit tests for supplier scoring in `supplier-scoring.test.ts` to validate scoring logic against various scenarios.
- Developed UPC file analysis tool in `upc-file-analysis.ts` to process UPCs in batches, fetch product data from Keepa and SP-API, and generate supplier results.
- Added UPC input reading functionality in `upc-file-reader.ts` to handle XLSX and XLS files, including validation for UPC formats.
- Introduced a command-line tool in `upc-lookup.ts` for looking up UPCs and displaying detailed results or mappings to ASINs.
- Enhanced error handling and logging throughout the new modules for better traceability and user feedback.
778 lines
22 KiB
TypeScript
778 lines
22 KiB
TypeScript
/** SearXNG instance used when neither `options.baseUrl` nor `SEARXNG_URL` is set. */
const DEFAULT_SEARXNG_URL = "https://searxng.nvictor.me/";
/** Endpoint of the Google Custom Search JSON API. */
const DEFAULT_GOOGLE_CUSTOM_SEARCH_URL = "https://www.googleapis.com/customsearch/v1";
/** Endpoint of the SerpApi JSON search API. */
const DEFAULT_SERPAPI_URL = "https://serpapi.com/search.json";
/** Request timeout applied when no option or environment override is given. */
const DEFAULT_TIMEOUT_MS = 10_000;
/** Result cap applied when no option or environment override is given. */
const DEFAULT_MAX_RESULTS = 10;

/** Matches a whole string that is "B" followed by nine uppercase alphanumerics. */
const ASIN_REGEX = /^B[0-9A-Z]{9}$/;
/** Finds ASIN-shaped tokens anywhere inside a larger text (case-insensitive). */
const ASIN_MATCH_REGEX = /\bB[0-9A-Z]{9}\b/gi;

/**
 * Price labels in priority order: a lower index wins when several labelled
 * prices are found. The generic "price" must stay last so the more specific
 * alternatives are tried first by the regex alternation below.
 */
const PRICE_LABELS = [
  "selling price",
  "sale price",
  "offer price",
  "current price",
  "our price",
  "list price",
  "price",
] as const;

/** Regex alternation of recognised currency codes (note the escaped `US$`). */
const CURRENCY_CODES = "USD|US\\$|EUR|GBP|INR|CAD|AUD";
/** Recognised currency symbols, used inside regex character classes. */
const CURRENCY_SYMBOLS = "$€£₹";

// Shared regex fragments so both price expressions stay in sync.
const PRICE_SYMBOL_CLASS = escapeForCharClass(CURRENCY_SYMBOLS);
const PRICE_AMOUNT_PATTERN = "[0-9]{1,5}(?:,[0-9]{3})*(?:\\.[0-9]{2})?";
const SYMBOL_PRICE_PATTERN = `(?:${CURRENCY_CODES})?\\s*[${PRICE_SYMBOL_CLASS}]\\s*${PRICE_AMOUNT_PATTERN}`;
const CODE_PREFIXED_PRICE_PATTERN = `(?:${CURRENCY_CODES})\\s*${PRICE_AMOUNT_PATTERN}`;
const CODE_SUFFIXED_PRICE_PATTERN = `${PRICE_AMOUNT_PATTERN}\\s*(?:${CURRENCY_CODES})`;

/**
 * Captures (1) a price label and (2) the price that follows it within at most
 * 24 characters that are neither digits nor currency symbols.
 */
const LABELED_PRICE_REGEX = new RegExp(
  `\\b(${PRICE_LABELS.join("|")})\\b[^${PRICE_SYMBOL_CLASS}0-9]{0,24}(${SYMBOL_PRICE_PATTERN}|${CODE_PREFIXED_PRICE_PATTERN})`,
  "gi",
);

/** Captures any unlabelled price: symbol-prefixed, code-prefixed, or code-suffixed. */
const PRICE_REGEX = new RegExp(
  `(${SYMBOL_PRICE_PATTERN}|${CODE_PREFIXED_PRICE_PATTERN}|${CODE_SUFFIXED_PRICE_PATTERN})`,
  "gi",
);
|
|
|
|
/**
 * A single search hit after normalization, ASIN matching, price detection and
 * scoring.
 */
export type SearxngOfferSearchResult = {
  /** The requested ASIN; present only when the search query itself was an ASIN. */
  asin?: string;
  /** The query string that was sent to the search provider. */
  query: string;
  /** Result title with whitespace collapsed. */
  title: string;
  /** Result URL, canonicalized when it parses as a URL. */
  url: string;
  /** Lower-cased hostname of `url` without a leading `www.`; empty if `url` is unparseable. */
  domain: string;
  /** Result snippet with whitespace collapsed. */
  snippet: string;
  /** Position reported by the provider parser (lower is earlier). */
  rank: number;
  /** Heuristic relevance score; results are sorted by it in descending order. */
  score: number;
  /** ASIN-shaped token found in the title, snippet, or URL, if any. */
  matchedAsin?: string;
  /** Numeric price detected in the title or snippet. */
  detectedPrice?: number;
  /** Currency code inferred for `detectedPrice`. */
  detectedPriceCurrency?: string;
  /** Lower-cased label (e.g. "sale price") that preceded the detected price, if any. */
  detectedPriceLabel?: string;
  /** The raw text fragment the price was parsed from. */
  detectedPriceText?: string;
  /** De-duplicated names of the engines that produced this result. */
  engines: Array<string>;
};
|
|
|
|
/** Options accepted by `searchProductOffers` and `searchAsinOffers`. */
export type SearxngSearchOptions = {
  /**
   * Search backend. `"searxng"` and `"google-custom-search"` select those
   * backends; any other value (including omitted) uses SerpApi.
   */
  provider?: "searxng" | "google-custom-search" | "serpapi";
  /** Overrides the endpoint of whichever provider is used. */
  baseUrl?: string;
  /** Google API key; falls back to the `GOOGLE_API_KEY` environment variable. */
  googleApiKey?: string;
  /** Google Custom Search engine id; falls back to several `GOOGLE_*` environment variables. */
  googleCx?: string;
  /** SerpApi key; falls back to the `SERPAPI_API_KEY` environment variable. */
  serpapiApiKey?: string;
  /** Per-request timeout in milliseconds; falls back to `SEARXNG_TIMEOUT_MS`, then 10 000. */
  timeoutMs?: number;
  /** Maximum number of results returned; falls back to `SEARXNG_MAX_RESULTS`, then 10. */
  maxResults?: number;
  /** 1-based result page; defaults to 1. */
  page?: number;
  /** SearXNG `categories` parameter; defaults to "general". */
  categories?: string;
  /** Comma-separated SearXNG engine names. */
  engines?: string;
  /** When the query is an ASIN, also keep results that do not mention that ASIN. */
  includeUnmatchedAsinResults?: boolean;
  /** Replacement for the global `fetch`, e.g. for tests. */
  fetchImpl?: typeof fetch;
};
|
|
|
|
/** Provider-independent search hit produced by the parsers, before normalization. */
type RawSearchResult = {
  /** Title as delivered by the provider (may be empty). */
  title: string;
  /** Target URL as delivered by the provider. */
  url: string;
  /** Snippet or synthesized description text. */
  snippet: string;
  /** Names of the engines that returned the hit. */
  engines: Array<string>;
  /** Position assigned by the parser. */
  rank: number;
};
|
|
|
|
/** Shape of the SearXNG JSON response as far as this module reads it. */
type JsonSearchResponse = {
  /** Untyped result records; each field is validated when parsed. */
  results?: Record<string, unknown>[];
};
|
|
|
|
/** A price extracted from free text. */
type PriceDetection = {
  /** Parsed numeric amount (thousands separators removed). */
  amount: number;
  /** Currency code inferred from the matched text. */
  currency: string;
  /** Whitespace-normalized text fragment the amount was parsed from. */
  text: string;
  /** Lower-cased price label that preceded the amount, when one was matched. */
  label?: string;
};
|
|
|
|
/**
 * Searches for offers of a single Amazon product.
 *
 * @param asin - ASIN to look up; it is trimmed and upper-cased before validation.
 * @param options - Provider and request options.
 * @returns Scored results, as produced by `searchProductOffers`.
 * @throws Error when `asin` is not a valid ASIN.
 */
export async function searchAsinOffers(
  asin: string,
  options: SearxngSearchOptions = {},
): Promise<SearxngOfferSearchResult[]> {
  const canonicalAsin = normalizeAsin(asin);
  return searchProductOffers(canonicalAsin, options);
}
|
|
|
|
/**
 * Runs a product search against the configured provider and returns scored,
 * filtered and sorted offers.
 *
 * When the query is itself an ASIN it is expanded with shopping keywords, and
 * (unless `includeUnmatchedAsinResults` is set) only results mentioning that
 * ASIN are kept.
 *
 * @param query - Free-text query or an ASIN.
 * @param options - Provider and request options.
 * @returns At most `maxResults` results, best score first (ties broken by rank).
 * @throws Error when the query is blank, or when the provider call fails.
 */
export async function searchProductOffers(
  query: string,
  options: SearxngSearchOptions = {},
): Promise<SearxngOfferSearchResult[]> {
  const normalizedQuery = query.trim();
  if (normalizedQuery.length === 0) {
    throw new Error("Search query is required.");
  }

  const inferredAsin = getAsinQuery(normalizedQuery);
  const providerQuery =
    inferredAsin === undefined
      ? normalizedQuery
      : `${inferredAsin} price sale offer buy online`;
  const limit = positiveInteger(
    options.maxResults ?? readEnvInt("SEARXNG_MAX_RESULTS", DEFAULT_MAX_RESULTS),
    DEFAULT_MAX_RESULTS,
  );

  let rawResults: RawSearchResult[];
  switch (options.provider) {
    case "searxng":
      rawResults = await fetchSearxngResults(providerQuery, options);
      break;
    case "google-custom-search":
      rawResults = await fetchGoogleCustomSearchResults(providerQuery, {
        ...options,
        maxResults: limit,
      });
      break;
    default:
      // SerpApi is the default backend when no (or an unknown) provider is given.
      rawResults = await fetchSerpApiGoogleShoppingResults(providerQuery, {
        ...options,
        provider: "serpapi",
        maxResults: limit,
      });
  }

  const offers: SearxngOfferSearchResult[] = [];
  for (const raw of rawResults) {
    const offer = normalizeResult(raw, providerQuery, inferredAsin);
    if (!offer.url) continue;
    const keep =
      !inferredAsin ||
      options.includeUnmatchedAsinResults ||
      offer.matchedAsin === inferredAsin;
    if (keep) offers.push(offer);
  }

  offers.sort((left, right) => right.score - left.score || left.rank - right.rank);
  return offers.slice(0, limit);
}
|
|
|
|
/**
 * Canonicalizes an ASIN by trimming and upper-casing it.
 *
 * @param value - Candidate ASIN.
 * @returns The canonical ASIN.
 * @throws Error when the canonical form does not match the ASIN pattern.
 */
export function normalizeAsin(value: string): string {
  const candidate = value.trim().toUpperCase();
  if (ASIN_REGEX.test(candidate)) {
    return candidate;
  }
  throw new Error(`Invalid ASIN: ${value}`);
}
|
|
|
|
/**
 * Returns the canonical ASIN when the whole query is an ASIN, otherwise
 * `undefined`. Unlike `normalizeAsin`, this never throws.
 */
function getAsinQuery(value: string): string | undefined {
  const candidate = value.trim().toUpperCase();
  if (!ASIN_REGEX.test(candidate)) {
    return undefined;
  }
  return candidate;
}
|
|
|
|
/**
 * Queries a SearXNG instance, preferring its JSON API and falling back to
 * scraping the HTML results page when the JSON request does not yield an OK
 * `application/json` response.
 *
 * @param query - Query text to send.
 * @param options - Base URL, timeout, page, categories, engines and fetch override.
 * @returns Raw results in provider order.
 * @throws Error when the HTML fallback request is not OK.
 */
async function fetchSearxngResults(
  query: string,
  options: SearxngSearchOptions,
): Promise<RawSearchResult[]> {
  const baseUrl = normalizeBaseUrl(
    options.baseUrl ?? Bun.env.SEARXNG_URL ?? DEFAULT_SEARXNG_URL,
  );
  const timeoutMs = positiveInteger(
    options.timeoutMs ?? readEnvInt("SEARXNG_TIMEOUT_MS", DEFAULT_TIMEOUT_MS),
    DEFAULT_TIMEOUT_MS,
  );
  const fetchImpl = options.fetchImpl ?? fetch;
  const requestQuery = applySearxngEngineBang(query, options.engines);
  // Parameters shared by the JSON attempt and the HTML fallback.
  const sharedParams = {
    categories: options.categories ?? "general",
    engines: options.engines,
    page: positiveInteger(options.page ?? 1, 1),
  };

  const jsonUrl = buildSearchUrl(baseUrl, requestQuery, {
    ...sharedParams,
    format: "json",
  });
  const jsonResponse = await fetchWithTimeout(fetchImpl, jsonUrl, timeoutMs);
  if (isJsonResponse(jsonResponse)) {
    const payload = (await jsonResponse.json()) as JsonSearchResponse;
    return parseJsonResults(payload);
  }

  // JSON was refused or unavailable: request the regular HTML page instead.
  const htmlUrl = buildSearchUrl(baseUrl, requestQuery, sharedParams);
  const htmlResponse = await fetchWithTimeout(fetchImpl, htmlUrl, timeoutMs);
  if (htmlResponse.ok) {
    return parseHtmlResults(await htmlResponse.text());
  }
  throw new Error(
    `SearXNG search failed: status=${htmlResponse.status} url=${htmlUrl.toString()}`,
  );
}
|
|
|
|
/**
 * Prefixes the query with a SearXNG "bang" shortcut when exactly one engine
 * with a known shortcut is requested and the query does not already start
 * with `!`. In every other case the query is returned unchanged.
 */
function applySearxngEngineBang(query: string, engines: string | undefined): string {
  if (!engines) return query;
  if (query.trim().startsWith("!")) return query;

  const names = engines
    .split(",")
    .map((name) => name.trim().toLowerCase())
    .filter((name) => name.length > 0);
  if (names.length !== 1) return query;

  const [onlyEngine] = names;
  const shortcut = onlyEngine === undefined ? undefined : searxngEngineShortcut(onlyEngine);
  return shortcut ? `!${shortcut} ${query}` : query;
}
|
|
|
|
/**
 * Maps an engine name to the bang shortcut this module knows for it.
 * Only "google" (shortcut "go") is currently mapped.
 */
function searxngEngineShortcut(engine: string): string | undefined {
  switch (engine) {
    case "google":
      return "go";
    default:
      return undefined;
  }
}
|
|
|
|
/**
 * Tells whether a response is successful and declares a JSON body via its
 * `content-type` header (matched case-insensitively).
 */
function isJsonResponse(response: Response): boolean {
  if (!response.ok) return false;
  const declaredType = response.headers.get("content-type");
  return declaredType !== null && declaredType.toLowerCase().includes("application/json");
}
|
|
|
|
/**
 * Performs a GET request that is aborted if no response arrives within
 * `timeoutMs`. The timer is always cleared once the fetch settles.
 *
 * @param fetchImpl - Fetch implementation to call.
 * @param url - Request target.
 * @param timeoutMs - Abort deadline in milliseconds.
 * @returns The response as returned by `fetchImpl`.
 */
async function fetchWithTimeout(
  fetchImpl: typeof fetch,
  url: URL,
  timeoutMs: number,
): Promise<Response> {
  const abortController = new AbortController();
  const timer = setTimeout(() => {
    abortController.abort();
  }, timeoutMs);
  const init: RequestInit = {
    signal: abortController.signal,
    headers: {
      accept: "application/json,text/html;q=0.9,*/*;q=0.8",
      "user-agent": "asin-check/1.0 (+https://searxng.nvictor.me/)",
    },
  };

  try {
    return await fetchImpl(url, init);
  } finally {
    clearTimeout(timer);
  }
}
|
|
|
|
/**
 * Builds the `search` URL relative to `baseUrl` with the SearXNG query
 * parameters. `engines` and `format` are only added when non-empty.
 *
 * @param baseUrl - Instance base URL (expected to end with `/`).
 * @param query - Value of the `q` parameter.
 * @param params - Categories, optional engines, page number and optional format.
 * @returns A new URL; `baseUrl` is not modified.
 */
function buildSearchUrl(
  baseUrl: URL,
  query: string,
  params: { categories: string; engines?: string; page: number; format?: string },
): URL {
  const target = new URL("search", baseUrl);
  const { searchParams } = target;

  searchParams.set("q", query);
  searchParams.set("categories", params.categories);
  if (params.engines) searchParams.set("engines", params.engines);
  searchParams.set("pageno", String(params.page));
  if (params.format) searchParams.set("format", params.format);

  return target;
}
|
|
|
|
/**
 * Queries the Google Custom Search JSON API.
 *
 * Credentials come from `options` first, then from the environment
 * (`GOOGLE_API_KEY`; `GOOGLE_CSE_ID`, `GOOGLE_CX` or `GOOGLE_SEARCH_ENGINE_ID`).
 * At most 10 results are requested per page.
 *
 * @param query - Query text to send.
 * @param options - Credentials, base URL, timeout, page, result cap and fetch override.
 * @returns Raw results in provider order.
 * @throws Error when credentials are missing or the response is not OK.
 */
async function fetchGoogleCustomSearchResults(
  query: string,
  options: SearxngSearchOptions,
): Promise<RawSearchResult[]> {
  const apiKey = options.googleApiKey ?? Bun.env.GOOGLE_API_KEY;
  // First candidate that is neither null nor undefined, mirroring a `??` chain.
  const cx = [
    options.googleCx,
    Bun.env.GOOGLE_CSE_ID,
    Bun.env.GOOGLE_CX,
    Bun.env.GOOGLE_SEARCH_ENGINE_ID,
  ].find((candidate) => candidate != null);

  if (!apiKey) {
    throw new Error("Missing GOOGLE_API_KEY for Google Custom Search.");
  }
  if (!cx) {
    throw new Error(
      "Missing Google Custom Search engine id. Set GOOGLE_CSE_ID, GOOGLE_CX, or GOOGLE_SEARCH_ENGINE_ID.",
    );
  }

  const timeoutMs = positiveInteger(
    options.timeoutMs ?? readEnvInt("SEARXNG_TIMEOUT_MS", DEFAULT_TIMEOUT_MS),
    DEFAULT_TIMEOUT_MS,
  );
  const page = positiveInteger(options.page ?? 1, 1);
  const requested = positiveInteger(
    options.maxResults ?? DEFAULT_MAX_RESULTS,
    DEFAULT_MAX_RESULTS,
  );
  const num = Math.min(10, requested);
  const fetchImpl = options.fetchImpl ?? fetch;

  const url = new URL(options.baseUrl ?? DEFAULT_GOOGLE_CUSTOM_SEARCH_URL);
  const queryParams: Array<[string, string]> = [
    ["key", apiKey],
    ["cx", cx],
    ["q", query],
    ["num", String(num)],
    // `start` is the 1-based index of the first result on the requested page.
    ["start", String((page - 1) * num + 1)],
  ];
  for (const [name, value] of queryParams) {
    url.searchParams.set(name, value);
  }

  const response = await fetchWithTimeout(fetchImpl, url, timeoutMs);
  if (!response.ok) {
    const body = await response.text().catch(() => "");
    throw new Error(
      `Google Custom Search failed: status=${response.status} ${body.slice(0, 300)}`,
    );
  }

  const payload = (await response.json()) as GoogleCustomSearchResponse;
  return parseGoogleCustomSearchResults(payload);
}
|
|
|
|
/** Portion of the Google Custom Search response that this module reads. */
type GoogleCustomSearchResponse = {
  /** Result entries; absent when the search returned nothing. */
  items?: Array<GoogleCustomSearchItem>;
};
|
|
|
|
/** One Google Custom Search result entry, limited to the fields used here. */
type GoogleCustomSearchItem = {
  /** Result title. */
  title?: string;
  /** Target URL; entries without it are skipped by the parser. */
  link?: string;
  /** Text snippet. */
  snippet?: string;
  /** Declared for completeness; not read by this module. */
  displayLink?: string;
  /** Structured page metadata; `offer`, `product` and `metatags` are scanned for prices. */
  pagemap?: Record<string, unknown>;
};
|
|
|
|
/** Portion of the SerpApi `google_shopping` response that this module reads. */
type SerpApiShoppingResponse = {
  /** Main shopping results. */
  shopping_results?: Array<SerpApiShoppingResult>;
  /** Inline shopping results. */
  inline_shopping_results?: Array<SerpApiShoppingResult>;
  /** Results grouped into categories; each group's results are flattened. */
  categorized_shopping_results?: Array<{
    shopping_results?: Array<SerpApiShoppingResult>;
  }>;
  /** Error message reported inside an otherwise successful HTTP response. */
  error?: string;
};
|
|
|
|
/**
 * One SerpApi shopping result, limited to the fields declared here.
 * NOTE(review): field meanings follow SerpApi's response format — confirm
 * against its documentation.
 */
type SerpApiShoppingResult = {
  /** Provider-reported position; used as the rank when present. */
  position?: number;
  /** Product title. */
  title?: string;
  /** Merchant name; emitted into the snippet as `merchant: …`. */
  source?: string;
  /** Preferred target URL. */
  link?: string;
  /** Fallback URL when `link` is missing. */
  product_link?: string;
  /** Last-resort URL when neither `link` nor `product_link` is present. */
  serpapi_product_api?: string;
  /** Display price; emitted into the snippet as `offer price: …`. */
  price?: string;
  /** Declared for completeness; not read by this module. */
  extracted_price?: number;
  /** Previous price; emitted into the snippet as `list price: …`. */
  old_price?: string;
  /** Declared for completeness; not read by this module. */
  extracted_old_price?: number;
  /** Delivery text, appended to the snippet verbatim. */
  delivery?: string;
  /** Numeric rating; emitted into the snippet as `rating: …`. */
  rating?: number;
  /** Review count; emitted into the snippet as `reviews: …`. */
  reviews?: number;
  /** Free-text snippet, appended verbatim. */
  snippet?: string;
};
|
|
|
|
/**
 * Queries SerpApi's `google_shopping` engine (US, English).
 *
 * @param query - Query text to send.
 * @param options - API key, base URL, timeout, page and fetch override.
 * @returns Raw results parsed from the response.
 * @throws Error when the API key is missing, the HTTP response is not OK, or
 *   the JSON payload carries an `error` field.
 */
async function fetchSerpApiGoogleShoppingResults(
  query: string,
  options: SearxngSearchOptions,
): Promise<RawSearchResult[]> {
  const apiKey = options.serpapiApiKey ?? Bun.env.SERPAPI_API_KEY;
  if (!apiKey) {
    throw new Error(
      "Missing SERPAPI_API_KEY. Google does not provide an official public Shopping-tab search API; use SerpApi's google_shopping API or another SERP provider.",
    );
  }

  const timeoutMs = positiveInteger(
    options.timeoutMs ?? readEnvInt("SEARXNG_TIMEOUT_MS", DEFAULT_TIMEOUT_MS),
    DEFAULT_TIMEOUT_MS,
  );
  const page = positiveInteger(options.page ?? 1, 1);
  const fetchImpl = options.fetchImpl ?? fetch;

  const url = new URL(options.baseUrl ?? DEFAULT_SERPAPI_URL);
  const queryParams: Array<[string, string]> = [
    ["engine", "google_shopping"],
    ["q", query],
    ["api_key", apiKey],
    ["google_domain", "google.com"],
    ["gl", "us"],
    ["hl", "en"],
    // Pages advance in steps of 60 results.
    ["start", String((page - 1) * 60)],
  ];
  for (const [name, value] of queryParams) {
    url.searchParams.set(name, value);
  }

  const response = await fetchWithTimeout(fetchImpl, url, timeoutMs);
  if (!response.ok) {
    const body = await response.text().catch(() => "");
    throw new Error(
      `SerpApi Google Shopping failed: status=${response.status} ${body.slice(0, 300)}`,
    );
  }

  const payload = (await response.json()) as SerpApiShoppingResponse;
  if (payload.error) {
    throw new Error(`SerpApi Google Shopping failed: ${payload.error}`);
  }
  return parseSerpApiShoppingResults(payload);
}
|
|
|
|
/**
 * Flattens every result list of a SerpApi shopping response into raw results.
 *
 * Items without any usable URL are skipped. The snippet is synthesized from
 * price, merchant, delivery, free text, rating and review fields so that the
 * downstream price detection can read labelled prices from it.
 */
function parseSerpApiShoppingResults(
  json: SerpApiShoppingResponse,
): RawSearchResult[] {
  const items = [
    ...(json.shopping_results ?? []),
    ...(json.inline_shopping_results ?? []),
    ...(json.categorized_shopping_results ?? []).flatMap(
      (category) => category.shopping_results ?? [],
    ),
  ];

  const parsed: RawSearchResult[] = [];
  for (const [index, item] of items.entries()) {
    const url =
      optionalString(item.link) ??
      optionalString(item.product_link) ??
      optionalString(item.serpapi_product_api);
    if (!url) continue;

    const parts: string[] = [];
    const priceText = optionalString(item.price);
    if (priceText) parts.push(`offer price: ${priceText}`);
    if (optionalString(item.old_price)) parts.push(`list price: ${item.old_price}`);
    if (optionalString(item.source)) parts.push(`merchant: ${item.source}`);
    const delivery = optionalString(item.delivery);
    if (delivery) parts.push(delivery);
    const freeText = optionalString(item.snippet);
    if (freeText) parts.push(freeText);
    if (typeof item.rating === "number") parts.push(`rating: ${item.rating}`);
    if (typeof item.reviews === "number") parts.push(`reviews: ${item.reviews}`);

    parsed.push({
      title: optionalString(item.title) ?? "",
      url,
      snippet: parts.join(" "),
      engines: ["serpapi google shopping"],
      // Fall back to the position within the flattened list.
      rank: item.position ?? index + 1,
    });
  }
  return parsed;
}
|
|
|
|
/**
 * Converts Google Custom Search items into raw results. Items without a link
 * are skipped; price metadata found in the pagemap is appended to the snippet.
 * Ranks reflect the item's position in the response (1-based).
 */
function parseGoogleCustomSearchResults(
  json: GoogleCustomSearchResponse,
): RawSearchResult[] {
  const parsed: RawSearchResult[] = [];
  for (const [index, item] of (json.items ?? []).entries()) {
    const url = optionalString(item.link);
    if (!url) continue;

    const snippetParts: string[] = [];
    const snippetText = optionalString(item.snippet);
    if (snippetText) snippetParts.push(snippetText);
    const metadataText = extractGoogleCustomSearchMetadataText(item);
    if (metadataText) snippetParts.push(metadataText);

    parsed.push({
      title: optionalString(item.title) ?? "",
      url,
      snippet: snippetParts.join(" "),
      engines: ["google custom search"],
      rank: index + 1,
    });
  }
  return parsed;
}
|
|
|
|
/**
 * Collects price hints from an item's pagemap (`offer`, then `product`, then
 * `metatags`) and returns them as one space-separated string.
 */
function extractGoogleCustomSearchMetadataText(
  item: GoogleCustomSearchItem,
): string {
  const pagemap = item.pagemap ?? {};
  const chunks: string[] = [];

  const sections = [pagemap.offer, pagemap.product, pagemap.metatags];
  for (const section of sections) {
    for (const entry of readPagemapObjects(section)) {
      appendPriceMetadata(chunks, entry);
    }
  }

  return chunks.join(" ");
}
|
|
|
|
/**
 * Appends an `offer price: …` chunk when the metadata object carries a price
 * under one of the known keys; the currency is included when one is found.
 *
 * @param chunks - Output list, mutated in place.
 * @param value - One pagemap metadata object.
 */
function appendPriceMetadata(chunks: string[], value: Record<string, unknown>): void {
  // Returns the first key whose value yields a non-empty string.
  const firstText = (keys: readonly string[]): string | undefined => {
    for (const key of keys) {
      const text = optionalString(value[key]);
      if (text !== undefined) return text;
    }
    return undefined;
  };

  const price = firstText([
    "price",
    "lowprice",
    "highprice",
    "product:price:amount",
    "og:price:amount",
    "twitter:data1",
  ]);
  if (!price) return;

  const currency = firstText([
    "pricecurrency",
    "priceCurrency",
    "product:price:currency",
    "og:price:currency",
  ]);
  if (currency) {
    chunks.push(`offer price: ${currency} ${price}`);
  } else {
    chunks.push(`offer price: ${price}`);
  }
}
|
|
|
|
/**
 * Returns the plain-object members of a pagemap section. Anything that is not
 * an array yields an empty list; null, primitive and nested-array members are
 * dropped.
 */
function readPagemapObjects(value: unknown): Array<Record<string, unknown>> {
  const objects: Array<Record<string, unknown>> = [];
  if (!Array.isArray(value)) return objects;

  for (const member of value as unknown[]) {
    if (member === null || member === undefined) continue;
    if (typeof member !== "object" || Array.isArray(member)) continue;
    objects.push(member as Record<string, unknown>);
  }
  return objects;
}
|
|
|
|
/**
 * Converts SearXNG JSON results into raw results. Entries without a URL are
 * skipped; the rank is the entry's 1-based position in the original list, so
 * skipped entries leave gaps.
 */
function parseJsonResults(json: JsonSearchResponse): RawSearchResult[] {
  const parsed: RawSearchResult[] = [];
  for (const [index, entry] of (json.results ?? []).entries()) {
    const url = optionalString(entry.url);
    if (!url) continue;
    parsed.push({
      title: optionalString(entry.title) ?? "",
      url,
      snippet: optionalString(entry.content) ?? "",
      engines: normalizeEngines(entry.engines ?? entry.engine),
      rank: index + 1,
    });
  }
  return parsed;
}
|
|
|
|
/**
 * Extracts search results from a SearXNG HTML results page using
 * `HTMLRewriter`.
 *
 * Each `article.result` becomes one result: the URL comes from the first of
 * `a.url_header` / `h3 a` that has an `href`, the title from the text inside
 * `h3 a`, the snippet from `p.content`, and engine names from
 * `.engines span`. Articles without a URL are dropped. Ranks are assigned
 * sequentially (1-based) in emission order.
 *
 * Compared with tracking a shared "current text target", every text handler
 * passes its destination explicitly. This keeps the title handler consistent
 * with the snippet and engine handlers, so title text that follows nested
 * markup is no longer discarded after the first text node. A pending article
 * is also flushed when the next one starts and at end of input, so results
 * are still emitted when the element does not offer `onEndTag`.
 *
 * @param html - Full HTML of the results page.
 * @returns Raw results in document order.
 */
async function parseHtmlResults(html: string): Promise<RawSearchResult[]> {
  type Draft = {
    title: string;
    url: string;
    snippet: string;
    engines: string[];
  };
  type TextField = "title" | "snippet" | "engine";

  const results: RawSearchResult[] = [];
  let current: Draft | null = null;

  // Emits the pending article (if it has a URL) and clears it. Safe to call
  // repeatedly: once cleared, further calls do nothing.
  const flushCurrent = (): void => {
    if (current?.url) {
      results.push({ ...current, rank: results.length + 1 });
    }
    current = null;
  };

  // Appends whitespace-normalized text to the given field of the pending article.
  const appendText = (field: TextField, text: string): void => {
    if (!current) return;
    const normalized = text.replace(/\s+/g, " ").trim();
    if (!normalized) return;

    if (field === "engine") {
      current.engines.push(normalized);
      return;
    }
    current[field] = appendWithSpace(current[field], normalized);
  };

  const response = new HTMLRewriter()
    .on("article.result", {
      element(element) {
        // Covers runtimes where the end-tag callback below never fires.
        flushCurrent();
        current = { title: "", url: "", snippet: "", engines: [] };
        const onEndTag = (element as unknown as {
          onEndTag?: (handler: () => void) => void;
        }).onEndTag;
        onEndTag?.call(element, () => {
          flushCurrent();
        });
      },
    })
    .on("article.result a.url_header", {
      element(element) {
        if (current && !current.url) {
          current.url = element.getAttribute("href") ?? "";
        }
      },
    })
    .on("article.result h3 a", {
      element(element) {
        if (current && !current.url) {
          current.url = element.getAttribute("href") ?? "";
        }
      },
      text(text) {
        appendText("title", text.text);
      },
    })
    .on("article.result p.content", {
      text(text) {
        appendText("snippet", text.text);
      },
    })
    .on("article.result .engines span", {
      text(text) {
        appendText("engine", text.text);
      },
    })
    .transform(new Response(html));

  // Draining the body is what drives the rewriter handlers.
  await response.text();
  flushCurrent();
  return results;
}
|
|
|
|
/**
 * Turns a raw provider result into the public result shape: normalizes URL
 * and text, detects a matching ASIN and a price, and computes the score.
 *
 * When an ASIN was requested and it occurs anywhere in the title, snippet or
 * URL, it is reported as `matchedAsin` even if another ASIN-shaped token
 * (for example a ten-letter word starting with "B") appears earlier in the
 * text. Otherwise the first ASIN-shaped token is reported.
 *
 * @param raw - Result as produced by a provider parser.
 * @param query - Query string that was sent to the provider.
 * @param asin - Requested ASIN in canonical (upper-case) form, if the query was one.
 * @returns The normalized result; optional fields are omitted when unknown.
 */
function normalizeResult(
  raw: RawSearchResult,
  query: string,
  asin?: string,
): SearxngOfferSearchResult {
  const url = normalizeUrl(raw.url);
  const domain = extractDomain(url);
  const title = normalizeText(raw.title);
  const snippet = normalizeText(raw.snippet);

  const asinHaystack = `${title} ${snippet} ${url}`;
  // Prefer the requested ASIN over whichever ASIN-shaped token comes first.
  const requestedAsinPresent =
    asin !== undefined &&
    (asinHaystack.toUpperCase().match(ASIN_MATCH_REGEX) ?? []).includes(asin);
  const matchedAsin = requestedAsinPresent ? asin : findMatchedAsin(asinHaystack);

  const detectedPrice = detectPrice(`${title} ${snippet}`);
  const score = scoreResult({
    asin,
    matchedAsin,
    detectedPrice: detectedPrice?.amount,
    domain,
    rank: raw.rank,
  });

  return {
    ...(asin ? { asin } : {}),
    query,
    title,
    url,
    domain,
    snippet,
    rank: raw.rank,
    score,
    ...(matchedAsin ? { matchedAsin } : {}),
    ...(detectedPrice
      ? {
          detectedPrice: detectedPrice.amount,
          detectedPriceCurrency: detectedPrice.currency,
          ...(detectedPrice.label
            ? { detectedPriceLabel: detectedPrice.label }
            : {}),
          detectedPriceText: detectedPrice.text,
        }
      : {}),
    engines: dedupe(raw.engines.map(normalizeText).filter(Boolean)),
  };
}
|
|
|
|
/**
 * Computes the heuristic relevance score of a result.
 *
 * Starts at `100 - rank`, then adds 80 when the requested ASIN was matched,
 * 40 when an ASIN was found without one being requested, 30 when a price was
 * detected, and 20 for a known non-Amazon domain; Amazon domains lose 15.
 */
function scoreResult(input: {
  asin?: string;
  matchedAsin?: string;
  detectedPrice?: number;
  domain: string;
  rank: number;
}): number {
  const { asin, matchedAsin, detectedPrice, domain, rank } = input;
  const onAmazon = isAmazonDomain(domain);

  let score = 100 - rank;
  if (asin && matchedAsin === asin) {
    score += 80;
  }
  if (matchedAsin && !asin) {
    score += 40;
  }
  if (detectedPrice != null) {
    score += 30;
  }
  if (onAmazon) {
    score -= 15;
  } else if (domain) {
    score += 20;
  }
  return score;
}
|
|
|
|
/**
 * Parses a base URL and guarantees that its path ends with `/`, so relative
 * paths resolve underneath it instead of replacing the last segment.
 *
 * @throws TypeError when `value` is not a valid absolute URL.
 */
function normalizeBaseUrl(value: string): URL {
  const parsed = new URL(value);
  const { pathname } = parsed;
  if (pathname.endsWith("/")) {
    return parsed;
  }
  parsed.pathname = `${pathname}/`;
  return parsed;
}
|
|
|
|
/**
 * Returns the canonical serialization of a URL, or the trimmed input when it
 * cannot be parsed as an absolute URL.
 */
function normalizeUrl(value: string): string {
  let parsed: URL;
  try {
    parsed = new URL(value);
  } catch {
    return value.trim();
  }
  return parsed.toString();
}
|
|
|
|
/**
 * Returns the lower-cased hostname of a URL without a leading `www.`, or an
 * empty string when the value is not a parseable absolute URL.
 */
function extractDomain(value: string): string {
  let hostname: string;
  try {
    hostname = new URL(value).hostname;
  } catch {
    return "";
  }
  return hostname.replace(/^www\./i, "").toLowerCase();
}
|
|
|
|
/**
 * Tells whether a hostname has a label that is exactly `amazon` followed by
 * another label (e.g. `amazon.com`, `smile.amazon.de`).
 */
function isAmazonDomain(domain: string): boolean {
  const amazonLabel = /(^|\.)amazon\./i;
  return amazonLabel.test(domain);
}
|
|
|
|
/**
 * Returns the first ASIN-shaped token in the text, upper-cased, or
 * `undefined` when there is none.
 */
function findMatchedAsin(value: string): string | undefined {
  const hits = value.toUpperCase().match(ASIN_MATCH_REGEX);
  if (hits === null) {
    return undefined;
  }
  return hits[0];
}
|
|
|
|
/**
 * Finds the most relevant price in a text.
 *
 * Labelled prices win over unlabelled ones; among labelled prices the one
 * with the highest-priority label wins, the earliest occurrence breaking
 * ties. Without any labelled price, the first parseable unlabelled price is
 * returned.
 */
function detectPrice(value: string): PriceDetection | undefined {
  let bestLabeled: PriceDetection | undefined;
  for (const match of value.matchAll(LABELED_PRICE_REGEX)) {
    const candidate = parsePriceMatch(match[2], match[1]);
    if (!candidate) continue;
    // Strictly-better only, so the earliest candidate wins a tie.
    if (!bestLabeled || comparePriceDetections(candidate, bestLabeled) < 0) {
      bestLabeled = candidate;
    }
  }
  if (bestLabeled) return bestLabeled;

  for (const match of value.matchAll(PRICE_REGEX)) {
    const candidate = parsePriceMatch(match[1]);
    if (candidate) return candidate;
  }
  return undefined;
}
|
|
|
|
/**
 * Converts a matched price fragment (and optional label) into a
 * `PriceDetection`.
 *
 * @param rawPrice - Text captured by one of the price regexes.
 * @param rawLabel - Label captured in front of the price, if any.
 * @returns The detection, or `undefined` when no positive finite amount can be parsed.
 */
function parsePriceMatch(
  rawPrice: string | undefined,
  rawLabel?: string,
): PriceDetection | undefined {
  if (!rawPrice) return undefined;

  const text = normalizeText(rawPrice);
  const digits = /[0-9]{1,5}(?:,[0-9]{3})*(?:\.[0-9]{2})?/.exec(text)?.[0];
  if (!digits) return undefined;

  // Drop thousands separators before converting.
  const amount = Number(digits.split(",").join(""));
  if (!Number.isFinite(amount) || amount <= 0) return undefined;

  const detection: PriceDetection = {
    amount,
    currency: detectCurrency(text),
    text,
  };
  if (rawLabel) {
    const label = normalizeText(rawLabel).toLowerCase();
    if (label) detection.label = label;
  }
  return detection;
}
|
|
|
|
/** Sort comparator: detections with a higher-priority label come first. */
function comparePriceDetections(a: PriceDetection, b: PriceDetection): number {
  const leftRank = priceLabelRank(a.label);
  const rightRank = priceLabelRank(b.label);
  return leftRank - rightRank;
}
|
|
|
|
/**
 * Returns the priority of a price label: its index in `PRICE_LABELS` (lower
 * is better), or `PRICE_LABELS.length` for a missing or unknown label.
 */
function priceLabelRank(label: string | undefined): number {
  const position = PRICE_LABELS.findIndex((known) => known === label);
  if (position < 0) {
    return PRICE_LABELS.length;
  }
  return position;
}
|
|
|
|
/**
 * Infers a currency code from a price fragment by looking for a known code
 * or symbol. Rules are checked in order; anything unrecognised is treated as
 * "USD".
 */
function detectCurrency(value: string): string {
  const rules: ReadonlyArray<readonly [RegExp, string]> = [
    [/\b(EUR)\b|€/i, "EUR"],
    [/\b(GBP)\b|£/i, "GBP"],
    [/\b(INR)\b|₹/i, "INR"],
    [/\b(CAD)\b/i, "CAD"],
    [/\b(AUD)\b/i, "AUD"],
  ];
  for (const [pattern, code] of rules) {
    if (pattern.test(value)) return code;
  }
  return "USD";
}
|
|
|
|
/**
 * Escapes the characters that are special inside a regex character class
 * (`-`, `\`, `]`, `^`) by prefixing each with a backslash.
 */
function escapeForCharClass(value: string): string {
  return value.replace(/[-\\\]^]/g, (special) => `\\${special}`);
}
|
|
|
|
/**
 * Normalizes the `engines`/`engine` field of a JSON result into a list of
 * names: arrays are stringified element-wise (empty strings dropped), and a
 * single value becomes a one-element list when it is non-blank.
 */
function normalizeEngines(value: unknown): string[] {
  if (!Array.isArray(value)) {
    const single = optionalString(value);
    return single === undefined ? [] : [single];
  }
  return value
    .map((entry) => String(entry))
    .filter((name) => name.length > 0);
}
|
|
|
|
/**
 * Converts any value to a trimmed string, returning `undefined` for
 * null/undefined input or when the trimmed text is empty.
 */
function optionalString(value: unknown): string | undefined {
  if (value === null || value === undefined) {
    return undefined;
  }
  const trimmed = String(value).trim();
  return trimmed.length > 0 ? trimmed : undefined;
}
|
|
|
|
/**
 * Decodes HTML entities, collapses every run of whitespace into one space,
 * and trims the result.
 */
function normalizeText(value: string): string {
  const decoded = decodeHtmlEntities(value);
  const collapsed = decoded.replace(/\s+/g, " ");
  return collapsed.trim();
}
|
|
|
|
/** Joins two fragments with a single space; an empty left side yields `right` alone. */
function appendWithSpace(left: string, right: string): string {
  if (!left) {
    return right;
  }
  return `${left} ${right}`;
}
|
|
|
|
/**
 * Decodes the HTML entities that commonly appear in search-result text:
 * `&amp;`, `&quot;`, `&#39;`, `&lt;`, `&gt;` and `&nbsp;` (decoded to a
 * regular space).
 *
 * Decoding happens in a single pass, so already-escaped entities are decoded
 * exactly once (`&amp;lt;` becomes `&lt;`, not `<`). Unknown entities are
 * left untouched.
 *
 * @param value - Text that may contain HTML entities.
 * @returns The text with the supported entities replaced by their characters.
 */
function decodeHtmlEntities(value: string): string {
  return value.replace(
    /&(amp|quot|#39|lt|gt|nbsp);/g,
    (entity: string, name: string): string => {
      switch (name) {
        case "amp":
          return "&";
        case "quot":
          return '"';
        case "#39":
          return "'";
        case "lt":
          return "<";
        case "gt":
          return ">";
        case "nbsp":
          return " ";
        default:
          return entity;
      }
    },
  );
}
|
|
|
|
/** Returns the distinct values of the list, keeping first-occurrence order. */
function dedupe(values: string[]): string[] {
  const seen = new Set<string>();
  const unique: string[] = [];
  for (const value of values) {
    if (seen.has(value)) continue;
    seen.add(value);
    unique.push(value);
  }
  return unique;
}
|
|
|
|
/**
 * Reads a numeric environment variable through `Bun.env`.
 *
 * @param key - Environment variable name.
 * @param fallback - Value returned when the variable does not convert to a finite number.
 * @returns The converted number (not necessarily an integer) or `fallback`.
 */
function readEnvInt(key: string, fallback: number): number {
  const raw = Bun.env[key];
  const parsed = Number(raw);
  if (!Number.isFinite(parsed)) {
    return fallback;
  }
  return parsed;
}
|
|
|
|
/** Returns `value` when it is an integer greater than zero, otherwise `fallback`. */
function positiveInteger(value: number, fallback: number): number {
  if (!Number.isInteger(value)) return fallback;
  if (value <= 0) return fallback;
  return value;
}
|