njsparser/js/tools.js
2026-02-15 00:12:22 +01:00

290 lines
9.4 KiB
JavaScript

/**
* High-level tools for working with flight data
*/
import { getFlightData, hasFlightData } from './parser/flight_data.js';
import { getNextData, hasNextData } from './parser/next_data.js';
import { getNextStaticUrls, getBasePath, _NS } from './parser/urls.js';
import { _manifest_paths } from './parser/manifests.js';
import { DataContainer, DataParent, RSCPayload, Element, resolveType, _tl2obj, _dumped_element_keys } from './parser/types.js';
/**
 * Tell whether a page carries NextJS data of either supported kind.
 *
 * `hasNextData` is consulted first; `hasFlightData` is only called when the
 * first check comes back falsy.
 *
 * @param {string} html - HTML string
 * @param {DOMParser} DOMParser - DOMParser instance
 * @returns {boolean} True if NextJS data exists
 */
export function hasNextJS(html, DOMParser) {
  const foundNextData = hasNextData(html, DOMParser);
  return foundNextData || hasFlightData(html, DOMParser);
}
/**
 * Lazily walk flight data and yield the elements that pass the given filters.
 *
 * Unless `recursive` is `false`, `DataContainer` and `DataParent` values are
 * not yielded themselves: the walk descends into `value.value` (each item) or
 * `value.children` instead. Class matching is by exact constructor
 * (`value.constructor`), so a subclass instance does not match a filter that
 * names its parent class.
 *
 * `null`/`undefined` entries have no constructor; when a class filter is given
 * they simply do not match (instead of raising a TypeError).
 *
 * @param {Object} flightData - Flight data dictionary; a falsy value yields nothing
 * @param {Array|Set|null} classFilters - Element classes to keep, or null to accept any class
 * @param {Function|null} callback - Predicate invoked with every candidate value; a truthy result keeps it
 * @param {boolean} recursive - Pass `false` to inspect only the top-level values
 * @yields {Element} Matching elements
 */
export function* finditerInFlightData(flightData, classFilters = null, callback = null, recursive = true) {
  if (!flightData) {
    return;
  }

  // Normalise the filters once; recursive calls receive the Set and reuse it as is.
  const allowedClasses = (classFilters === null || classFilters instanceof Set)
    ? classFilters
    : new Set(classFilters);
  const descend = recursive !== false;

  for (const value of Object.values(flightData)) {
    if (descend && value instanceof DataContainer) {
      // Re-key the container's items by position so they look like flight data.
      const children = {};
      value.value.forEach((item, idx) => {
        children[idx] = item;
      });
      yield* finditerInFlightData(children, allowedClasses, callback, recursive);
    } else if (descend && value instanceof DataParent) {
      yield* finditerInFlightData({ 0: value.children }, allowedClasses, callback, recursive);
    } else {
      // Reading `.constructor` on null/undefined would throw, so guard it.
      const hasConstructor = value !== null && value !== undefined;
      const matchesClass = allowedClasses === null
        || (hasConstructor && allowedClasses.has(value.constructor));
      // The callback is consulted for every candidate, whatever the class check said.
      const matchesCallback = callback === null || callback(value);
      if (matchesClass && matchesCallback) {
        yield value;
      }
    }
  }
}
/**
 * Collect every element of the flight data that passes the filters.
 *
 * Eager counterpart of `finditerInFlightData`: the iterator is drained into an
 * array, with the arguments forwarded unchanged.
 *
 * @param {Object} flightData - Flight data dictionary
 * @param {Array} classFilters - Array of Element classes to filter by
 * @param {Function} callback - Callback function for filtering
 * @param {boolean} recursive - Search recursively
 * @returns {Array<Element>} Array of matching elements
 */
export function findallInFlightData(flightData, classFilters = null, callback = null, recursive = true) {
  const matches = finditerInFlightData(flightData, classFilters, callback, recursive);
  return [...matches];
}
/**
 * Return the first element of the flight data that passes the filters.
 *
 * Only as much of `finditerInFlightData` as needed is consumed: iteration
 * stops right after the first yielded value.
 *
 * @param {Object} flightData - Flight data dictionary
 * @param {Array} classFilters - Array of Element classes to filter by
 * @param {Function} callback - Callback function for filtering
 * @param {boolean} recursive - Search recursively
 * @returns {Element|null} First matching element or null
 */
export function findInFlightData(flightData, classFilters = null, callback = null, recursive = true) {
  let firstMatch = null;
  for (const candidate of finditerInFlightData(flightData, classFilters, callback, recursive)) {
    firstMatch = candidate;
    break; // leaving the loop also closes the underlying generator
  }
  return firstMatch;
}
/**
 * Look up the build ID of a page, trying three sources in order.
 *
 * 1. Static URLs: each URL has the first occurrence of the base path and of
 *    `_NS` removed; if what remains ends with one of `_manifest_paths`, the
 *    part in front of that suffix is returned.
 * 2. Next data: the `buildId` key, when present (a warning is logged when the
 *    next data exists but lacks the key).
 * 3. Flight data: the `build_id` of the first `RSCPayload` element (a warning
 *    is logged when flight data exists but holds no such element).
 *
 * @param {string} html - HTML string
 * @param {DOMParser} DOMParser - DOMParser instance
 * @returns {string|null} Build ID or null
 */
export function findBuildId(html, DOMParser) {
  // 1) Manifest files referenced by the static URLs
  const staticUrls = getNextStaticUrls(html, DOMParser);
  if (staticUrls) {
    const prefix = getBasePath(staticUrls, null, false);
    for (const url of staticUrls) {
      const tail = url.replace(prefix, '').replace(_NS, '');
      for (const suffix of _manifest_paths) {
        if (!tail.endsWith(suffix)) {
          continue;
        }
        return tail.substring(0, tail.length - suffix.length);
      }
    }
  }

  // 2) Next data
  const pageData = getNextData(html, DOMParser);
  if (pageData) {
    if ('buildId' in pageData) {
      return pageData.buildId;
    }
    console.warn("Found a next_data dict in the page, but didn't contain any `buildId` key.");
  }

  // 3) Flight data
  const flight = getFlightData(html, DOMParser);
  if (!flight) {
    return null;
  }
  const payload = findInFlightData(flight, [RSCPayload]);
  if (payload) {
    return payload.build_id;
  }
  console.warn(
    "Found flight data in the page, but couldn't find the build id. " +
    "If you are certain there is one, open an issue with your html to investigate :)"
  );
  return null;
}
/**
 * Turn a flight data object into a plain, JSON-friendly structure.
 *
 * - A `BeautifulFD` becomes an object whose keys are the stringified indexes
 *   produced by iterating it, mapped to the corresponding values.
 * - An `Element` becomes `{ value, value_class, index, cls }`, where `cls` is
 *   the name of the element's constructor.
 *
 * @param {*} obj - Object to serialize
 * @returns {Object} Serialized object
 * @throws {TypeError} If `obj` is neither a `BeautifulFD` nor an `Element`
 */
export function defaultSerializer(obj) {
  if (obj instanceof BeautifulFD) {
    const plain = {};
    for (const pair of obj) {
      plain[String(pair[0])] = pair[1];
    }
    return plain;
  }

  if (obj instanceof Element) {
    const { value, value_class, index } = obj;
    return { value, value_class, index, cls: obj.constructor.name };
  }

  throw new TypeError(`Object of type ${typeof obj} is not JSON serializable`);
}
/**
 * BeautifulFD class for simplified flight data access.
 *
 * Wraps a flight data dictionary (index -> Element) and exposes iteration,
 * list conversion and search helpers on top of `finditerInFlightData`.
 */
export class BeautifulFD {
  /**
   * Build the wrapper from a flight data dictionary or from an HTML string.
   *
   * Dictionary input: every key must be a string of decimal digits, and every
   * value must be either an `Element` instance (kept as is) or a dumped
   * element dict carrying all of the own keys `value`, `value_class`, `index`
   * and `cls` (rebuilt through `resolveType`). Anything else is rejected, so
   * the stored flight data only ever contains elements.
   *
   * String input: the string is handed to `getFlightData` together with
   * `DOMParser`, and whatever that returns is stored.
   *
   * @param {Object|string} value - Flight data dictionary or HTML string
   * @param {DOMParser|null} DOMParser - DOMParser instance, required for HTML input
   * @throws {TypeError} On an unsupported `value` type, a non-digit key, or a
   *   value that is neither an Element nor a complete element dict
   * @throws {Error} When `value` is a string and no `DOMParser` is given
   */
  constructor(value, DOMParser = null) {
    if (typeof value === 'string') {
      if (!DOMParser) {
        throw new Error('DOMParser required when value is HTML string');
      }
      this._flightData = getFlightData(value, DOMParser);
      return;
    }
    if (typeof value !== 'object' || value === null || Array.isArray(value)) {
      throw new TypeError(`Given type "${typeof value}" is unsupported`);
    }

    const flightData = {};
    for (const [key, val] of Object.entries(value)) {
      // Object.entries always yields string keys, so only the digit check is needed.
      if (!/^\d+$/.test(key)) {
        throw new TypeError(`Given key ${key} in flight data dict is neither a digit string, nor an int.`);
      }
      const index = Number.parseInt(key, 10);

      // Check for real elements first: they also expose `value`/`value_class`
      // and must not be mistaken for (incomplete) dumped dicts.
      if (val instanceof Element) {
        flightData[index] = val;
        continue;
      }

      const isObject = typeof val === 'object' && val !== null;
      const ownKeys = isObject ? new Set(Object.keys(val)) : null;
      const isDumpedElement = isObject
        && ['value', 'value_class', 'index', 'cls'].every((name) => ownKeys.has(name));
      if (!isDumpedElement) {
        throw new TypeError(`Given value for key ${key} is neither an Element nor a valid element dict.`);
      }
      flightData[index] = resolveType(val.value, val.value_class, val.index, val.cls);
    }
    this._flightData = flightData;
  }

  /**
   * Short human-readable description of the wrapper.
   * @returns {string} `BeautifulFD(None)` without flight data, else the element count
   */
  toString() {
    if (!this._flightData) {
      return 'BeautifulFD(None)';
    }
    return `BeautifulFD(<${this.length} elements>)`;
  }

  /**
   * Number of top-level entries in the flight data (0 when there is none).
   * @returns {number} Entry count
   */
  get length() {
    return this._flightData ? Object.keys(this._flightData).length : 0;
  }

  /**
   * Whether any flight data is held. Both `null` and `undefined` count as
   * "no flight data", in line with `toString` and `length`.
   * @returns {boolean} True when flight data is present
   */
  valueOf() {
    return this._flightData !== null && this._flightData !== undefined;
  }

  /**
   * Iterate over the top-level entries.
   * @yields {Array} `[index, element]` pairs, with `index` as a number
   */
  *[Symbol.iterator]() {
    if (this._flightData) {
      for (const [key, value] of Object.entries(this._flightData)) {
        yield [Number.parseInt(key, 10), value];
      }
    }
  }

  /**
   * Top-level elements as an array (empty when there is no flight data).
   * @returns {Array<Element>} Elements in `Object.values` order
   */
  as_list() {
    return this._flightData ? Object.values(this._flightData) : [];
  }

  /**
   * Build a BeautifulFD from a list of elements.
   *
   * When every item has a numeric `index`, items are keyed by that index.
   * Otherwise `viaEnumerate` must be true, and items are keyed by their
   * position in the list.
   *
   * @param {Array} list - Elements (or dumped element dicts)
   * @param {boolean} viaEnumerate - Fall back to list positions as indexes
   * @returns {BeautifulFD} New instance
   * @throws {Error} When some item lacks a numeric index and `viaEnumerate` is false
   */
  static from_list(list, viaEnumerate = false) {
    const allHaveIndex = list.every((item) => typeof item.index === 'number');
    if (allHaveIndex) {
      const value = {};
      for (const item of list) {
        value[item.index] = item;
      }
      return new BeautifulFD(value);
    }
    if (viaEnumerate) {
      const value = {};
      list.forEach((item, idx) => {
        value[idx] = item;
      });
      return new BeautifulFD(value);
    }
    throw new Error(
      "Cannot load the given list since elements do not all have an index written on them. " +
      "You can set `viaEnumerate` to `true` to use the elements' positions in the given list as their indexes."
    );
  }

  /**
   * Lazily search the flight data.
   *
   * Class filters may be classes or class names; names are translated through
   * `_tl2obj`. Only own keys of `_tl2obj` are accepted, so inherited names such
   * as `"toString"` are rejected like any other unknown name.
   *
   * @param {Iterable|null} classFilters - Classes and/or class names to keep
   * @param {Function|null} callback - Callback function for filtering
   * @param {boolean} recursive - Search recursively
   * @yields {Element} Matching elements
   * @throws {Error} When a class name is not a key of `_tl2obj` (raised on first iteration)
   */
  *find_iter(classFilters = null, callback = null, recursive = true) {
    let resolvedFilters = null;
    if (classFilters !== null) {
      resolvedFilters = new Set();
      for (const cls of classFilters) {
        if (typeof cls !== 'string') {
          // Classes (and any other non-string value) are passed through untouched.
          resolvedFilters.add(cls);
          continue;
        }
        if (!Object.prototype.hasOwnProperty.call(_tl2obj, cls)) {
          throw new Error(`The class filter "${cls}" is not present in the list of conversion: ${Object.keys(_tl2obj).join(', ')}.`);
        }
        resolvedFilters.add(_tl2obj[cls]);
      }
    }
    yield* finditerInFlightData(this._flightData, resolvedFilters, callback, recursive);
  }

  /**
   * Eager version of `find_iter`.
   * @param {Iterable|null} classFilters - Classes and/or class names to keep
   * @param {Function|null} callback - Callback function for filtering
   * @param {boolean} recursive - Search recursively
   * @returns {Array<Element>} All matching elements
   */
  find_all(classFilters = null, callback = null, recursive = true) {
    return Array.from(this.find_iter(classFilters, callback, recursive));
  }

  /**
   * First result of `find_iter`, or null when nothing matches.
   * @param {Iterable|null} classFilters - Classes and/or class names to keep
   * @param {Function|null} callback - Callback function for filtering
   * @param {boolean} recursive - Search recursively
   * @returns {Element|null} First matching element or null
   */
  find(classFilters = null, callback = null, recursive = true) {
    for (const item of this.find_iter(classFilters, callback, recursive)) {
      return item;
    }
    return null;
  }
}