290 lines
9.4 KiB
JavaScript
290 lines
9.4 KiB
JavaScript
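/**
 * High-level tools for working with Next.js page data: detecting Next.js
 * payloads, searching flight data, finding the build ID, and serializing
 * flight data elements.
 */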
|
|
|
|
import { getFlightData, hasFlightData } from './parser/flight_data.js';
|
|
import { getNextData, hasNextData } from './parser/next_data.js';
|
|
import { getNextStaticUrls, getBasePath, _NS } from './parser/urls.js';
|
|
import { _manifest_paths } from './parser/manifests.js';
|
|
import { DataContainer, DataParent, RSCPayload, Element, resolveType, _tl2obj, _dumped_element_keys } from './parser/types.js';
|
|
|
|
/**
 * Tell whether a page carries Next.js data of either kind this module knows:
 * next data or flight data. Next data is checked first; flight data is only
 * checked when the first check comes back falsy.
 *
 * @param {string} html - HTML string
 * @param {DOMParser} DOMParser - DOMParser instance
 * @returns {boolean} True if NextJS data exists
 */
export function hasNextJS(html, DOMParser) {
  const nextDataFound = hasNextData(html, DOMParser);
  if (nextDataFound) {
    return nextDataFound;
  }
  return hasFlightData(html, DOMParser);
}
|
|
|
|
/**
 * Lazily walk flight data and yield every element that passes the filters.
 *
 * While `recursive` is not `false`, a `DataContainer` is replaced by the items
 * of its `value` array and a `DataParent` by its `children`; the container or
 * parent itself is then NOT tested against the filters. With
 * `recursive === false` they are tested like any other entry.
 *
 * Class matching is exact: an entry matches when its `constructor` is one of
 * the given classes (subclasses of a listed class do not match).
 *
 * `null`/`undefined` entries (for example a `DataParent` without children)
 * never match a class filter; they are only yielded when no class filter is
 * given and the callback, if any, accepts them.
 *
 * @param {Object} flightData - Flight data dictionary
 * @param {Array|Set|null} classFilters - Element classes to filter by, or null for no class filter
 * @param {Function|null} callback - Predicate called with each candidate, or null for no predicate
 * @param {boolean} recursive - Search recursively (anything but `false` enables recursion)
 * @yields {Element} Matching elements
 */
export function* finditerInFlightData(flightData, classFilters = null, callback = null, recursive = true) {
  if (!flightData) {
    return;
  }

  // Build the Set once; recursive calls receive it and skip the conversion.
  const filterSet =
    classFilters === null || classFilters instanceof Set ? classFilters : new Set(classFilters);
  const descend = recursive !== false;

  for (const value of Object.values(flightData)) {
    if (descend && value instanceof DataContainer) {
      // Re-key the contained items by position so they look like flight data.
      const childDict = {};
      value.value.forEach((item, idx) => {
        childDict[idx] = item;
      });
      yield* finditerInFlightData(childDict, filterSet, callback, recursive);
      continue;
    }

    if (descend && value instanceof DataParent) {
      yield* finditerInFlightData({ 0: value.children }, filterSet, callback, recursive);
      continue;
    }

    // Guard against null/undefined before reading `.constructor`.
    const matchesClass =
      filterSet === null || (value != null && filterSet.has(value.constructor));
    if (!matchesClass) {
      continue;
    }

    if (callback === null || callback(value)) {
      yield value;
    }
  }
}
|
|
|
|
/**
 * Collect every match produced by `finditerInFlightData` into an array.
 * All four arguments are forwarded unchanged.
 *
 * @param {Object} flightData - Flight data dictionary
 * @param {Array} classFilters - Array of Element classes to filter by
 * @param {Function} callback - Callback function for filtering
 * @param {boolean} recursive - Search recursively
 * @returns {Array<Element>} Array of matching elements
 */
export function findallInFlightData(flightData, classFilters = null, callback = null, recursive = true) {
  const matches = finditerInFlightData(flightData, classFilters, callback, recursive);
  return [...matches];
}
|
|
|
|
/**
 * Return the first match produced by `finditerInFlightData`, or `null` when
 * the search yields nothing. The search stops after the first match.
 *
 * @param {Object} flightData - Flight data dictionary
 * @param {Array} classFilters - Array of Element classes to filter by
 * @param {Function} callback - Callback function for filtering
 * @param {boolean} recursive - Search recursively
 * @returns {Element|null} First matching element or null
 */
export function findInFlightData(flightData, classFilters = null, callback = null, recursive = true) {
  const matches = finditerInFlightData(flightData, classFilters, callback, recursive);
  const first = matches.next();
  if (first.done) {
    return null;
  }
  // Close the generator, as leaving a for...of loop early would.
  matches.return();
  return first.value;
}
|
|
|
|
/**
 * Find the build ID of a page. Three sources are tried in order and the first
 * hit wins:
 *
 * 1. the `_next/static` URLs of the page: a URL ending with one of the known
 *    manifest paths has the build ID in front of that suffix;
 * 2. the `buildId` key of the next data;
 * 3. the `build_id` of the first `RSCPayload` found in the flight data.
 *
 * A warning is logged when next data (or flight data) exists but does not
 * provide a build ID.
 *
 * @param {string} html - HTML string
 * @param {DOMParser} DOMParser - DOMParser instance
 * @returns {string|null} Build ID or null
 */
export function findBuildId(html, DOMParser) {
  // Search in static URLs
  const nextStaticUrls = getNextStaticUrls(html, DOMParser);
  // An empty array is truthy, so count the URLs explicitly: with no URLs there
  // is nothing to derive a base path from and this source is skipped.
  const staticUrlList = nextStaticUrls ? Array.from(nextStaticUrls) : [];
  if (staticUrlList.length > 0) {
    // NOTE(review): the `null, false` arguments are passed through as in the
    // original call; their meaning is defined by getBasePath — confirm there.
    const basePath = getBasePath(nextStaticUrls, null, false);
    for (const nextStaticUrl of staticUrlList) {
      // Drop the base path and the static prefix, leaving "<buildId><manifestPath>"
      // for manifest URLs.
      const slicedSu = nextStaticUrl.replace(basePath, '').replace(_NS, '');
      for (const manifestPath of _manifest_paths) {
        if (slicedSu.endsWith(manifestPath)) {
          return slicedSu.substring(0, slicedSu.length - manifestPath.length);
        }
      }
    }
  }

  // Search in next data
  const nextData = getNextData(html, DOMParser);
  if (nextData) {
    if ('buildId' in nextData) {
      return nextData.buildId;
    }
    console.warn("Found a next_data dict in the page, but didn't contain any `buildId` key.");
  }

  // Search in flight data
  const flightData = getFlightData(html, DOMParser);
  if (flightData) {
    const found = findInFlightData(flightData, [RSCPayload]);
    if (found) {
      return found.build_id;
    }
    console.warn(
      "Found flight data in the page, but couldn't find the build id. " +
        "If you are certain there is one, open an issue with your html to investigate :)"
    );
  }

  return null;
}
|
|
|
|
/**
 * Default JSON serializer for flight data elements.
 *
 * - A `BeautifulFD` becomes a plain object: it is iterated as `[key, value]`
 *   pairs and each key is stored as a string.
 * - An `Element` becomes `{ value, value_class, index, cls }`, where `cls` is
 *   the name of the element's constructor.
 * - Anything else is rejected.
 *
 * @param {*} obj - Object to serialize
 * @returns {Object} Serialized object
 * @throws {TypeError} When `obj` is neither a `BeautifulFD` nor an `Element`;
 *   the message names the offending type (constructor name for objects,
 *   `null` for null, the `typeof` result otherwise).
 */
export function defaultSerializer(obj) {
  if (obj instanceof BeautifulFD) {
    const result = {};
    for (const [key, value] of obj) {
      result[String(key)] = value;
    }
    return result;
  }

  if (obj instanceof Element) {
    return {
      value: obj.value,
      value_class: obj.value_class,
      index: obj.index,
      cls: obj.constructor.name,
    };
  }

  // `typeof` alone reports "object" for every unsupported object (and for
  // null), which makes the error useless for debugging; prefer the
  // constructor name when one is available.
  let typeName = typeof obj;
  if (obj === null) {
    typeName = 'null';
  } else if (typeName === 'object' && obj.constructor && obj.constructor.name) {
    typeName = obj.constructor.name;
  }
  throw new TypeError(`Object of type ${typeName} is not JSON serializable`);
}
|
|
|
|
/**
 * BeautifulFD class for simplified flight data access.
 *
 * Wraps a flight data dictionary (stored on `_flightData`, keyed by integer
 * index) and exposes iteration, list conversion and search helpers on it.
 */
export class BeautifulFD {
  /**
   * @param {Object|string} value - Either a flight data dictionary or an HTML
   *   string. Dictionary keys must be digit strings; each value must be an
   *   `Element` or an object carrying `value` and `value_class`. Objects that
   *   own all of `value`, `value_class`, `index` and `cls` are rebuilt through
   *   `resolveType`; other accepted values are stored as given.
   * @param {DOMParser|null} DOMParser - DOMParser instance, required when
   *   `value` is an HTML string
   * @throws {TypeError} On an unsupported `value` type, a non-digit key, or a
   *   value that is neither an Element nor an element dict
   * @throws {Error} When `value` is a string and no DOMParser is given
   */
  constructor(value, DOMParser = null) {
    if (typeof value === 'string') {
      if (!DOMParser) {
        throw new Error('DOMParser required when value is HTML string');
      }
      this._flightData = getFlightData(value, DOMParser);
      return;
    }

    if (typeof value !== 'object' || value === null || Array.isArray(value)) {
      throw new TypeError(`Given type "${typeof value}" is unsupported`);
    }

    const dumpedKeys = ['value', 'value_class', 'index', 'cls'];
    const flightData = {};
    for (const [key, val] of Object.entries(value)) {
      // Object.entries always produces string keys, so only the digit-string
      // form has to be recognised here.
      if (!/^\d+$/.test(key)) {
        throw new TypeError(`Given key ${key} in flight data dict is neither a digit string, nor an int.`);
      }
      const numKey = Number.parseInt(key, 10);

      const looksLikeElement =
        typeof val === 'object' && val !== null && 'value' in val && 'value_class' in val;
      if (looksLikeElement) {
        // A dumped element owns all four keys and must be turned back into an
        // Element; anything else with value/value_class is kept untouched.
        const ownKeys = Object.keys(val);
        const isDumped = dumpedKeys.every((name) => ownKeys.includes(name));
        flightData[numKey] = isDumped
          ? resolveType(val.value, val.value_class, val.index, val.cls)
          : val;
      } else if (val instanceof Element) {
        flightData[numKey] = val;
      } else {
        throw new TypeError(`Given value for key ${key} is neither an Element nor a valid element dict.`);
      }
    }
    this._flightData = flightData;
  }

  /**
   * @returns {string} `BeautifulFD(None)` without flight data, otherwise a
   *   summary with the element count
   */
  toString() {
    if (!this._flightData) {
      return 'BeautifulFD(None)';
    }
    return `BeautifulFD(<${this.length} elements>)`;
  }

  /**
   * @returns {number} Number of top-level entries (0 without flight data)
   */
  get length() {
    return this._flightData ? Object.keys(this._flightData).length : 0;
  }

  /**
   * @returns {boolean} Whether flight data is present. Both `null` and
   *   `undefined` count as absent, in line with `toString` and `length`.
   */
  valueOf() {
    return this._flightData != null;
  }

  /**
   * Iterate over `[index, element]` pairs, with `index` as an integer.
   * Yields nothing without flight data.
   */
  *[Symbol.iterator]() {
    if (!this._flightData) {
      return;
    }
    for (const [key, value] of Object.entries(this._flightData)) {
      yield [Number.parseInt(key, 10), value];
    }
  }

  /**
   * @returns {Array} The top-level elements, or an empty array without flight data
   */
  as_list() {
    return this._flightData ? Object.values(this._flightData) : [];
  }

  /**
   * Build a BeautifulFD from a list of elements.
   *
   * @param {Array} list - Elements to load
   * @param {boolean} viaEnumerate - When not every element has a numeric
   *   `index`, use the positions in `list` as indexes instead of failing
   * @returns {BeautifulFD}
   * @throws {Error} When some element lacks a numeric `index` and
   *   `viaEnumerate` is falsy
   */
  static from_list(list, viaEnumerate = false) {
    const allHaveIndex = list.every((item) => typeof item.index === 'number');

    if (!allHaveIndex && !viaEnumerate) {
      throw new Error(
        "Cannot load the given list since elements do not all have an index written on them. " +
          "You can set `viaEnumerate` to `true` to use the elements' positions in the given list as their indexes."
      );
    }

    const value = {};
    list.forEach((item, idx) => {
      // Own indexes win whenever every element has one.
      value[allHaveIndex ? item.index : idx] = item;
    });
    return new BeautifulFD(value);
  }

  /**
   * Lazily search the flight data.
   *
   * @param {Iterable|null} classFilters - Classes to match; strings are
   *   translated to classes through `_tl2obj`
   * @param {Function|null} callback - Callback function for filtering
   * @param {boolean} recursive - Search recursively
   * @yields {Element} Matching elements
   * @throws {Error} On first iteration, when a string filter is not a key of
   *   `_tl2obj`
   */
  *find_iter(classFilters = null, callback = null, recursive = true) {
    let newClassFilters = null;
    if (classFilters !== null) {
      newClassFilters = new Set();
      for (const cls of classFilters) {
        if (typeof cls !== 'string') {
          newClassFilters.add(cls);
          continue;
        }
        // Own-property check: `in` would also accept inherited names such as
        // "toString" or "constructor" and silently add a non-class filter.
        if (!Object.prototype.hasOwnProperty.call(_tl2obj, cls)) {
          throw new Error(`The class filter "${cls}" is not present in the list of conversion: ${Object.keys(_tl2obj).join(', ')}.`);
        }
        newClassFilters.add(_tl2obj[cls]);
      }
    }

    yield* finditerInFlightData(this._flightData, newClassFilters, callback, recursive);
  }

  /**
   * @param {Iterable|null} classFilters - Classes or class names to match
   * @param {Function|null} callback - Callback function for filtering
   * @param {boolean} recursive - Search recursively
   * @returns {Array<Element>} Every element produced by `find_iter`
   */
  find_all(classFilters = null, callback = null, recursive = true) {
    return Array.from(this.find_iter(classFilters, callback, recursive));
  }

  /**
   * @param {Iterable|null} classFilters - Classes or class names to match
   * @param {Function|null} callback - Callback function for filtering
   * @param {boolean} recursive - Search recursively
   * @returns {Element|null} First element produced by `find_iter`, or null
   */
  find(classFilters = null, callback = null, recursive = true) {
    for (const item of this.find_iter(classFilters, callback, recursive)) {
      return item;
    }
    return null;
  }
}
|