njsparser/js/parser/flight_data.js
2026-02-15 01:34:37 +01:00

216 lines
6.7 KiB
JavaScript

/**
* Flight data extraction and parsing
*/
import { makeTree } from '../utils.js';
import { resolveType } from './types.js';
// Regex patterns for matching flight data scripts.
//
// RE_F_INIT matches the initialisation statement
//   (self.__next_f = self.__next_f || []).push([...])
// with at most one optional whitespace character around `=` and `||`.
// Group 1 captures the array literal passed to push(); the capture is lazy,
// so it ends at the first `])` that follows.
const RE_F_INIT = /\(self\.__next_f\s?=\s?self\.__next_f\s?\|\|\s?\[\]\)\.push\((\[.+?\])\)/;
// RE_F_PAYLOAD matches a `self.__next_f.push([...])` call whose closing `)`
// is the very last character of the tested string (`$` without the `m` flag).
// Group 1 captures the array literal greedily. Neither pattern sets the `s`
// flag, so `.` never crosses a line terminator.
const RE_F_PAYLOAD = /self\.__next_f\.push\((\[.+)\)$/;
/**
 * Segment type tags.
 *
 * The first element of every raw flight data segment is compared against
 * these values to decide how the segment is handled. The table is frozen so
 * that no code path can accidentally reassign a tag at runtime.
 */
const Segment = Object.freeze({
  is_bootstrap: 0,
  is_not_bootstrap: 1,
  is_form_state: 2,
  is_binary: 3,
});
/**
 * Tell whether an HTML document contains flight data.
 *
 * The document is parsed with `makeTree`, and the text of every `<script>`
 * element is tested for the `self.__next_f` initialisation statement.
 *
 * @param {string} html - HTML string
 * @param {DOMParser} DOMParser - DOMParser instance
 * @returns {boolean} True if at least one script initialises flight data
 */
export function hasFlightData(html, DOMParser) {
  const scriptNodes = makeTree(html, DOMParser).querySelectorAll('script');
  // A missing/empty textContent is normalised to '' before testing.
  const scriptTexts = Array.from(scriptNodes, (node) => node.textContent || '');
  for (const text of scriptTexts) {
    if (RE_F_INIT.test(text)) {
      return true;
    }
  }
  return false;
}
/**
 * Collect the raw flight data segments embedded in an HTML document.
 *
 * Every `<script>` text is trimmed and inspected twice: once for the
 * initialisation statement (only the first one found in the document is
 * used) and once for a trailing `self.__next_f.push([...])` payload call.
 * Each captured array literal is parsed with `JSON.parse` and appended in
 * document order, so a single script can contribute both an init segment
 * and a payload segment.
 *
 * @param {string} html - HTML string
 * @param {DOMParser} DOMParser - DOMParser instance
 * @returns {Array|null} Parsed segments, or null when no script matched
 * @throws {SyntaxError} If a captured array literal is not valid JSON
 */
export function getRawFlightData(html, DOMParser) {
  const tree = makeTree(html, DOMParser);
  const scriptTexts = Array.from(
    tree.querySelectorAll('script'),
    (node) => node.textContent || ''
  );

  const segments = [];
  let initSeen = false;

  scriptTexts.forEach((text) => {
    const source = text.trim();

    // Only the first initialisation statement in the document counts.
    const init = initSeen ? null : source.match(RE_F_INIT);
    if (init) {
      initSeen = true;
      segments.push(JSON.parse(init[1]));
    }

    // Deliberately not an `else`: the same script may also end with a payload push.
    const payload = source.match(RE_F_PAYLOAD);
    if (payload) {
      segments.push(JSON.parse(payload[1]));
    }
  });

  return segments.length === 0 ? null : segments;
}
/**
 * Decode raw flight data segments into an ordered list of text chunks.
 *
 * Each segment is an array whose first element is a `Segment` tag:
 * - `is_bootstrap` (re)initialises the chunk buffer to an empty array;
 * - `is_not_bootstrap` appends `seg[1]` unchanged;
 * - `is_form_state` is accepted but contributes nothing to the result;
 * - `is_binary` base64-decodes `seg[1]` and appends the bytes decoded as UTF-8.
 *
 * `atob` yields a "binary string" with one character per byte. Pushing that
 * string directly would corrupt every non-ASCII character once the chunks are
 * later re-encoded as UTF-8, so the bytes are decoded as UTF-8 here.
 * Each binary segment is decoded on its own; a multi-byte character split
 * across two binary segments is therefore not reassembled.
 *
 * @param {Array} rawFlightData - Raw flight data array
 * @returns {Array<string>|undefined} Decoded flight data chunks, or undefined
 *   when no `is_bootstrap` segment was present
 * @throws {Error} If data is appended before a bootstrap segment, or a
 *   segment carries an unknown type tag
 */
export function decodeRawFlightData(rawFlightData) {
  const notInitializedMessage =
    'The `initialServerDataBuffer` was not yet initialized and a segment tried to append its data to it. ' +
    'This should not be happening if the flight data starts correctly with a the `is_bootstrap` segment.';
  // ignoreBOM keeps a leading U+FEFF in the output instead of silently dropping it.
  const utf8Decoder = new TextDecoder('utf-8', { ignoreBOM: true });
  let initialServerDataBuffer;

  for (const seg of rawFlightData) {
    const segmentType = seg[0];
    switch (segmentType) {
      case Segment.is_bootstrap:
        initialServerDataBuffer = [];
        break;

      case Segment.is_not_bootstrap:
        if (initialServerDataBuffer === undefined) {
          throw new Error(notInitializedMessage);
        }
        initialServerDataBuffer.push(seg[1]);
        break;

      case Segment.is_form_state:
        // Form state is not part of the server data stream; nothing to collect.
        break;

      case Segment.is_binary: {
        if (initialServerDataBuffer === undefined) {
          throw new Error(notInitializedMessage);
        }
        // Every char of the binary string is <= 0xFF, so charCodeAt gives the raw byte.
        const bytes = Uint8Array.from(atob(seg[1]), (ch) => ch.charCodeAt(0));
        initialServerDataBuffer.push(utf8Decoder.decode(bytes));
        break;
      }

      default:
        throw new Error(`Unknown segment type seg[0]=${segmentType}`);
    }
  }

  return initialServerDataBuffer;
}
// ASCII byte values used while scanning the encoded flight data.
const BYTE_NEWLINE = 0x0a;
const BYTE_COMMA = 0x2c;
const BYTE_COLON = 0x3a;
const BYTE_BACKSLASH = 0x5c;

/** True when `byte` is an ASCII lowercase hex digit (0-9, a-f). */
function isLowerHexByte(byte) {
  return (byte >= 0x30 && byte <= 0x39) || (byte >= 0x61 && byte <= 0x66);
}

/** True when `byte` is an ASCII uppercase letter (A-Z). */
function isUpperAsciiByte(byte) {
  return byte >= 0x41 && byte <= 0x5a;
}

/**
 * Find the newline that ends the JSON row starting at `start`.
 *
 * A newline counts as a row terminator only if it is not preceded by a
 * backslash and is followed by zero or more lowercase hex digits and a colon
 * (i.e. the next row's index). The last byte of the buffer is never a
 * terminator, so a trailing newline stays part of the final row.
 *
 * @returns {number} Offset of the terminating newline, or -1 if the row runs
 *   to the end of the buffer
 */
function findFlightRowEnd(bytes, start) {
  for (let i = start; i < bytes.length - 1; i++) {
    if (bytes[i] !== BYTE_NEWLINE) {
      continue;
    }
    if (i > 0 && bytes[i - 1] === BYTE_BACKSLASH) {
      continue;
    }
    let j = i + 1;
    while (j < bytes.length && isLowerHexByte(bytes[j])) {
      j++;
    }
    if (j < bytes.length && bytes[j] === BYTE_COLON) {
      return i;
    }
  }
  return -1;
}

/**
 * Parse decoded raw flight data into structured objects.
 *
 * The chunks are joined and encoded to UTF-8 bytes, then read row by row.
 * A row is `<hex index>:<uppercase class letters><payload>`:
 * - class `T` rows carry `<hex byte length>,<text>`; the text is taken
 *   verbatim for exactly that many bytes;
 * - every other row carries JSON up to the next row boundary (an empty
 *   payload becomes `null`).
 * Each row is passed through `resolveType(value, valueClass, index)`.
 *
 * @param {Array<string>} decodedRawFlightData - Decoded flight data chunks
 * @returns {Object} Dictionary mapping row indices to resolved elements; rows
 *   without an index are collected, in order, in an array under the key "null"
 * @throws {Error} If a `T` row has no `,` after its length or the length is
 *   not a non-negative hexadecimal number (previously this caused an endless loop)
 * @throws {SyntaxError} If a JSON row payload is not valid JSON
 */
export function parseDecodedRawFlightData(decodedRawFlightData) {
  // Offsets and text lengths in the format are byte counts, hence the byte view.
  const bytes = new TextEncoder().encode(decodedRawFlightData.join(''));
  const decoder = new TextDecoder();
  const indexedResult = {};
  let pos = 0;

  while (true) {
    const indexEnd = bytes.indexOf(BYTE_COLON, pos);
    if (indexEnd === -1) {
      break;
    }

    // Row index: hexadecimal, or absent (null) when the colon comes first.
    let index = null;
    if (indexEnd > pos) {
      index = Number.parseInt(decoder.decode(bytes.subarray(pos, indexEnd)), 16);
    }
    pos = indexEnd + 1;

    // Value class: the run of uppercase letters right after the colon.
    let valueClass = '';
    while (pos < bytes.length && isUpperAsciiByte(bytes[pos])) {
      valueClass += String.fromCharCode(bytes[pos]);
      pos++;
    }
    valueClass = valueClass || null;

    let value;
    if (valueClass === 'T') {
      const lengthEnd = bytes.indexOf(BYTE_COMMA, pos);
      if (lengthEnd === -1) {
        throw new Error(`Malformed text row at byte offset ${pos}: missing "," after the text length`);
      }
      const lengthHex = decoder.decode(bytes.subarray(pos, lengthEnd));
      const textLength = Number.parseInt(lengthHex, 16);
      // A NaN or negative length would stall or rewind `pos` and never terminate.
      if (!Number.isFinite(textLength) || textLength < 0) {
        throw new Error(`Malformed text row at byte offset ${pos}: invalid text length "${lengthHex}"`);
      }
      const textStart = lengthEnd + 1;
      value = decoder.decode(bytes.subarray(textStart, textStart + textLength));
      pos = textStart + textLength;
    } else {
      const rowEnd = findFlightRowEnd(bytes, pos);
      const rawValue = rowEnd !== -1 ? bytes.subarray(pos, rowEnd) : bytes.subarray(pos);
      pos = rowEnd !== -1 ? rowEnd + 1 : bytes.length;
      const rawText = decoder.decode(rawValue);
      value = rawText.length === 0 ? null : JSON.parse(rawText);
    }

    const resolved = resolveType(value, valueClass, index);
    if (index === null) {
      // Property keys are strings, so index-less rows accumulate under "null".
      if (!(index in indexedResult)) {
        indexedResult[index] = [];
      }
      indexedResult[index].push(resolved);
    } else {
      indexedResult[index] = resolved;
    }
  }

  return indexedResult;
}
/**
 * Get parsed flight data from HTML.
 *
 * Runs the full pipeline: extract the raw segments, decode them into text
 * chunks, then parse the chunks into indexed elements.
 *
 * @param {string} html - HTML string
 * @param {DOMParser} DOMParser - DOMParser instance
 * @returns {Object|null} Parsed flight data, or null when the document has no
 *   flight data segments or none of them initialised the server data buffer
 */
export function getFlightData(html, DOMParser) {
  const rawFlightData = getRawFlightData(html, DOMParser);
  if (rawFlightData === null) {
    return null;
  }

  const decodedRawFlightData = decodeRawFlightData(rawFlightData);
  // decodeRawFlightData yields undefined when no bootstrap segment was seen;
  // there is nothing to parse in that case, so report "no flight data"
  // instead of failing on `.join` inside the parser.
  if (decodedRawFlightData === undefined || decodedRawFlightData === null) {
    return null;
  }

  return parseDecodedRawFlightData(decodedRawFlightData);
}