216 lines
6.7 KiB
JavaScript
216 lines
6.7 KiB
JavaScript
/**
 * Flight data extraction and parsing (`self.__next_f` script payloads).
 */
|
|
|
|
import { makeTree } from '../utils.js';
|
|
import { resolveType } from './types.js';
|
|
|
|
// Regex patterns for matching flight data scripts
|
|
const RE_F_INIT = /\(self\.__next_f\s?=\s?self\.__next_f\s?\|\|\s?\[\]\)\.push\((\[.+?\])\)/;
|
|
const RE_F_PAYLOAD = /self\.__next_f\.push\((\[.+)\)$/;
|
|
|
|
// Segment types
|
|
const Segment = {
|
|
is_bootstrap: 0,
|
|
is_not_bootstrap: 1,
|
|
is_form_state: 2,
|
|
is_binary: 3
|
|
};
|
|
|
|
/**
|
|
* Check if HTML contains flight data
|
|
* @param {string} html - HTML string
|
|
* @param {DOMParser} DOMParser - DOMParser instance
|
|
* @returns {boolean} True if flight data exists
|
|
*/
|
|
export function hasFlightData(html, DOMParser) {
|
|
const doc = makeTree(html, DOMParser);
|
|
const scripts = Array.from(doc.querySelectorAll('script')).map(s => s.textContent || '');
|
|
return scripts.some(script => RE_F_INIT.test(script));
|
|
}
|
|
|
|
/**
|
|
* Extract raw flight data from HTML
|
|
* @param {string} html - HTML string
|
|
* @param {DOMParser} DOMParser - DOMParser instance
|
|
* @returns {Array|null} Raw flight data array or null
|
|
*/
|
|
export function getRawFlightData(html, DOMParser) {
|
|
const doc = makeTree(html, DOMParser);
|
|
const scripts = Array.from(doc.querySelectorAll('script')).map(s => s.textContent || '');
|
|
|
|
const result = [];
|
|
let foundInit = false;
|
|
|
|
for (const script of scripts) {
|
|
const trimmed = script.trim();
|
|
|
|
// Check for initialization script
|
|
if (!foundInit) {
|
|
const initMatch = trimmed.match(RE_F_INIT);
|
|
if (initMatch) {
|
|
foundInit = true;
|
|
result.push(JSON.parse(initMatch[1]));
|
|
}
|
|
}
|
|
|
|
// Check for payload script
|
|
const payloadMatch = trimmed.match(RE_F_PAYLOAD);
|
|
if (payloadMatch) {
|
|
result.push(JSON.parse(payloadMatch[1]));
|
|
}
|
|
}
|
|
|
|
return result.length > 0 ? result : null;
|
|
}
|
|
|
|
/**
|
|
* Decode raw flight data segments
|
|
* @param {Array} rawFlightData - Raw flight data array
|
|
* @returns {Array<string>} Decoded flight data chunks
|
|
*/
|
|
export function decodeRawFlightData(rawFlightData) {
|
|
let initialServerDataBuffer;
|
|
let initialFormStateData;
|
|
|
|
for (const seg of rawFlightData) {
|
|
const segmentType = seg[0];
|
|
|
|
if (segmentType === Segment.is_bootstrap) {
|
|
initialServerDataBuffer = [];
|
|
} else if (segmentType === Segment.is_not_bootstrap) {
|
|
if (initialServerDataBuffer === undefined) {
|
|
throw new Error(
|
|
'The `initialServerDataBuffer` was not yet initialized and a segment tried to append its data to it. ' +
|
|
'This should not be happening if the flight data starts correctly with a the `is_bootstrap` segment.'
|
|
);
|
|
}
|
|
initialServerDataBuffer.push(seg[1]);
|
|
} else if (segmentType === Segment.is_form_state) {
|
|
initialFormStateData = seg[1];
|
|
} else if (segmentType === Segment.is_binary) {
|
|
if (initialServerDataBuffer === undefined) {
|
|
throw new Error(
|
|
'The `initialServerDataBuffer` was not yet initialized and a segment tried to append its data to it. ' +
|
|
'This should not be happening if the flight data starts correctly with a the `is_bootstrap` segment.'
|
|
);
|
|
}
|
|
// Decode base64
|
|
const decodedChunk = atob(seg[1]);
|
|
initialServerDataBuffer.push(decodedChunk);
|
|
} else {
|
|
throw new Error(`Unknown segment type seg[0]=${segmentType}`);
|
|
}
|
|
}
|
|
|
|
return initialServerDataBuffer;
|
|
}
|
|
|
|
/**
|
|
* Parse decoded raw flight data into structured objects
|
|
* @param {Array<string>} decodedRawFlightData - Decoded flight data chunks
|
|
* @returns {Object} Dictionary mapping indices to parsed elements
|
|
*/
|
|
export function parseDecodedRawFlightData(decodedRawFlightData) {
|
|
// Join and encode to bytes
|
|
const compiledRawFlightData = new TextEncoder().encode(decodedRawFlightData.join(''));
|
|
const indexedResult = {};
|
|
let pos = 0;
|
|
|
|
while (true) {
|
|
const indexStringEnd = compiledRawFlightData.indexOf(58, pos); // ':'
|
|
if (indexStringEnd === -1) {
|
|
break;
|
|
}
|
|
|
|
const indexStringRaw = compiledRawFlightData.slice(pos, indexStringEnd);
|
|
let index = null;
|
|
if (indexStringRaw.length > 0) {
|
|
const indexStr = new TextDecoder().decode(indexStringRaw);
|
|
index = parseInt(indexStr, 16);
|
|
}
|
|
pos = indexStringEnd + 1;
|
|
|
|
// Extract value class (uppercase letters)
|
|
let valueClass = '';
|
|
while (pos < compiledRawFlightData.length) {
|
|
const char = String.fromCharCode(compiledRawFlightData[pos]);
|
|
if (/[A-Z]/.test(char)) {
|
|
valueClass += char;
|
|
pos++;
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
valueClass = valueClass || null;
|
|
|
|
let value;
|
|
|
|
if (valueClass === 'T') {
|
|
const textLengthStringEnd = compiledRawFlightData.indexOf(44, pos); // ','
|
|
const textLengthHex = compiledRawFlightData.slice(pos, textLengthStringEnd);
|
|
const textLength = parseInt(new TextDecoder().decode(textLengthHex), 16);
|
|
const textStart = textLengthStringEnd + 1;
|
|
value = new TextDecoder().decode(compiledRawFlightData.slice(textStart, textStart + textLength));
|
|
pos = textStart + textLength;
|
|
} else {
|
|
// Find next split point
|
|
let dataEnd = -1;
|
|
for (let i = pos; i < compiledRawFlightData.length - 1; i++) {
|
|
if (compiledRawFlightData[i] === 10) { // '\n'
|
|
if (i === 0 || compiledRawFlightData[i - 1] !== 92) { // not escaped
|
|
let j = i + 1;
|
|
while (j < compiledRawFlightData.length && /[0-9a-f]/.test(String.fromCharCode(compiledRawFlightData[j]))) {
|
|
j++;
|
|
}
|
|
if (j < compiledRawFlightData.length && compiledRawFlightData[j] === 58) {
|
|
dataEnd = i;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
const rawValue = dataEnd !== -1
|
|
? compiledRawFlightData.slice(pos, dataEnd)
|
|
: compiledRawFlightData.slice(pos);
|
|
|
|
pos = dataEnd !== -1 ? dataEnd + 1 : compiledRawFlightData.length;
|
|
|
|
const rawText = new TextDecoder().decode(rawValue);
|
|
if (rawText.length === 0) {
|
|
value = null;
|
|
} else {
|
|
value = JSON.parse(rawText);
|
|
}
|
|
}
|
|
|
|
const resolved = resolveType(value, valueClass, index);
|
|
|
|
if (index === null) {
|
|
if (!(index in indexedResult)) {
|
|
indexedResult[index] = [];
|
|
}
|
|
indexedResult[index].push(resolved);
|
|
} else {
|
|
indexedResult[index] = resolved;
|
|
}
|
|
}
|
|
|
|
return indexedResult;
|
|
}
|
|
|
|
/**
|
|
* Get parsed flight data from HTML
|
|
* @param {string} html - HTML string
|
|
* @param {DOMParser} DOMParser - DOMParser instance
|
|
* @returns {Object|null} Parsed flight data or null
|
|
*/
|
|
export function getFlightData(html, DOMParser) {
|
|
const rawFlightData = getRawFlightData(html, DOMParser);
|
|
if (rawFlightData === null) {
|
|
return null;
|
|
}
|
|
const decodedRawFlightData = decodeRawFlightData(rawFlightData);
|
|
return parseDecodedRawFlightData(decodedRawFlightData);
|
|
}
|