/**
 * Extracts the charset from a text MIME type.
 *
 * The input is lower-cased before parsing, so the returned charset is always
 * lower-case. All `;`-separated parameters are inspected (not just the first
 * one), and a value wrapped in double quotes is unwrapped.
 *
 * @param mime - A MIME type string, optionally followed by `;`-separated parameters.
 * @returns `null` when the media type does not start with `text/`; otherwise the
 * value of the first non-empty `charset` parameter, or `"utf-8"` when none is present.
 *
 * @example
 * getTextCharset("text/plain; charset=utf-8") // "utf-8"
 * getTextCharset("text/plain; charset=iso-8859-1") // "iso-8859-1"
 * getTextCharset("text/html; charset=utf-8") // "utf-8"
 * getTextCharset("text/plain; format=flowed; charset=iso-8859-1") // "iso-8859-1"
 * getTextCharset('text/html; charset="utf-8"') // "utf-8"
 * getTextCharset("text/plain") // "utf-8"
 * getTextCharset("application/pdf") // null
 */
export function getTextCharset(mime: string): string | null {
    // This is a really simple implementation, but it should be good enough for our use case.
    // If we ever need a full implementation, we should use a library like https://github.com/jsdom/whatwg-mimetype
    const [mediaType = "", ...parameters] = mime.toLowerCase().split(";");
    if (!mediaType.trim().startsWith("text/")) {
        return null;
    }

    for (const parameter of parameters) {
        // Split on the first "=" only, so the parameter name is everything before it.
        const separatorIndex = parameter.indexOf("=");
        if (separatorIndex === -1) {
            continue;
        }
        if (parameter.slice(0, separatorIndex).trim() !== "charset") {
            continue;
        }

        let value = parameter.slice(separatorIndex + 1).trim();
        // Parameter values may be written as quoted strings, e.g. charset="utf-8".
        if (value.length >= 2 && value.startsWith('"') && value.endsWith('"')) {
            value = value.slice(1, -1).trim();
        }
        if (value) {
            return value;
        }
    }

    return "utf-8"; // assume utf-8 if charset is not specified
}
