/*
 * Version: 1.0
 *
 * The contents of this file are subject to the OpenVPMS License Version
 * 1.0 (the 'License'); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.openvpms.org/license/
 *
 * Software distributed under the License is distributed on an 'AS IS' basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * Copyright 2024 (C) OpenVPMS Ltd. All Rights Reserved.
 */


package org.openvpms.component.system.common.util;

import org.apache.commons.lang3.StringUtils;
import org.apache.oro.text.perl.Perl5Util;

import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


/**
 * Holds a number of string utility methods.
 * <p>
 * All methods are static and keep no mutable state; the regular expressions they use are compiled once.
 *
 * @author Jim Alateras
 */
public class StringUtilities {

    /**
     * Character class that matches control characters except '\n', '\r', '\t'.
     */
    private static final String CNTRL_CHARS
            = "[\\x00-\\x08\\x0B\\x0C\\x0E-\\x1F\\x7F]";

    /**
     * Pattern used to find and replace control characters.
     */
    private static final Pattern CONTROL_CHARS = Pattern.compile(CNTRL_CHARS);

    /**
     * Character class that matches 4-byte unicode, i.e. any code point outside the Basic Multilingual Plane.
     */
    private static final String UNSUPPORTED_UNICODE = "[^\\u0000-\\uFFFF]";

    /**
     * Pattern used to find and replace 4-byte unicode characters.
     */
    private static final Pattern FOUR_BYTE_UNICODE = Pattern.compile(UNSUPPORTED_UNICODE);

    /**
     * Pattern used to split a camel cased name: group 1 is the shortest run of word characters that is
     * followed by an upper case ASCII letter, group 2 is the rest of the line starting at that letter.
     * <p>
     * {@code (?d)} restricts the line terminators excluded by '.' to '\n' only.
     */
    private static final Pattern CAMEL_CASE_SPLIT = Pattern.compile("(?d)(\\w+?)([A-Z].*)");

    /**
     * Uncamel cases the specified name.
     * <p>
     * The name is split before each upper case ASCII letter that follows at least one word character.
     * The first character of each resulting word is capitalised, and the words are joined with a single
     * space, e.g. {@code "firstName"} becomes {@code "First Name"}.
     * <p>
     * Any text that precedes the word run located by a split is discarded, e.g. {@code "foo.barBaz"}
     * becomes {@code "Bar Baz"}.
     *
     * @param name the camel cased name. May be {@code null}
     * @return the uncamel cased name. May be {@code null}
     */
    public static String unCamelCase(String name) {
        if (name == null) {
            return null;
        }
        StringBuilder result = new StringBuilder(name.length() + 8);
        String remaining = name;
        Matcher matcher = CAMEL_CASE_SPLIT.matcher(remaining);

        while (matcher.find()) {
            result.append(capitalize(matcher.group(1))).append(' ');
            // continue splitting from the upper case letter onwards
            remaining = matcher.group(2);
            matcher.reset(remaining);
        }

        // whatever is left contains no further split point
        result.append(capitalize(remaining));
        return result.toString();
    }

    /**
     * Convert the incoming string to a regular expression. This means
     * escaping the '.' and converting all the '*' to '.*'
     * <p>
     * No other regular expression metacharacters are escaped; they are passed through unchanged.
     *
     * @param input the input string. Must not be {@code null}
     * @return the converted string
     */
    public static String toRegEx(String input) {
        // escape '.' first, so that the '.' introduced by the '*' conversion is left as a metacharacter
        return input.replace(".", "\\.").replace("*", ".*");
    }

    /**
     * The '*' character denotes a wildcard character. This method will do a
     * regular expression match against the input string. It first converts any
     * '*' characters to the equivalent '.*' regular expression before executing
     * a regex match. The entire string must match the expression.
     * <p>
     * When the expression contains no '*', a plain equality comparison is performed instead.
     *
     * @param str        the string that is matched. Must not be {@code null}
     * @param expression the expression to match, which can contain wild card characters.
     *                   Must not be {@code null}
     * @return {@code true} if it matches, otherwise {@code false}
     */
    public static boolean matches(String str, String expression) {
        if (!expression.contains("*")) {
            // use faster equals() when no wildcard specified
            return str.equals(expression);
        }
        return str.matches(toRegEx(expression));
    }

    /**
     * Determines if a string contains control characters that can cause rendering issues.
     * <p>
     * These are those that match the regexp pattern {@code \p{Cntrl}} except '\n', '\r', '\t'.
     *
     * @param str the string. May be {@code null}
     * @return {@code true} if the string contains control characters, otherwise {@code false}
     */
    public static boolean hasControlChars(String str) {
        return str != null && CONTROL_CHARS.matcher(str).find();
    }

    /**
     * Replaces any control characters.
     * <p>
     * These are those that match the regexp pattern {@code \p{Cntrl}} except '\n', '\r', '\t'.
     * <p>
     * The replacement is inserted literally; '$' and '\' have no special meaning.
     *
     * @param str  the string. May be {@code null}
     * @param with the string to replace the characters with
     * @return the string with any control characters replaced. May be {@code null}
     */
    public static String replaceControlChars(String str, String with) {
        return replaceAll(CONTROL_CHARS, str, with);
    }

    /**
     * Determines if a string contains 4-byte unicode.
     *
     * @param str the string. May be {@code null}
     * @return {@code true} if the string contains 4-byte unicode, otherwise {@code false}
     */
    public static boolean has4ByteUnicode(String str) {
        return str != null && FOUR_BYTE_UNICODE.matcher(str).find();
    }

    /**
     * Replaces any 4-byte unicode characters with the Unicode replacement character (U+FFFD).
     * <p>
     * This can be used to remove 4-byte emojis and any other character not supported by the current encoding
     * used by MySQL. See OVPMS-2534.
     *
     * @param str the string. May be {@code null}
     * @return the string, with any 4-byte unicode characters replaced with U+FFFD. May be {@code null}
     */
    public static String replace4ByteUnicode(String str) {
        return replace4ByteUnicode(str, "\uFFFD");
    }

    /**
     * Replaces any 4-byte unicode characters with the specified string.
     * <p>
     * This can be used to remove 4-byte emojis and any other character not supported by the current encoding
     * used by MySQL. See OVPMS-2534.
     * <p>
     * The replacement is inserted literally; '$' and '\' have no special meaning.
     *
     * @param str  the string. May be {@code null}
     * @param with the string to replace the characters with
     * @return the string, with any 4-byte unicode characters replaced. May be {@code null}
     */
    public static String replace4ByteUnicode(String str, String with) {
        return replaceAll(FOUR_BYTE_UNICODE, str, with);
    }

    /**
     * Replaces every match of a pattern with a literal replacement string.
     *
     * @param pattern the pattern to match
     * @param str     the string. May be {@code null}
     * @param with    the literal replacement
     * @return the string with all matches replaced, or {@code str} itself if there are no matches
     */
    private static String replaceAll(Pattern pattern, String str, String with) {
        if (str == null) {
            return null;
        }
        Matcher matcher = pattern.matcher(str);
        if (!matcher.find()) {
            // nothing to replace, so the replacement is never examined
            return str;
        }
        // quote the replacement so that '$' and '\' are not treated as group references/escapes
        return matcher.replaceAll(Matcher.quoteReplacement(with));
    }

    /**
     * Converts the first code point of a string to title case, leaving the rest unchanged.
     *
     * @param str the string. Must not be {@code null}
     * @return the capitalised string
     */
    private static String capitalize(String str) {
        if (str.isEmpty()) {
            return str;
        }
        int first = str.codePointAt(0);
        int title = Character.toTitleCase(first);
        if (first == title) {
            return str;
        }
        return new StringBuilder(str.length())
                .appendCodePoint(title)
                .append(str, Character.charCount(first), str.length())
                .toString();
    }
}
