/*
 * Decompiled with CFR 0.152.
 */
package uk.ac.shef.wit.simmetrics.similaritymetrics;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashSet;
import uk.ac.shef.wit.simmetrics.similaritymetrics.AbstractStringMetric;
import uk.ac.shef.wit.simmetrics.tokenisers.InterfaceTokeniser;
import uk.ac.shef.wit.simmetrics.tokenisers.TokeniserWhitespace;

/**
 * Cosine similarity between two strings, computed over the sets of distinct
 * tokens that the configured tokeniser produces for each string.
 *
 * <p>Each string is reduced to a set of terms, so repeated tokens count once.
 * The score is {@code |A ∩ B| / sqrt(|A| * |B|)}, where {@code A} and
 * {@code B} are the two term sets.
 */
public final class CosineSimilarity
extends AbstractStringMetric
implements Serializable {
    /**
     * Scaling constant applied by {@link #getSimilarityTimingEstimated}.
     * Kept as a private instance field so the class's default serialised
     * form is unchanged.
     */
    private final float ESTIMATEDTIMINGCONST = 3.8337143E-7f;

    /** Tokeniser used to split both input strings into terms. */
    private final InterfaceTokeniser tokeniser;

    /**
     * Creates a metric that tokenises its inputs with a
     * {@link TokeniserWhitespace}.
     */
    public CosineSimilarity() {
        this.tokeniser = new TokeniserWhitespace();
    }

    /**
     * Creates a metric that tokenises its inputs with the supplied tokeniser.
     *
     * @param tokeniserToUse tokeniser applied to both input strings; it is
     *                       stored as-is and not validated here
     */
    public CosineSimilarity(InterfaceTokeniser tokeniserToUse) {
        this.tokeniser = tokeniserToUse;
    }

    /**
     * Returns the short name of this metric.
     *
     * @return the string {@code "CosineSimilarity"}
     */
    public String getShortDescriptionString() {
        return "CosineSimilarity";
    }

    /**
     * Returns a one-sentence description of this metric.
     *
     * @return the long description text
     */
    public String getLongDescriptionString() {
        return "Implements the Cosine Similarity algorithm providing a similarity measure between two strings from the angular divergence within term based vector space";
    }

    /**
     * Explanation of a comparison; not implemented for this metric.
     *
     * @param string1 first string (unused)
     * @param string2 second string (unused)
     * @return always {@code null}
     */
    public String getSimilarityExplained(String string1, String string2) {
        return null;
    }

    /**
     * Estimates the cost of comparing the two strings as the square of their
     * combined length multiplied by a fixed constant.
     *
     * @param string1 first string; must not be {@code null}
     * @param string2 second string; must not be {@code null}
     * @return the estimate; NOTE(review): the unit is not stated in this
     *         file, presumably milliseconds, confirm against the base class
     */
    public float getSimilarityTimingEstimated(String string1, String string2) {
        float str1Length = string1.length();
        float str2Length = string2.length();
        return (str1Length + str2Length) * ((str1Length + str2Length) * this.ESTIMATEDTIMINGCONST);
    }

    /**
     * Computes the cosine similarity of the two strings' distinct-term sets.
     *
     * <p>If neither string yields any token the inputs are treated as
     * identical and {@code 1.0f} is returned; if exactly one yields no token
     * the result is {@code 0.0f}. Without this guard the formula would
     * evaluate {@code 0 / 0} and return {@code NaN}.
     *
     * @param string1 first string to compare
     * @param string2 second string to compare
     * @return a score from {@code 0.0f} (no shared terms) to {@code 1.0f}
     *         (identical term sets)
     */
    public float getSimilarity(String string1, String string2) {
        HashSet<String> firstTerms = new HashSet<String>(this.tokeniser.tokenizeToArrayList(string1));
        HashSet<String> secondTerms = new HashSet<String>(this.tokeniser.tokenizeToArrayList(string2));
        int termsInString1 = firstTerms.size();
        int termsInString2 = secondTerms.size();

        // Guard the degenerate cases: the denominator below would be zero.
        if (termsInString1 == 0 || termsInString2 == 0) {
            return termsInString1 == termsInString2 ? 1.0f : 0.0f;
        }

        // Reduce the first set to the terms that also occur in the second.
        firstTerms.retainAll(secondTerms);
        int commonTerms = firstTerms.size();

        // Multiply as double so the product cannot overflow int, and take a
        // single square root so identical sets divide by exactly their size.
        double magnitude = Math.sqrt((double) termsInString1 * (double) termsInString2);
        return (float) commonTerms / (float) magnitude;
    }

    /**
     * Returns the same value as {@link #getSimilarity(String, String)}; this
     * metric has no separate un-normalised form.
     *
     * @param string1 first string to compare
     * @param string2 second string to compare
     * @return the similarity score computed by {@code getSimilarity}
     */
    public float getUnNormalisedSimilarity(String string1, String string2) {
        return this.getSimilarity(string1, string2);
    }
}

