Machine Coding Problem

Search Ranking (Learning to Rank)

maco30maco60macoAllsearchndcgfeature-engineering
Commonly Asked By:GoogleMicrosoftAmazonNetflix

Functional Specifications

  • Dynamic BM25 calculation: Extract term frequency (tf) and document length factors to calculate relevance score dynamically.
  • Feature Vector Assembly: Integrate BM25 outputs with PageRank, click-through rates (CTR), and publish recency parameters.
  • Multi-Factor Model Scoring: Multi-weight re-ranking model combining scores into sorting metrics.
  • NDCG Evaluation Checks: Compute dynamic DCG and IDCG metrics against human relevance tags to assess model quality.

Production reference implementations demonstrating BM25 calculations, linear ranking models, and NDCG@K evaluators:

// ─── JAVA BLUEPRINT ──────────────────────────────────────────────────────────
import java.util.*;

class SearchDocument {
    private final String id;
    private final String content;
    private final double pageRank;
    private final double clickThroughRate;
    private final long publishTimeMs;

    public SearchDocument(String id, String content, double pageRank, double clickThroughRate, long publishTimeMs) {
        this.id = id;
        this.content = content;
        this.pageRank = pageRank;
        this.clickThroughRate = clickThroughRate;
        this.publishTimeMs = publishTimeMs;
    }

    public String getId() { return id; }
    public String getContent() { return content; }
    public double getPageRank() { return pageRank; }
    public double getClickThroughRate() { return clickThroughRate; }
    public long getPublishTimeMs() { return publishTimeMs; }
}

class FeatureVector {
    private final double bm25;
    private final double pageRank;
    private final double ctr;
    private final double recency;

    public FeatureVector(double bm25, double pageRank, double ctr, double recency) {
        this.bm25 = bm25;
        this.pageRank = pageRank;
        this.ctr = ctr;
        this.recency = recency;
    }

    public double getBm25() { return bm25; }
    public double getPageRank() { return pageRank; }
    public double getCtr() { return ctr; }
    public double getRecency() { return recency; }
}

interface RankingModel {
    double score(FeatureVector features);
}

class LinearRankingModel implements RankingModel {
    private final double wBm25;
    private final double wPageRank;
    private final double wCtr;
    private final double wRecency;

    public LinearRankingModel(double wBm25, double wPageRank, double wCtr, double wRecency) {
        this.wBm25 = wBm25;
        this.wPageRank = wPageRank;
        this.wCtr = wCtr;
        this.wRecency = wRecency;
    }

    @Override
    public double score(FeatureVector features) {
        return (features.getBm25() * wBm25) +
               (features.getPageRank() * wPageRank) +
               (features.getCtr() * wCtr) +
               (features.getRecency() * wRecency);
    }
}

class BM25Extractor {
    private static final double K1 = 1.2;
    private static final double B = 0.75;
    private static final double AVG_DOC_LENGTH = 15.0;

    public static double compute(String query, String docContent) {
        String[] qTerms = query.toLowerCase().split("\s+");
        String[] dTerms = docContent.toLowerCase().split("\s+");
        int docLen = dTerms.length;

        double score = 0.0;
        for (String qTerm : qTerms) {
            int tf = 0;
            for (String dTerm : dTerms) {
                if (dTerm.equals(qTerm)) tf++;
            }

            double idf = Math.log(1.0 + (10.0 / (1.0 + tf)));
            double tfFactor = (tf * (K1 + 1.0)) / (tf + K1 * (1.0 - B + B * (docLen / AVG_DOC_LENGTH)));
            score += idf * tfFactor;
        }
        return score;
    }
}

class NDCGEvaluator {
    public static double computeNDCG(List<Integer> relevanceScores, int k) {
        if (relevanceScores.isEmpty() || k <= 0) return 0.0;
        int limit = Math.min(relevanceScores.size(), k);

        double dcg = 0.0;
        for (int i = 0; i < limit; i++) {
            int rel = relevanceScores.get(i);
            dcg += (Math.pow(2, rel) - 1.0) / (Math.log(i + 2) / Math.log(2));
        }

        List<Integer> ideal = new ArrayList<>(relevanceScores);
        ideal.sort(Comparator.reverseOrder());

        double idcg = 0.0;
        for (int i = 0; i < limit; i++) {
            int rel = ideal.get(i);
            idcg += (Math.pow(2, rel) - 1.0) / (Math.log(i + 2) / Math.log(2));
        }

        if (idcg == 0.0) return 0.0;
        return dcg / idcg;
    }
}

public class Main {
    public static void main(String[] args) {
        System.out.println("=== LEARNING TO RANK (LTR) SIMULATION ===");

        // Setup candidate documents
        List<SearchDocument> database = new ArrayList<>();
        database.add(new SearchDocument("doc-1", "Machine learning patterns and neural networks are elegant", 0.95, 0.35, 1000L));
        database.add(new SearchDocument("doc-2", "Deep learning algorithms and deep network models explain LLD structures", 0.85, 0.22, 2000L));
        database.add(new SearchDocument("doc-3", "A recipe to bake delicious pancakes with blueberries", 0.50, 0.05, 500L));

        // Ground-truth relevance mapping (manually labeled)
        Map<String, Integer> groundTruth = new HashMap<>();
        groundTruth.put("doc-1", 3); // Perfect
        groundTruth.put("doc-2", 2); // Excellent
        groundTruth.put("doc-3", 0); // Irrelevant

        String query = "learning network";
        System.out.println("Search Query: '" + query + "'");

        // Model weights
        LinearRankingModel model = new LinearRankingModel(0.4, 0.3, 0.2, 0.1);
        System.out.println("Model weights: wBm25=0.4, wPageRank=0.3, wCtr=0.2, wRecency=0.1");

        // Feature Extraction and Scoring
        class ScoredDoc {
            SearchDocument doc;
            double score;
            ScoredDoc(SearchDocument doc, double score) {
                this.doc = doc;
                this.score = score;
            }
        }

        List<ScoredDoc> scoredDocs = new ArrayList<>();
        for (SearchDocument doc : database) {
            double bm25 = BM25Extractor.compute(query, doc.getContent());
            double recency = 1.0 / (1.0 + (3000L - doc.getPublishTimeMs()) / 1000.0);
            FeatureVector features = new FeatureVector(bm25, doc.getPageRank(), doc.getClickThroughRate(), recency);
            double finalScore = model.score(features);
            scoredDocs.add(new ScoredDoc(doc, finalScore));
            System.out.println("Doc: " + doc.getId() + " | BM25: " + String.format("%.3f", bm25) + " | PageRank: " + doc.getPageRank() + " | CTR: " + doc.getClickThroughRate() + " | Recency: " + String.format("%.3f", recency) + " -> Score: " + String.format("%.3f", finalScore));
        }

        // Re-ranking
        scoredDocs.sort((a, b) -> Double.compare(b.score, a.score));

        System.out.println("\n--- RANKED SEARCH RESULTS ---");
        List<Integer> activeRelevanceList = new ArrayList<>();
        for (int i = 0; i < scoredDocs.size(); i++) {
            ScoredDoc sd = scoredDocs.get(i);
            int rel = groundTruth.getOrDefault(sd.doc.getId(), 0);
            activeRelevanceList.add(rel);
            System.out.println((i + 1) + ". " + sd.doc.getId() + " (Score: " + String.format("%.3f", sd.score) + ") [Human Relevance: " + rel + "]");
        }

        // NDCG Evaluation
        double ndcg = NDCGEvaluator.computeNDCG(activeRelevanceList, 3);
        System.out.println("\nCalculated Model Quality Metric: NDCG@3 = " + String.format("%.4f", ndcg));
        System.out.println("Simulation finished successfully.");
    }
}