Functional Specifications
- Dynamic BM25 calculation: Extract term frequency (tf) and document length factors to calculate relevance score dynamically.
- Feature Vector Assembly: Integrate BM25 outputs with PageRank, click-through rates (CTR), and publish recency parameters.
- Multi-Factor Model Scoring: Multi-weight re-ranking model combining scores into sorting metrics.
- NDCG Evaluation Checks: Compute dynamic DCG and IDCG metrics against human relevance tags to assess model quality.
Production reference implementations demonstrating BM25 calculations, linear ranking models, and NDCG@K evaluators:
// ─── JAVA BLUEPRINT ──────────────────────────────────────────────────────────
import java.util.*;
class SearchDocument {
private final String id;
private final String content;
private final double pageRank;
private final double clickThroughRate;
private final long publishTimeMs;
public SearchDocument(String id, String content, double pageRank, double clickThroughRate, long publishTimeMs) {
this.id = id;
this.content = content;
this.pageRank = pageRank;
this.clickThroughRate = clickThroughRate;
this.publishTimeMs = publishTimeMs;
}
public String getId() { return id; }
public String getContent() { return content; }
public double getPageRank() { return pageRank; }
public double getClickThroughRate() { return clickThroughRate; }
public long getPublishTimeMs() { return publishTimeMs; }
}
class FeatureVector {
private final double bm25;
private final double pageRank;
private final double ctr;
private final double recency;
public FeatureVector(double bm25, double pageRank, double ctr, double recency) {
this.bm25 = bm25;
this.pageRank = pageRank;
this.ctr = ctr;
this.recency = recency;
}
public double getBm25() { return bm25; }
public double getPageRank() { return pageRank; }
public double getCtr() { return ctr; }
public double getRecency() { return recency; }
}
interface RankingModel {
double score(FeatureVector features);
}
class LinearRankingModel implements RankingModel {
private final double wBm25;
private final double wPageRank;
private final double wCtr;
private final double wRecency;
public LinearRankingModel(double wBm25, double wPageRank, double wCtr, double wRecency) {
this.wBm25 = wBm25;
this.wPageRank = wPageRank;
this.wCtr = wCtr;
this.wRecency = wRecency;
}
@Override
public double score(FeatureVector features) {
return (features.getBm25() * wBm25) +
(features.getPageRank() * wPageRank) +
(features.getCtr() * wCtr) +
(features.getRecency() * wRecency);
}
}
class BM25Extractor {
private static final double K1 = 1.2;
private static final double B = 0.75;
private static final double AVG_DOC_LENGTH = 15.0;
public static double compute(String query, String docContent) {
String[] qTerms = query.toLowerCase().split("\s+");
String[] dTerms = docContent.toLowerCase().split("\s+");
int docLen = dTerms.length;
double score = 0.0;
for (String qTerm : qTerms) {
int tf = 0;
for (String dTerm : dTerms) {
if (dTerm.equals(qTerm)) tf++;
}
double idf = Math.log(1.0 + (10.0 / (1.0 + tf)));
double tfFactor = (tf * (K1 + 1.0)) / (tf + K1 * (1.0 - B + B * (docLen / AVG_DOC_LENGTH)));
score += idf * tfFactor;
}
return score;
}
}
class NDCGEvaluator {
public static double computeNDCG(List<Integer> relevanceScores, int k) {
if (relevanceScores.isEmpty() || k <= 0) return 0.0;
int limit = Math.min(relevanceScores.size(), k);
double dcg = 0.0;
for (int i = 0; i < limit; i++) {
int rel = relevanceScores.get(i);
dcg += (Math.pow(2, rel) - 1.0) / (Math.log(i + 2) / Math.log(2));
}
List<Integer> ideal = new ArrayList<>(relevanceScores);
ideal.sort(Comparator.reverseOrder());
double idcg = 0.0;
for (int i = 0; i < limit; i++) {
int rel = ideal.get(i);
idcg += (Math.pow(2, rel) - 1.0) / (Math.log(i + 2) / Math.log(2));
}
if (idcg == 0.0) return 0.0;
return dcg / idcg;
}
}
public class Main {
public static void main(String[] args) {
System.out.println("=== LEARNING TO RANK (LTR) SIMULATION ===");
// Setup candidate documents
List<SearchDocument> database = new ArrayList<>();
database.add(new SearchDocument("doc-1", "Machine learning patterns and neural networks are elegant", 0.95, 0.35, 1000L));
database.add(new SearchDocument("doc-2", "Deep learning algorithms and deep network models explain LLD structures", 0.85, 0.22, 2000L));
database.add(new SearchDocument("doc-3", "A recipe to bake delicious pancakes with blueberries", 0.50, 0.05, 500L));
// Ground-truth relevance mapping (manually labeled)
Map<String, Integer> groundTruth = new HashMap<>();
groundTruth.put("doc-1", 3); // Perfect
groundTruth.put("doc-2", 2); // Excellent
groundTruth.put("doc-3", 0); // Irrelevant
String query = "learning network";
System.out.println("Search Query: '" + query + "'");
// Model weights
LinearRankingModel model = new LinearRankingModel(0.4, 0.3, 0.2, 0.1);
System.out.println("Model weights: wBm25=0.4, wPageRank=0.3, wCtr=0.2, wRecency=0.1");
// Feature Extraction and Scoring
class ScoredDoc {
SearchDocument doc;
double score;
ScoredDoc(SearchDocument doc, double score) {
this.doc = doc;
this.score = score;
}
}
List<ScoredDoc> scoredDocs = new ArrayList<>();
for (SearchDocument doc : database) {
double bm25 = BM25Extractor.compute(query, doc.getContent());
double recency = 1.0 / (1.0 + (3000L - doc.getPublishTimeMs()) / 1000.0);
FeatureVector features = new FeatureVector(bm25, doc.getPageRank(), doc.getClickThroughRate(), recency);
double finalScore = model.score(features);
scoredDocs.add(new ScoredDoc(doc, finalScore));
System.out.println("Doc: " + doc.getId() + " | BM25: " + String.format("%.3f", bm25) + " | PageRank: " + doc.getPageRank() + " | CTR: " + doc.getClickThroughRate() + " | Recency: " + String.format("%.3f", recency) + " -> Score: " + String.format("%.3f", finalScore));
}
// Re-ranking
scoredDocs.sort((a, b) -> Double.compare(b.score, a.score));
System.out.println("\n--- RANKED SEARCH RESULTS ---");
List<Integer> activeRelevanceList = new ArrayList<>();
for (int i = 0; i < scoredDocs.size(); i++) {
ScoredDoc sd = scoredDocs.get(i);
int rel = groundTruth.getOrDefault(sd.doc.getId(), 0);
activeRelevanceList.add(rel);
System.out.println((i + 1) + ". " + sd.doc.getId() + " (Score: " + String.format("%.3f", sd.score) + ") [Human Relevance: " + rel + "]");
}
// NDCG Evaluation
double ndcg = NDCGEvaluator.computeNDCG(activeRelevanceList, 3);
System.out.println("\nCalculated Model Quality Metric: NDCG@3 = " + String.format("%.4f", ndcg));
System.out.println("Simulation finished successfully.");
}
}