From d2eaaef6bec062f56ea04f40667193d37b009ee9 Mon Sep 17 00:00:00 2001 From: Tif Tran Date: Wed, 18 Mar 2026 14:41:29 -0700 Subject: [PATCH 1/2] [DISCO-3505] remove fakespot support for suggest --- components/suggest/src/db.rs | 186 +----------------- components/suggest/src/fakespot.rs | 225 --------------------- components/suggest/src/lib.rs | 1 - components/suggest/src/provider.rs | 14 +- components/suggest/src/query.rs | 8 - components/suggest/src/rs.rs | 25 --- components/suggest/src/schema.rs | 40 ++-- components/suggest/src/store.rs | 261 ------------------------- components/suggest/src/suggestion.rs | 62 +----- components/suggest/src/testing/data.rs | 62 ------ components/suggest/src/testing/mod.rs | 1 - examples/suggest-cli/src/main.rs | 2 - 12 files changed, 26 insertions(+), 861 deletions(-) delete mode 100644 components/suggest/src/fakespot.rs diff --git a/components/suggest/src/db.rs b/components/suggest/src/db.rs index 6f54f72deb..4ce90fd08a 100644 --- a/components/suggest/src/db.rs +++ b/components/suggest/src/db.rs @@ -17,14 +17,13 @@ use sql_support::{open_database, repeat_sql_vars, ConnExt}; use crate::{ config::{SuggestGlobalConfig, SuggestProviderConfig}, error::RusqliteResultExt, - fakespot, geoname::GeonameCache, provider::{AmpMatchingStrategy, SuggestionProvider}, query::{full_keywords_to_fts_content, FtsQuery}, rs::{ DownloadedAmoSuggestion, DownloadedAmpSuggestion, DownloadedDynamicRecord, - DownloadedDynamicSuggestion, DownloadedFakespotSuggestion, DownloadedMdnSuggestion, - DownloadedWikipediaSuggestion, Record, SuggestRecordId, SuggestRecordType, + DownloadedDynamicSuggestion, DownloadedMdnSuggestion, DownloadedWikipediaSuggestion, + Record, SuggestRecordId, SuggestRecordType, }, schema::{clear_database, SuggestConnectionInitializer}, suggestion::{cook_raw_suggestion_url, FtsMatchInfo, Suggestion}, @@ -762,113 +761,6 @@ impl<'a> SuggestDao<'a> { Ok(suggestions) } - /// Fetches Fakespot suggestions - pub fn fetch_fakespot_suggestions(&self, query: &SuggestionQuery) -> Result> { - let fts_query = query.fts_query(); - let sql = r#" - SELECT - s.id, - s.title, - s.url, - s.score, - f.fakespot_grade, - f.product_id, - f.rating, - f.total_reviews, - i.data, - i.mimetype, - f.keywords, - f.product_type - FROM - suggestions s - JOIN - fakespot_fts fts - ON fts.rowid = s.id - JOIN - fakespot_custom_details f - ON f.suggestion_id = s.id - LEFT JOIN - icons i - ON i.id = f.icon_id - WHERE - fakespot_fts MATCH ? - ORDER BY - s.score DESC - "# - .to_string(); - - // Store the list of results plus the suggestion id for calculating the FTS match info - let mut results = - self.conn - .query_rows_and_then_cached(&sql, (&fts_query.match_arg,), |row| { - let id: usize = row.get(0)?; - let score = fakespot::FakespotScore::new( - &query.keyword, - row.get(10)?, - row.get(11)?, - row.get(3)?, - ) - .as_suggest_score(); - Result::Ok(( - Suggestion::Fakespot { - title: row.get(1)?, - url: row.get(2)?, - score, - fakespot_grade: row.get(4)?, - product_id: row.get(5)?, - rating: row.get(6)?, - total_reviews: row.get(7)?, - icon: row.get(8)?, - icon_mimetype: row.get(9)?, - match_info: None, - }, - id, - )) - })?; - // Sort the results, then add the FTS match info to the first one - // For performance reasons, this is only calculated for the result with the highest score. - // We assume that only one that will be shown to the user and therefore the only one we'll - // collect metrics for. - results.sort(); - if let Some((suggestion, id)) = results.first_mut() { - match suggestion { - Suggestion::Fakespot { - match_info, title, .. - } => { - *match_info = Some(self.fetch_fakespot_fts_match_info(&fts_query, *id, title)?); - } - _ => unreachable!(), - } - } - Ok(results - .into_iter() - .map(|(suggestion, _)| suggestion) - .collect()) - } - - fn fetch_fakespot_fts_match_info( - &self, - fts_query: &FtsQuery<'_>, - suggestion_id: usize, - title: &str, - ) -> Result { - let prefix = if fts_query.is_prefix_query { - // If the query was a prefix match query then test if the query without the prefix - // match would have also matched. If not, then this counts as a prefix match. - let sql = "SELECT 1 FROM fakespot_fts WHERE rowid = ? AND fakespot_fts MATCH ?"; - let params = (&suggestion_id, &fts_query.match_arg_without_prefix_match); - !self.conn.exists(sql, params)? - } else { - // If not, then it definitely wasn't a prefix match - false - }; - - Ok(FtsMatchInfo { - prefix, - stemming: fts_query.match_required_stemming(title), - }) - } - /// Fetches dynamic suggestions pub fn fetch_dynamic_suggestions(&self, query: &SuggestionQuery) -> Result> { let Some(suggestion_types) = query @@ -1114,27 +1006,6 @@ impl<'a> SuggestDao<'a> { Ok(()) } - /// Inserts all suggestions from a downloaded Fakespot attachment into the database. - pub fn insert_fakespot_suggestions( - &mut self, - record_id: &SuggestRecordId, - suggestions: &[DownloadedFakespotSuggestion], - ) -> Result<()> { - let mut suggestion_insert = SuggestionInsertStatement::new(self.conn)?; - let mut fakespot_insert = FakespotInsertStatement::new(self.conn)?; - for suggestion in suggestions { - let suggestion_id = suggestion_insert.execute( - record_id, - &suggestion.title, - &suggestion.url, - suggestion.score, - SuggestionProvider::Fakespot, - )?; - fakespot_insert.execute(suggestion_id, suggestion)?; - } - Ok(()) - } - /// Inserts dynamic suggestion records data into the database. pub fn insert_dynamic_suggestions( &mut self, @@ -1295,14 +1166,6 @@ impl<'a> SuggestDao<'a> { named_params! { ":record_id": record_id.as_str() }, )?; self.scope.err_if_interrupted()?; - self.conn.execute_cached( - " - DELETE FROM fakespot_fts - WHERE rowid IN (SELECT id from suggestions WHERE record_id = :record_id) - ", - named_params! { ":record_id": record_id.as_str() }, - )?; - self.scope.err_if_interrupted()?; self.conn.execute_cached( "DELETE FROM suggestions WHERE record_id = :record_id", named_params! { ":record_id": record_id.as_str() }, @@ -1677,51 +1540,6 @@ impl<'conn> MdnInsertStatement<'conn> { } } -struct FakespotInsertStatement<'conn>(rusqlite::Statement<'conn>); - -impl<'conn> FakespotInsertStatement<'conn> { - fn new(conn: &'conn Connection) -> Result { - Ok(Self(conn.prepare( - "INSERT INTO fakespot_custom_details( - suggestion_id, - fakespot_grade, - product_id, - keywords, - product_type, - rating, - total_reviews, - icon_id - ) - VALUES(?, ?, ?, ?, ?, ?, ?, ?) - ", - )?)) - } - - fn execute( - &mut self, - suggestion_id: i64, - fakespot: &DownloadedFakespotSuggestion, - ) -> Result<()> { - let icon_id = fakespot - .product_id - .split_once('-') - .map(|(vendor, _)| format!("fakespot-{vendor}")); - self.0 - .execute(( - suggestion_id, - &fakespot.fakespot_grade, - &fakespot.product_id, - &fakespot.keywords.to_lowercase(), - &fakespot.product_type.to_lowercase(), - fakespot.rating, - fakespot.total_reviews, - icon_id, - )) - .with_context("fakespot insert")?; - Ok(()) - } -} - struct DynamicInsertStatement<'conn>(rusqlite::Statement<'conn>); impl<'conn> DynamicInsertStatement<'conn> { diff --git a/components/suggest/src/fakespot.rs b/components/suggest/src/fakespot.rs deleted file mode 100644 index 551eb1b96c..0000000000 --- a/components/suggest/src/fakespot.rs +++ /dev/null @@ -1,225 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -/// Fakespot-specific logic -/// -/// Score used to order Fakespot suggestions -/// -/// FakespotScore contains several components, each in the range of [0, 1] -pub struct FakespotScore { - /// Did the query match the `keywords` field exactly? - keywords_score: f64, - /// How well did the query match the `product_type` field? - product_type_score: f64, - /// Fakespot score from the RS data, this reflects the average review, number of reviews, - /// Fakespot grade, etc. - fakespot_score: f64, -} - -impl FakespotScore { - pub fn new(query: &str, keywords: String, product_type: String, fakespot_score: f64) -> Self { - let query = query.to_lowercase(); - let query_terms = split_terms(&query); - Self { - keywords_score: calc_keywords_score(&query_terms, &keywords), - product_type_score: calc_product_type_score(&query_terms, &product_type), - fakespot_score, - } - } - - /// Convert a FakespotScore into the value to use in `Sugggestion::Fakespot::score` - /// - /// This converts FakespotScore into a single float that: - /// - Is > 0.3 so that Fakespot suggestions are preferred to AMP ones - /// - Reflects the Fakespot ordering: - /// - Suggestions with higher keywords_score are greater - /// - If keywords_score is tied, then suggestions with higher product_type_scores are greater - /// - If both are tied, then suggestions with higher fakespot_score are greater - pub fn as_suggest_score(&self) -> f64 { - 0.30 + (0.01 * self.keywords_score) - + (0.001 * self.product_type_score) - + (0.0001 * self.fakespot_score) - } -} - -/// Split a string containing terms into a list of individual terms, normalized to lowercase -fn split_terms(string: &str) -> Vec<&str> { - string.split_whitespace().collect() -} - -fn calc_keywords_score(query_terms: &[&str], keywords: &str) -> f64 { - // Note: We can assume keywords is lower-case, since we do that during ingestion - let keyword_terms = split_terms(keywords); - if keyword_terms.is_empty() { - return 0.0; - } - - if query_terms == keyword_terms { - 1.0 - } else { - 0.0 - } -} - -fn calc_product_type_score(query_terms: &[&str], product_type: &str) -> f64 { - // Note: We can assume product_type is lower-case, since we do that during ingestion - let product_type_terms = split_terms(product_type); - if product_type_terms.is_empty() { - return 0.0; - } - let count = product_type_terms - .iter() - .filter(|t| query_terms.contains(t)) - .count() as f64; - count / product_type_terms.len() as f64 -} - -#[cfg(test)] -mod tests { - use super::*; - - struct KeywordsTestCase { - keywords: &'static str, - query: &'static str, - expected: f64, - } - - impl KeywordsTestCase { - fn test(&self) { - let actual = - calc_keywords_score(&split_terms(&self.query.to_lowercase()), self.keywords); - assert_eq!( - actual, self.expected, - "keywords: {} query: {} expected: {} actual: {actual}", - self.keywords, self.query, self.expected, - ); - } - } - - #[test] - fn test_keywords_score() { - // Keyword score 1.0 on exact matches, 0.0 otherwise - KeywordsTestCase { - keywords: "apple", - query: "apple", - expected: 1.0, - } - .test(); - KeywordsTestCase { - keywords: "apple", - query: "android", - expected: 0.0, - } - .test(); - KeywordsTestCase { - keywords: "apple", - query: "apple phone", - expected: 0.0, - } - .test(); - // Empty keywords should always score 0.0 - KeywordsTestCase { - keywords: "", - query: "", - expected: 0.0, - } - .test(); - KeywordsTestCase { - keywords: "", - query: "apple", - expected: 0.0, - } - .test(); - // Matching should be case insensitive - KeywordsTestCase { - keywords: "apple", - query: "Apple", - expected: 1.0, - } - .test(); - } - - struct ProductTypeTestCase { - query: &'static str, - product_type: &'static str, - expected: f64, - } - impl ProductTypeTestCase { - fn test(&self) { - let actual = calc_product_type_score( - &split_terms(&self.query.to_lowercase()), - self.product_type, - ); - assert_eq!( - actual, self.expected, - "product_type: {} query: {} expected: {} actual: {actual}", - self.product_type, self.query, self.expected, - ); - } - } - - #[test] - fn test_product_type_score() { - // Product type scores based on the percentage of terms in the product type that are also - // present in the query - ProductTypeTestCase { - product_type: "standing desk", - query: "standing desk", - expected: 1.0, - } - .test(); - ProductTypeTestCase { - product_type: "standing desk", - query: "desk", - expected: 0.5, - } - .test(); - ProductTypeTestCase { - product_type: "standing desk", - query: "desk desk desk", - expected: 0.5, - } - .test(); - ProductTypeTestCase { - product_type: "standing desk", - query: "standing", - expected: 0.5, - } - .test(); - ProductTypeTestCase { - product_type: "standing desk", - query: "phone", - expected: 0.0, - } - .test(); - // Extra terms in the query are ignored - ProductTypeTestCase { - product_type: "standing desk", - query: "standing desk for my office", - expected: 1.0, - } - .test(); - // Empty product_type should always score 0.0 - ProductTypeTestCase { - product_type: "", - query: "", - expected: 0.0, - } - .test(); - // Matching should be case insensitive - ProductTypeTestCase { - product_type: "desk", - query: "Desk", - expected: 1.0, - } - .test(); - // Extra spaces are ignored - ProductTypeTestCase { - product_type: "desk", - query: " desk ", - expected: 1.0, - } - .test(); - } -} diff --git a/components/suggest/src/lib.rs b/components/suggest/src/lib.rs index 6f28be654b..5603403bc5 100644 --- a/components/suggest/src/lib.rs +++ b/components/suggest/src/lib.rs @@ -8,7 +8,6 @@ pub mod benchmarks; mod config; mod db; mod error; -mod fakespot; mod geoname; mod metrics; mod provider; diff --git a/components/suggest/src/provider.rs b/components/suggest/src/provider.rs index 12f75b44ea..33cf3332bf 100644 --- a/components/suggest/src/provider.rs +++ b/components/suggest/src/provider.rs @@ -43,7 +43,7 @@ pub enum SuggestionProvider { Yelp = 5, Mdn = 6, Weather = 7, - Fakespot = 8, + // Fakespot = 8, Dynamic = 9, } @@ -56,7 +56,6 @@ impl fmt::Display for SuggestionProvider { Self::Yelp => write!(f, "yelp"), Self::Mdn => write!(f, "mdn"), Self::Weather => write!(f, "weather"), - Self::Fakespot => write!(f, "fakespot"), Self::Dynamic => write!(f, "dynamic"), } } @@ -73,7 +72,7 @@ impl FromSql for SuggestionProvider { } impl SuggestionProvider { - pub fn all() -> [Self; 8] { + pub fn all() -> [Self; 7] { [ Self::Amp, Self::Wikipedia, @@ -81,7 +80,6 @@ impl SuggestionProvider { Self::Yelp, Self::Mdn, Self::Weather, - Self::Fakespot, Self::Dynamic, ] } @@ -95,7 +93,7 @@ impl SuggestionProvider { 5 => Some(Self::Yelp), 6 => Some(Self::Mdn), 7 => Some(Self::Weather), - 8 => Some(Self::Fakespot), + // 8 => Some(Self::Fakespot), removed 9 => Some(Self::Dynamic), _ => None, } @@ -105,7 +103,6 @@ impl SuggestionProvider { pub(crate) fn primary_collection(&self) -> Collection { match self { Self::Amp => Collection::Amp, - Self::Fakespot => Collection::Fakespot, _ => Collection::Other, } } @@ -119,7 +116,6 @@ impl SuggestionProvider { Self::Yelp => SuggestRecordType::Yelp, Self::Mdn => SuggestRecordType::Mdn, Self::Weather => SuggestRecordType::Weather, - Self::Fakespot => SuggestRecordType::Fakespot, Self::Dynamic => SuggestRecordType::Dynamic, } } @@ -150,10 +146,6 @@ impl SuggestionProvider { SuggestRecordType::GeonamesAlternates, ]), )])), - Self::Fakespot => Some(HashMap::from([( - Collection::Fakespot, - HashSet::from([SuggestRecordType::Icon]), - )])), _ => None, } } diff --git a/components/suggest/src/query.rs b/components/suggest/src/query.rs index be2e68f2ed..e56a0334fe 100644 --- a/components/suggest/src/query.rs +++ b/components/suggest/src/query.rs @@ -93,14 +93,6 @@ impl SuggestionQuery { } } - pub fn fakespot(keyword: &str) -> Self { - Self { - keyword: keyword.into(), - providers: vec![SuggestionProvider::Fakespot], - ..Self::default() - } - } - pub fn weather(keyword: &str) -> Self { Self { keyword: keyword.into(), diff --git a/components/suggest/src/rs.rs b/components/suggest/src/rs.rs index 1de188d6b9..401fb4c1bb 100644 --- a/components/suggest/src/rs.rs +++ b/components/suggest/src/rs.rs @@ -46,7 +46,6 @@ use rusqlite::{types::ToSqlOutput, ToSql}; #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum Collection { Amp, - Fakespot, Other, } @@ -54,7 +53,6 @@ impl Collection { pub fn name(&self) -> &'static str { match self { Self::Amp => "quicksuggest-amp", - Self::Fakespot => "fakespot-suggest-products", Self::Other => "quicksuggest-other", } } @@ -83,7 +81,6 @@ pub struct SuggestRemoteSettingsClient { // Create a separate client for each collection name amp_client: Arc, other_client: Arc, - fakespot_client: Arc, } impl SuggestRemoteSettingsClient { @@ -91,7 +88,6 @@ impl SuggestRemoteSettingsClient { Self { amp_client: rs_service.make_client(Collection::Amp.name().to_owned()), other_client: rs_service.make_client(Collection::Other.name().to_owned()), - fakespot_client: rs_service.make_client(Collection::Fakespot.name().to_owned()), } } @@ -99,7 +95,6 @@ impl SuggestRemoteSettingsClient { match collection { Collection::Amp => &self.amp_client, Collection::Other => &self.other_client, - Collection::Fakespot => &self.fakespot_client, } } } @@ -194,8 +189,6 @@ pub(crate) enum SuggestRecord { Weather, #[serde(rename = "configuration")] GlobalConfig(DownloadedGlobalConfig), - #[serde(rename = "fakespot-suggestions")] - Fakespot, #[serde(rename = "dynamic-suggestions")] Dynamic(DownloadedDynamicRecord), #[serde(rename = "geonames-2")] // version 2 @@ -226,7 +219,6 @@ pub enum SuggestRecordType { Mdn, Weather, GlobalConfig, - Fakespot, Dynamic, Geonames, GeonamesAlternates, @@ -243,7 +235,6 @@ impl From<&SuggestRecord> for SuggestRecordType { SuggestRecord::Weather => Self::Weather, SuggestRecord::Yelp => Self::Yelp, SuggestRecord::GlobalConfig(_) => Self::GlobalConfig, - SuggestRecord::Fakespot => Self::Fakespot, SuggestRecord::Dynamic(_) => Self::Dynamic, SuggestRecord::Geonames => Self::Geonames, SuggestRecord::GeonamesAlternates => Self::GeonamesAlternates, @@ -278,7 +269,6 @@ impl SuggestRecordType { Self::Mdn, Self::Weather, Self::GlobalConfig, - Self::Fakespot, Self::Dynamic, Self::Geonames, Self::GeonamesAlternates, @@ -295,7 +285,6 @@ impl SuggestRecordType { Self::Mdn => "mdn-suggestions", Self::Weather => "weather", Self::GlobalConfig => "configuration", - Self::Fakespot => "fakespot-suggestions", Self::Dynamic => "dynamic-suggestions", Self::Geonames => "geonames-2", Self::GeonamesAlternates => "geonames-alternates", @@ -495,20 +484,6 @@ pub(crate) struct DownloadedMdnSuggestion { pub score: f64, } -/// A Fakespot suggestion to ingest from an attachment -#[derive(Clone, Debug, Deserialize)] -pub(crate) struct DownloadedFakespotSuggestion { - pub fakespot_grade: String, - pub product_id: String, - pub keywords: String, - pub product_type: String, - pub rating: f64, - pub score: f64, - pub title: String, - pub total_reviews: i64, - pub url: String, -} - /// A dynamic suggestion record's inline data #[derive(Clone, Debug, Deserialize, Serialize)] pub(crate) struct DownloadedDynamicRecord { diff --git a/components/suggest/src/schema.rs b/components/suggest/src/schema.rs index 3f7f74cb92..8c4f39eade 100644 --- a/components/suggest/src/schema.rs +++ b/components/suggest/src/schema.rs @@ -23,7 +23,7 @@ use sql_support::{ /// `clear_database()` by adding their names to `conditional_tables`, unless /// they are cleared via a deletion trigger or there's some other good /// reason not to do so. -pub const VERSION: u32 = 44; +pub const VERSION: u32 = 45; /// The current Suggest database schema. pub const SQL: &str = " @@ -126,31 +126,6 @@ CREATE TABLE amo_custom_details( FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE ); -CREATE TABLE fakespot_custom_details( - suggestion_id INTEGER PRIMARY KEY, - fakespot_grade TEXT NOT NULL, - product_id TEXT NOT NULL, - keywords TEXT NOT NULL, - product_type TEXT NOT NULL, - rating REAL NOT NULL, - total_reviews INTEGER NOT NULL, - icon_id TEXT, - FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE -); - -CREATE VIRTUAL TABLE IF NOT EXISTS fakespot_fts USING FTS5( - title, - content='', - contentless_delete=1, - tokenize=\"porter unicode61 remove_diacritics 2 tokenchars '''-'\" -); - -CREATE TRIGGER fakespot_ai AFTER INSERT ON fakespot_custom_details BEGIN - INSERT INTO fakespot_fts(rowid, title) - SELECT id, title - FROM suggestions - WHERE id = new.suggestion_id; -END; CREATE VIRTUAL TABLE IF NOT EXISTS amp_fts USING FTS5( full_keywords, @@ -847,6 +822,19 @@ impl ConnectionInitializer for SuggestConnectionInitializer<'_> { )?; Ok(()) } + 44 => { + // Remove Fakespot entirely and force re-ingestion + clear_database(tx)?; + + tx.execute_batch( + " + DROP TRIGGER IF EXISTS fakespot_ai; + DROP TABLE IF EXISTS fakespot_custom_details; + DROP TABLE IF EXISTS fakespot_fts; + ", + )?; + Ok(()) + } _ => Err(open_database::Error::IncompatibleVersion(version)), } diff --git a/components/suggest/src/store.rs b/components/suggest/src/store.rs index 334c44f317..ce09fcc26c 100644 --- a/components/suggest/src/store.rs +++ b/components/suggest/src/store.rs @@ -373,7 +373,6 @@ impl SuggestIngestionConstraints { SuggestionProvider::Yelp, SuggestionProvider::Mdn, SuggestionProvider::Weather, - SuggestionProvider::Fakespot, SuggestionProvider::Dynamic, ]), ..Self::default() @@ -451,7 +450,6 @@ impl SuggestStoreInner { SuggestionProvider::Yelp => dao.fetch_yelp_suggestions(&query), SuggestionProvider::Mdn => dao.fetch_mdn_suggestions(&query), SuggestionProvider::Weather => dao.fetch_weather_suggestions(&query), - SuggestionProvider::Fakespot => dao.fetch_fakespot_suggestions(&query), SuggestionProvider::Dynamic => dao.fetch_dynamic_suggestions(&query), }) })?; @@ -459,9 +457,6 @@ impl SuggestStoreInner { } // Note: it's important that this is a stable sort to keep the intra-provider order stable. - // For example, we can return multiple fakespot-suggestions all with `score=0.245`. In - // that case, they must be in the same order that `fetch_fakespot_suggestions` returned - // them in. suggestions.sort(); if let Some(limit) = query.limit.and_then(|limit| usize::try_from(limit).ok()) { suggestions.truncate(limit); @@ -775,11 +770,6 @@ where SuggestRecord::GlobalConfig(config) => { dao.put_global_config(&SuggestGlobalConfig::from(config))? } - SuggestRecord::Fakespot => { - self.download_attachment(dao, record, context, |dao, record_id, suggestions| { - dao.insert_fakespot_suggestions(record_id, suggestions) - })?; - } SuggestRecord::Dynamic(r) => { if constraints.matches_dynamic_record(r) { self.download_attachment( @@ -2663,257 +2653,6 @@ pub(crate) mod tests { Ok(()) } - #[test] - fn query_fakespot() -> anyhow::Result<()> { - before_each(); - - let store = TestStore::new( - MockRemoteSettingsClient::default() - .with_record(SuggestionProvider::Fakespot.record( - "fakespot-1", - json!([snowglobe_fakespot(), simpsons_fakespot()]), - )) - .with_record(SuggestionProvider::Fakespot.icon(fakespot_amazon_icon())), - ); - store.ingest(SuggestIngestionConstraints::all_providers()); - assert_eq!( - store.fetch_suggestions(SuggestionQuery::fakespot("globe")), - vec![snowglobe_suggestion(Some(FtsMatchInfo { - prefix: false, - stemming: false, - }),) - .with_fakespot_product_type_bonus(0.5)], - ); - assert_eq!( - store.fetch_suggestions(SuggestionQuery::fakespot("simpsons")), - vec![simpsons_suggestion(Some(FtsMatchInfo { - prefix: false, - stemming: false, - }),)], - ); - // The snowglobe suggestion should come before the simpsons one, since `snow` is a partial - // match on the product_type field. - assert_eq!( - store.fetch_suggestions(SuggestionQuery::fakespot("snow")), - vec![ - snowglobe_suggestion(Some(FtsMatchInfo { - prefix: false, - stemming: false, - }),) - .with_fakespot_product_type_bonus(0.5), - simpsons_suggestion(None), - ], - ); - // Test FTS by using a query where the keywords are separated in the source text - assert_eq!( - store.fetch_suggestions(SuggestionQuery::fakespot("simpsons snow")), - vec![simpsons_suggestion(Some(FtsMatchInfo { - prefix: false, - stemming: false, - }),)], - ); - // Special characters should be stripped out - assert_eq!( - store.fetch_suggestions(SuggestionQuery::fakespot("simpsons + snow")), - vec![simpsons_suggestion(Some(FtsMatchInfo { - prefix: false, - // This is incorrectly counted as stemming, since nothing matches the `+` - // character. TODO: fix this be improving the tokenizer in `FtsQuery`. - stemming: true, - }),)], - ); - - Ok(()) - } - - #[test] - fn fakespot_keywords() -> anyhow::Result<()> { - before_each(); - - let store = TestStore::new( - MockRemoteSettingsClient::default() - .with_record(SuggestionProvider::Fakespot.record( - "fakespot-1", - json!([ - // Snow normally returns the snowglobe first. Test using the keyword field - // to force the simpsons result first. - snowglobe_fakespot(), - simpsons_fakespot().merge(json!({"keywords": "snow"})), - ]), - )) - .with_record(SuggestionProvider::Fakespot.icon(fakespot_amazon_icon())), - ); - store.ingest(SuggestIngestionConstraints::all_providers()); - assert_eq!( - store.fetch_suggestions(SuggestionQuery::fakespot("snow")), - vec![ - simpsons_suggestion(Some(FtsMatchInfo { - prefix: false, - stemming: false, - }),) - .with_fakespot_keyword_bonus(), - snowglobe_suggestion(None).with_fakespot_product_type_bonus(0.5), - ], - ); - Ok(()) - } - - #[test] - fn fakespot_prefix_matching() -> anyhow::Result<()> { - before_each(); - - let store = TestStore::new( - MockRemoteSettingsClient::default() - .with_record(SuggestionProvider::Fakespot.record( - "fakespot-1", - json!([snowglobe_fakespot(), simpsons_fakespot()]), - )) - .with_record(SuggestionProvider::Fakespot.icon(fakespot_amazon_icon())), - ); - store.ingest(SuggestIngestionConstraints::all_providers()); - assert_eq!( - store.fetch_suggestions(SuggestionQuery::fakespot("simp")), - vec![simpsons_suggestion(Some(FtsMatchInfo { - prefix: true, - stemming: false, - }),)], - ); - assert_eq!( - store.fetch_suggestions(SuggestionQuery::fakespot("simps")), - vec![simpsons_suggestion(Some(FtsMatchInfo { - prefix: true, - stemming: false, - }),)], - ); - assert_eq!( - store.fetch_suggestions(SuggestionQuery::fakespot("simpson")), - vec![simpsons_suggestion(Some(FtsMatchInfo { - prefix: false, - stemming: false, - }),)], - ); - - Ok(()) - } - - #[test] - fn fakespot_updates_and_deletes() -> anyhow::Result<()> { - before_each(); - - let mut store = TestStore::new( - MockRemoteSettingsClient::default() - .with_record(SuggestionProvider::Fakespot.record( - "fakespot-1", - json!([snowglobe_fakespot(), simpsons_fakespot()]), - )) - .with_record(SuggestionProvider::Fakespot.icon(fakespot_amazon_icon())), - ); - store.ingest(SuggestIngestionConstraints::all_providers()); - - // Update the snapshot so that: - // - The Simpsons entry is deleted - // - Snow globes now use sea glass instead of glitter - store - .client_mut() - .update_record(SuggestionProvider::Fakespot.record( - "fakespot-1", - json!([ - snowglobe_fakespot().merge(json!({"title": "Make Your Own Sea Glass Snow Globes"})) - ]), - )); - store.ingest(SuggestIngestionConstraints::all_providers()); - - assert_eq!( - store.fetch_suggestions(SuggestionQuery::fakespot("glitter")), - vec![], - ); - assert!(matches!( - store.fetch_suggestions(SuggestionQuery::fakespot("sea glass")).as_slice(), - [ - Suggestion::Fakespot { title, .. } - ] - if title == "Make Your Own Sea Glass Snow Globes" - )); - - assert_eq!( - store.fetch_suggestions(SuggestionQuery::fakespot("simpsons")), - vec![], - ); - - Ok(()) - } - - /// Test the pathological case where we ingest records with the same id, but from different - /// collections - #[test] - fn same_record_id_different_collections() -> anyhow::Result<()> { - before_each(); - - let mut store = TestStore::new( - MockRemoteSettingsClient::default() - // This record is in the fakespot-suggest-products collection - .with_record( - SuggestionProvider::Fakespot - .record("fakespot-1", json!([snowglobe_fakespot()])), - ) - // This record is in the Amp collection, but it has a fakespot record ID - // for some reason. - .with_record(SuggestionProvider::Amp.record("fakespot-1", json![los_pollos_amp()])) - .with_record(SuggestionProvider::Amp.icon(los_pollos_icon())) - .with_record(SuggestionProvider::Fakespot.icon(fakespot_amazon_icon())), - ); - store.ingest(SuggestIngestionConstraints::all_providers()); - assert_eq!( - store.fetch_suggestions(SuggestionQuery::fakespot("globe")), - vec![snowglobe_suggestion(Some(FtsMatchInfo { - prefix: false, - stemming: false, - }),) - .with_fakespot_product_type_bonus(0.5)], - ); - assert_eq!( - store.fetch_suggestions(SuggestionQuery::amp("lo")), - vec![los_pollos_suggestion("los pollos", None)], - ); - // Test deleting one of the records - store - .client_mut() - .delete_record(SuggestionProvider::Amp.empty_record("fakespot-1")) - .delete_record(SuggestionProvider::Amp.icon(los_pollos_icon())); - store.ingest(SuggestIngestionConstraints::all_providers()); - // FIXME(Bug 1912283): this setup currently deletes both suggestions, since - // `drop_suggestions` only checks against record ID. - // - // assert_eq!( - // store.fetch_suggestions(SuggestionQuery::fakespot("globe")), - // vec![snowglobe_suggestion()], - // ); - // assert_eq!( - // store.fetch_suggestions(SuggestionQuery::amp("lo")), - // vec![], - // ); - - // However, we can test that the ingested records table has the correct entries - - let record_keys = store - .read(|dao| dao.get_ingested_records()) - .unwrap() - .into_iter() - .map(|r| format!("{}:{}", r.collection, r.id.as_str())) - .collect::>(); - assert_eq!( - record_keys - .iter() - .map(String::as_str) - .collect::>(), - HashSet::from([ - "fakespot-suggest-products:fakespot-1", - "fakespot-suggest-products:icon-fakespot-amazon", - ]), - ); - Ok(()) - } - #[test] fn dynamic_basic() -> anyhow::Result<()> { before_each(); diff --git a/components/suggest/src/suggestion.rs b/components/suggest/src/suggestion.rs index f05f3ae569..b9ed8608ec 100644 --- a/components/suggest/src/suggestion.rs +++ b/components/suggest/src/suggestion.rs @@ -84,21 +84,6 @@ pub enum Suggestion { city: Option, score: f64, }, - Fakespot { - fakespot_grade: String, - product_id: String, - rating: f64, - title: String, - total_reviews: i64, - url: String, - icon: Option>, - icon_mimetype: Option, - score: f64, - // Details about the FTS match. For performance reasons, this is only calculated for the - // result with the highest score. We assume that only one that will be shown to the user - // and therefore the only one we'll collect metrics for. - match_info: Option, - }, Dynamic { suggestion_type: String, data: Option, @@ -153,8 +138,7 @@ impl Suggestion { | Self::Amo { .. } | Self::Yelp { .. } | Self::Mdn { .. } - | Self::Weather { .. } - | Self::Fakespot { .. } => self.raw_url(), + | Self::Weather { .. } => self.raw_url(), } } @@ -165,8 +149,7 @@ impl Suggestion { | Self::Wikipedia { url, .. } | Self::Amo { url, .. } | Self::Yelp { url, .. } - | Self::Mdn { url, .. } - | Self::Fakespot { url, .. } => Some(url), + | Self::Mdn { url, .. } => Some(url), Self::Weather { .. } | Self::Dynamic { .. } => None, } } @@ -183,7 +166,6 @@ impl Suggestion { | Self::Yelp { .. } | Self::Mdn { .. } | Self::Weather { .. } - | Self::Fakespot { .. } | Self::Dynamic { .. } => self.url(), } } @@ -194,18 +176,16 @@ impl Suggestion { | Self::Wikipedia { title, .. } | Self::Amo { title, .. } | Self::Yelp { title, .. } - | Self::Mdn { title, .. } - | Self::Fakespot { title, .. } => title, + | Self::Mdn { title, .. } => title, _ => "untitled", } } pub fn icon_data(&self) -> Option<&[u8]> { match self { - Self::Amp { icon, .. } - | Self::Wikipedia { icon, .. } - | Self::Yelp { icon, .. } - | Self::Fakespot { icon, .. } => icon.as_deref(), + Self::Amp { icon, .. } | Self::Wikipedia { icon, .. } | Self::Yelp { icon, .. } => { + icon.as_deref() + } _ => None, } } @@ -217,41 +197,13 @@ impl Suggestion { | Self::Yelp { score, .. } | Self::Mdn { score, .. } | Self::Weather { score, .. } - | Self::Fakespot { score, .. } | Self::Dynamic { score, .. } => *score, Self::Wikipedia { .. } => DEFAULT_SUGGESTION_SCORE, } } pub fn fts_match_info(&self) -> Option<&FtsMatchInfo> { - match self { - Self::Fakespot { match_info, .. } => match_info.as_ref(), - _ => None, - } - } -} - -#[cfg(test)] -/// Testing utilitise -impl Suggestion { - pub fn with_fakespot_keyword_bonus(mut self) -> Self { - match &mut self { - Self::Fakespot { score, .. } => { - *score += 0.01; - } - _ => panic!("Not Suggestion::Fakespot"), - } - self - } - - pub fn with_fakespot_product_type_bonus(mut self, bonus: f64) -> Self { - match &mut self { - Self::Fakespot { score, .. } => { - *score += 0.001 * bonus; - } - _ => panic!("Not Suggestion::Fakespot"), - } - self + None } } diff --git a/components/suggest/src/testing/data.rs b/components/suggest/src/testing/data.rs index 51eb015e59..47055645cc 100644 --- a/components/suggest/src/testing/data.rs +++ b/components/suggest/src/testing/data.rs @@ -392,68 +392,6 @@ pub fn multimatch_wiki_suggestion() -> Suggestion { } } -// Fakespot test data - -pub fn snowglobe_fakespot() -> JsonValue { - json!({ - "fakespot_grade": "B", - "product_id": "amazon-ABC", - "keywords": "", - "product_type": "snow globe", - "rating": 4.7, - "score": 0.8, - "title": "Make Your Own Glitter Snow Globes", - "total_reviews": 152, - "url": "http://amazon.com/dp/ABC" - }) -} - -pub fn snowglobe_suggestion(match_info: Option) -> Suggestion { - Suggestion::Fakespot { - fakespot_grade: "B".into(), - product_id: "amazon-ABC".into(), - rating: 4.7, - title: "Make Your Own Glitter Snow Globes".into(), - total_reviews: 152, - url: "http://amazon.com/dp/ABC".into(), - score: 0.3 + 0.00008, - icon: Some("fakespot-icon-amazon-data".as_bytes().to_vec()), - icon_mimetype: Some("image/png".into()), - match_info, - } -} - -pub fn simpsons_fakespot() -> JsonValue { - json!({ - "fakespot_grade": "A", - // Use a product ID that doesn't match the ingested icons to test what happens. In this - // case, icon and icon_mimetype for the returned Suggestion should both be None. - "product_id": "vendorwithouticon-XYZ", - "keywords": "", - "product_type": "", - "rating": 4.9, - "score": 0.9, - "title": "The Simpsons: Skinner's Sense of Snow (DVD)", - "total_reviews": 14000, - "url": "http://vendorwithouticon.com/dp/XYZ" - }) -} - -pub fn simpsons_suggestion(match_info: Option) -> Suggestion { - Suggestion::Fakespot { - fakespot_grade: "A".into(), - product_id: "vendorwithouticon-XYZ".into(), - rating: 4.9, - title: "The Simpsons: Skinner's Sense of Snow (DVD)".into(), - total_reviews: 14000, - url: "http://vendorwithouticon.com/dp/XYZ".into(), - score: 0.3 + 0.00009, - icon: None, - icon_mimetype: None, - match_info, - } -} - pub fn fakespot_amazon_icon() -> MockIcon { MockIcon { id: "fakespot-amazon", diff --git a/components/suggest/src/testing/mod.rs b/components/suggest/src/testing/mod.rs index e14ac61225..59a1992994 100644 --- a/components/suggest/src/testing/mod.rs +++ b/components/suggest/src/testing/mod.rs @@ -63,7 +63,6 @@ impl Suggestion { Self::Mdn { score, .. } => score, Self::Weather { score, .. } => score, Self::Wikipedia { .. } => panic!("with_score not valid for wikipedia suggestions"), - Self::Fakespot { score, .. } => score, Self::Dynamic { score, .. } => score, }; *current_score = score; diff --git a/examples/suggest-cli/src/main.rs b/examples/suggest-cli/src/main.rs index 19684a9f3e..105596f17b 100644 --- a/examples/suggest-cli/src/main.rs +++ b/examples/suggest-cli/src/main.rs @@ -89,7 +89,6 @@ enum SuggestionProviderArg { Yelp, Mdn, Weather, - Fakespot, } impl From for SuggestionProvider { @@ -101,7 +100,6 @@ impl From for SuggestionProvider { SuggestionProviderArg::Yelp => Self::Yelp, SuggestionProviderArg::Mdn => Self::Mdn, SuggestionProviderArg::Weather => Self::Weather, - SuggestionProviderArg::Fakespot => Self::Fakespot, } } } From 43807a522b84a9a144247cfada0da39430adee7b Mon Sep 17 00:00:00 2001 From: Tif Tran Date: Wed, 18 Mar 2026 15:46:40 -0700 Subject: [PATCH 2/2] benchmark changes --- components/suggest/src/benchmarks/client.rs | 9 ---- components/suggest/src/benchmarks/ingest.rs | 10 ---- components/suggest/src/benchmarks/query.rs | 52 --------------------- components/suggest/src/testing/data.rs | 8 ---- 4 files changed, 79 deletions(-) diff --git a/components/suggest/src/benchmarks/client.rs b/components/suggest/src/benchmarks/client.rs index 640772c425..9337777687 100644 --- a/components/suggest/src/benchmarks/client.rs +++ b/components/suggest/src/benchmarks/client.rs @@ -38,15 +38,6 @@ impl RemoteSettingsBenchmarkClient { })?, rs::Collection::Other, )?; - new_benchmark_client.fetch_data_with_client( - remote_settings::RemoteSettings::new(remote_settings::RemoteSettingsConfig { - server: None, - bucket_name: None, - collection_name: rs::Collection::Fakespot.name().to_owned(), - server_url: None, - })?, - rs::Collection::Fakespot, - )?; Ok(new_benchmark_client) } diff --git a/components/suggest/src/benchmarks/ingest.rs b/components/suggest/src/benchmarks/ingest.rs index 43e791ff88..5fd2d0d059 100644 --- a/components/suggest/src/benchmarks/ingest.rs +++ b/components/suggest/src/benchmarks/ingest.rs @@ -172,14 +172,6 @@ pub fn all_benchmarks() -> Vec<(&'static str, IngestBenchmark)> { true, ), ), - ( - "ingest-fakespot", - IngestBenchmark::new(SuggestionProvider::Fakespot, false), - ), - ( - "ingest-again-fakespot", - IngestBenchmark::new(SuggestionProvider::Fakespot, true), - ), ] } @@ -191,8 +183,6 @@ pub fn print_debug_ingestion_sizes() { ); store .ingest(SuggestIngestionConstraints { - // Uncomment to measure the size for a specific provider - // providers: Some(vec![crate::SuggestionProvider::Fakespot]), ..SuggestIngestionConstraints::default() }) .unwrap(); diff --git a/components/suggest/src/benchmarks/query.rs b/components/suggest/src/benchmarks/query.rs index 02f4d81f26..c20a2dbac6 100644 --- a/components/suggest/src/benchmarks/query.rs +++ b/components/suggest/src/benchmarks/query.rs @@ -58,59 +58,7 @@ impl BenchmarkWithInput for QueryBenchmark { pub fn all_benchmarks() -> Vec<(&'static str, QueryBenchmark)> { vec![ - // Fakespot queries, these attempt to perform prefix matches with various character - // lengths. - // // The query code will only do a prefix match if the total input length is > 3 chars. - // Therefore, to test shorter prefixes we use 2-term queries. - ( - "query-fakespot-hand-s", - QueryBenchmark { - provider: SuggestionProvider::Fakespot, - query: "hand s", - should_match: true, - } - ), - ( - "query-fakespot-hand-sa", - QueryBenchmark { - provider: SuggestionProvider::Fakespot, - query: "hand sa", - should_match: true, - } - ), - ( - "query-fakespot-hand-san", - QueryBenchmark { - provider: SuggestionProvider::Fakespot, - query: "hand san", - should_match: true, - } - ), - ( - "query-fakespot-sani", - QueryBenchmark { - provider: SuggestionProvider::Fakespot, - query: "sani", - should_match: true, - } - ), - ( - "query-fakespot-sanit", - QueryBenchmark { - provider: SuggestionProvider::Fakespot, - query: "sanit", - should_match: true, - } - ), - ( - "query-fakespot-saniti", - QueryBenchmark { - provider: SuggestionProvider::Fakespot, - query: "saniti", - should_match: false, - }, - ), // weather: no matches ( diff --git a/components/suggest/src/testing/data.rs b/components/suggest/src/testing/data.rs index 47055645cc..9fde6658fa 100644 --- a/components/suggest/src/testing/data.rs +++ b/components/suggest/src/testing/data.rs @@ -391,11 +391,3 @@ pub fn multimatch_wiki_suggestion() -> Suggestion { full_keyword: "multimatch".into(), } } - -pub fn fakespot_amazon_icon() -> MockIcon { - MockIcon { - id: "fakespot-amazon", - data: "fakespot-icon-amazon-data", - mimetype: "image/png", - } -}