From 2561338a35dfc95f6a6f675bbe782a5afbe0fd4e Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Sat, 28 Feb 2026 11:48:05 +0000 Subject: [PATCH] perf(TextUtils): extract regexes to private constants to avoid recompilation Co-authored-by: Aatricks <113598245+Aatricks@users.noreply.github.com> --- .../aatricks/novelscraper/util/TextUtils.kt | 71 +++++++++++-------- .../util/TextUtilsBenchmarkTest.kt | 46 ++++++++++++ 2 files changed, 87 insertions(+), 30 deletions(-) create mode 100644 app/src/test/java/io/aatricks/novelscraper/util/TextUtilsBenchmarkTest.kt diff --git a/app/src/main/java/io/aatricks/novelscraper/util/TextUtils.kt b/app/src/main/java/io/aatricks/novelscraper/util/TextUtils.kt index fe7ba74..f0eb60a 100644 --- a/app/src/main/java/io/aatricks/novelscraper/util/TextUtils.kt +++ b/app/src/main/java/io/aatricks/novelscraper/util/TextUtils.kt @@ -59,6 +59,38 @@ object TextUtils { Regex("(\\d+(?:\\.\\d+)?)(?!.*\\d)", RegexOption.IGNORE_CASE) ) + private val COMMON_JUNK_REGEXES = listOf( + Regex("(?i)^read\\s+"), + Regex("(?i)\\s+free\\s+online.*\$"), + Regex("(?i)\\s+online\\s+free.*\$"), + Regex("(?i)\\s*\\|\\s*.*\$"), + Regex("(?i)\\s+at\\s+.*\$"), + Regex("(?i)[\\s–—\\-:]*(MangaBat|NovelFire|MangaPark|MangaKakalot).*\$"), + Regex("(?i)[\\s–—\\-:]*Scan.*\$") + ) + + private val CHAPTER_MARKERS_REGEXES = listOf( + Regex("[–—\\-:]?\\s*(?:chapter|ch|ch\\.)\\s*\\d+.*$", RegexOption.IGNORE_CASE), + Regex("\\s*[–—\\-]\\s*\\d+.*$"), + Regex("\\s*:\\s*\\d+.*$") + ) + + private val CLEAN_SEPARATORS_START_REGEX = Regex("^[\\s–—\\-:\\|]+") + private val CLEAN_SEPARATORS_END_REGEX = Regex("[\\s–—\\-:\\|]+$") + + private val EXTRACT_CHAPTER_LABEL_REGEX_1 = Regex("(?i)(?:chapter|ch|ch\\.|c)\\s*(\\d+)") + private val EXTRACT_CHAPTER_LABEL_REGEX_2 = Regex("[\\s:\\-—–|](\\d+)\\s*$") + private val EXTRACT_CHAPTER_LABEL_REGEX_3 = Regex("\\b(\\d+)\\b") + + private val EXTRACT_CHAPTER_LABEL_URL_REGEXES = listOf( + Regex("chapter\\s*(\\d+)", RegexOption.IGNORE_CASE), + Regex("ch(?:apter)?\\D*(\\d+)", RegexOption.IGNORE_CASE), + Regex("/(\\d+)(?:/|$)"), + Regex("-" + "(\\d+)(?:\\D|$)") + ) + + private val CLEAN_CHAPTER_TITLE_SUBTITLE_REGEX = Regex("(?i)(?:chapter|ch|ch\\.)\\s*\\d+[\\s:\\-—–|]+(.+)") + /** * Remove page numbers from text content. */ @@ -133,30 +165,16 @@ object TextUtils { } private fun removeCommonJunk(text: String): String { - val patterns = listOf( - Regex("(?i)^read\\s+"), - Regex("(?i)\\s+free\\s+online.*\$"), - Regex("(?i)\\s+online\\s+free.*\$"), - Regex("(?i)\\s*\\|\\s*.*\$"), - Regex("(?i)\\s+at\\s+.*\$"), - Regex("(?i)[\\s–—\\-:]*(MangaBat|NovelFire|MangaPark|MangaKakalot).*\$"), - Regex("(?i)[\\s–—\\-:]*Scan.*\$") - ) - return patterns.fold(text) { acc, pattern -> acc.replace(pattern, "") } + return COMMON_JUNK_REGEXES.fold(text) { acc, pattern -> acc.replace(pattern, "") } } private fun removeChapterMarkers(text: String): String { - val patterns = listOf( - Regex("[–—\\-:]?\\s*(?:chapter|ch|ch\\.)\\s*\\d+.*$", RegexOption.IGNORE_CASE), - Regex("\\s*[–—\\-]\\s*\\d+.*$"), - Regex("\\s*:\\s*\\d+.*$") - ) - return patterns.fold(text) { acc, pattern -> acc.replace(pattern, "").trim() } + return CHAPTER_MARKERS_REGEXES.fold(text) { acc, pattern -> acc.replace(pattern, "").trim() } } private fun cleanSeparators(text: String): String { - return text.replace(Regex("^[\\s–—\\-:\\|]+"), "") - .replace(Regex("[\\s–—\\-:\\|]+$"), "") + return text.replace(CLEAN_SEPARATORS_START_REGEX, "") + .replace(CLEAN_SEPARATORS_END_REGEX, "") .trim() } @@ -166,15 +184,15 @@ object TextUtils { fun extractChapterLabel(title: String?): String? { if (title.isNullOrBlank()) return null - Regex("(?i)(?:chapter|ch|ch\\.|c)\\s*(\\d+)").find(title)?.let { + EXTRACT_CHAPTER_LABEL_REGEX_1.find(title)?.let { return "Chapter " + it.groupValues[1] } - Regex("[\\s:\\-—–|](\\d+)\\s*$").find(title)?.let { + EXTRACT_CHAPTER_LABEL_REGEX_2.find(title)?.let { return "Chapter " + it.groupValues[1] } - return Regex("\\b(\\d+)\\b").findAll(title).lastOrNull()?.let { + return EXTRACT_CHAPTER_LABEL_REGEX_3.findAll(title).lastOrNull()?.let { "Chapter " + it.groupValues[1] } } @@ -183,13 +201,7 @@ object TextUtils { * Extract chapter label from URL */ fun extractChapterLabelFromUrl(url: String): String? { - val patterns = listOf( - Regex("chapter\\s*(\\d+)", RegexOption.IGNORE_CASE), - Regex("ch(?:apter)?\\D*(\\d+)", RegexOption.IGNORE_CASE), - Regex("/(\\d+)(?:/|$)"), - Regex("-" + "(\\d+)(?:\\D|$)") - ) - return patterns.firstNotNullOfOrNull { r -> + return EXTRACT_CHAPTER_LABEL_URL_REGEXES.firstNotNullOfOrNull { r -> r.find(url)?.groupValues?.get(1)?.let { "Chapter " + it } } } @@ -439,8 +451,7 @@ object TextUtils { ) { val label = extractChapterLabel(cleaned) if (label != null) { - val subTitleRegex = Regex("(?i)(?:chapter|ch|ch\\.)\\s*\\d+[\\s:\\-—–|]+(.+)") - val subTitle = subTitleRegex.find(cleaned)?.groupValues?.get(1)?.trim() + val subTitle = CLEAN_CHAPTER_TITLE_SUBTITLE_REGEX.find(cleaned)?.groupValues?.get(1)?.trim() return if (!subTitle.isNullOrBlank() && subTitle.length > 2) (label + ": " + subTitle) else label } } diff --git a/app/src/test/java/io/aatricks/novelscraper/util/TextUtilsBenchmarkTest.kt b/app/src/test/java/io/aatricks/novelscraper/util/TextUtilsBenchmarkTest.kt new file mode 100644 index 0000000..adca057 --- /dev/null +++ b/app/src/test/java/io/aatricks/novelscraper/util/TextUtilsBenchmarkTest.kt @@ -0,0 +1,46 @@ +package io.aatricks.novelscraper.util + +import org.junit.Test +import kotlin.system.measureTimeMillis + +class TextUtilsBenchmarkTest { + + @Test + fun benchmarkExtractBaseTitle() { + val web = io.aatricks.novelscraper.data.model.ContentType.WEB + val text = "Read Solo Max-Level Newbie Chapter 233 Free Online | MangaBat" + + // Warmup + for (i in 1..100) { + TextUtils.extractBaseTitle(text, web) + } + + // Measure + val time = measureTimeMillis { + for (i in 1..50000) { + TextUtils.extractBaseTitle(text, web) + } + } + + println("Benchmark ExtractBaseTitle: $time ms for 50000 iterations") + } + + @Test + fun benchmarkExtractChapterLabel() { + val text = "Read Chapter 233 Free Online | MangaBat" + + // Warmup + for (i in 1..100) { + TextUtils.extractChapterLabel(text) + } + + // Measure + val time = measureTimeMillis { + for (i in 1..50000) { + TextUtils.extractChapterLabel(text) + } + } + + println("Benchmark ExtractChapterLabel: $time ms for 50000 iterations") + } +}