summaryrefslogtreecommitdiff
path: root/app/src/main/java
diff options
context:
space:
mode:
Diffstat (limited to 'app/src/main/java')
-rw-r--r--app/src/main/java/mozilla/components/lib/publicsuffixlist/PublicSuffixList.kt138
-rw-r--r--app/src/main/java/mozilla/components/lib/publicsuffixlist/PublicSuffixListData.kt161
-rw-r--r--app/src/main/java/mozilla/components/lib/publicsuffixlist/PublicSuffixListLoader.kt51
-rw-r--r--app/src/main/java/mozilla/components/lib/publicsuffixlist/ext/ByteArray.kt125
4 files changed, 475 insertions, 0 deletions
diff --git a/app/src/main/java/mozilla/components/lib/publicsuffixlist/PublicSuffixList.kt b/app/src/main/java/mozilla/components/lib/publicsuffixlist/PublicSuffixList.kt
new file mode 100644
index 00000000..6df3caca
--- /dev/null
+++ b/app/src/main/java/mozilla/components/lib/publicsuffixlist/PublicSuffixList.kt
@@ -0,0 +1,138 @@
+/*
+ * SPDX-License-Identifier: GPL-3.0-only OR MPL-2.0
+ */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+package mozilla.components.lib.publicsuffixlist
+
+import android.content.Context
+import kotlinx.coroutines.CoroutineDispatcher
+import kotlinx.coroutines.CoroutineScope
+import kotlinx.coroutines.Deferred
+import kotlinx.coroutines.Dispatchers
+import kotlinx.coroutines.async
+
+/**
+ * API for reading and accessing the public suffix list.
+ *
+ * > A "public suffix" is one under which Internet users can (or historically could) directly register names. Some
+ * > examples of public suffixes are .com, .co.uk and pvt.k12.ma.us. The Public Suffix List is a list of all known
+ * > public suffixes.
+ *
+ * Note that this implementation applies the rules of the public suffix list only and does not validate domains.
+ *
+ * https://publicsuffix.org/
+ * https://github.com/publicsuffix/list
+ */
+class PublicSuffixList(
+ context: Context,
+ dispatcher: CoroutineDispatcher = Dispatchers.IO,
+ private val scope: CoroutineScope = CoroutineScope(dispatcher)
+) {
+ private val data: PublicSuffixListData by lazy { PublicSuffixListLoader.load(context) }
+
+ /**
+ * Prefetch the public suffix list from disk so that it is available in memory.
+ */
+ fun prefetch(): Deferred<Unit> = scope.async {
+ data.run { Unit }
+ }
+
+ /**
+ * Returns true if the given [domain] is a public suffix; false otherwise.
+ *
+ * E.g.:
+ * ```
+ * co.uk -> true
+ * com -> true
+ * mozilla.org -> false
+ * org -> true
+ * ```
+ *
+ * Note that this method ignores the default "prevailing rule" described in the formal public suffix list algorithm:
+ * If no rule matches then the passed [domain] is assumed to *not* be a public suffix.
+ *
+ * @param [domain] _must_ be a valid domain. [PublicSuffixList] performs no validation, and if any unexpected values
+ * are passed (e.g., a full URL, a domain with a trailing '/', etc) this may return an incorrect result.
+ */
+ fun isPublicSuffix(domain: String): Deferred<Boolean> = scope.async {
+ when (data.getPublicSuffixOffset(domain)) {
+ is PublicSuffixOffset.PublicSuffix -> true
+ else -> false
+ }
+ }
+
+ /**
+ * Returns the public suffix and one more level; known as the registrable domain. Returns `null` if
+ * [domain] is a public suffix itself.
+ *
+ * E.g.:
+ * ```
+ * wwww.mozilla.org -> mozilla.org
+ * www.bcc.co.uk -> bbc.co.uk
+ * a.b.ide.kyoto.jp -> b.ide.kyoto.jp
+ * ```
+ *
+ * @param [domain] _must_ be a valid domain. [PublicSuffixList] performs no validation, and if any unexpected values
+ * are passed (e.g., a full URL, a domain with a trailing '/', etc) this may return an incorrect result.
+ */
+ fun getPublicSuffixPlusOne(domain: String): Deferred<String?> = scope.async {
+ when (val offset = data.getPublicSuffixOffset(domain)) {
+ is PublicSuffixOffset.Offset -> domain
+ .split('.')
+ .drop(offset.value)
+ .joinToString(separator = ".")
+ else -> null
+ }
+ }
+
+ /**
+ * Returns the public suffix of the given [domain]; known as the effective top-level domain (eTLD). Returns `null`
+ * if the [domain] is a public suffix itself.
+ *
+ * E.g.:
+ * ```
+ * wwww.mozilla.org -> org
+ * www.bcc.co.uk -> co.uk
+ * a.b.ide.kyoto.jp -> ide.kyoto.jp
+ * ```
+ *
+ * @param [domain] _must_ be a valid domain. [PublicSuffixList] performs no validation, and if any unexpected values
+ * are passed (e.g., a full URL, a domain with a trailing '/', etc) this may return an incorrect result.
+ */
+ fun getPublicSuffix(domain: String) = scope.async {
+ when (val offset = data.getPublicSuffixOffset(domain)) {
+ is PublicSuffixOffset.Offset -> domain
+ .split('.')
+ .drop(offset.value + 1)
+ .joinToString(separator = ".")
+ else -> null
+ }
+ }
+
+ /**
+ * Strips the public suffix from the given [domain]. Returns the original domain if no public suffix could be
+ * stripped.
+ *
+ * E.g.:
+ * ```
+ * wwww.mozilla.org -> www.mozilla
+ * www.bcc.co.uk -> www.bbc
+ * a.b.ide.kyoto.jp -> a.b
+ * ```
+ *
+ * @param [domain] _must_ be a valid domain. [PublicSuffixList] performs no validation, and if any unexpected values
+ * are passed (e.g., a full URL, a domain with a trailing '/', etc) this may return an incorrect result.
+ */
+ fun stripPublicSuffix(domain: String) = scope.async {
+ when (val offset = data.getPublicSuffixOffset(domain)) {
+ is PublicSuffixOffset.Offset -> domain
+ .split('.')
+ .joinToString(separator = ".", limit = offset.value + 1, truncated = "")
+ .dropLast(1)
+ else -> domain
+ }
+ }
+}
diff --git a/app/src/main/java/mozilla/components/lib/publicsuffixlist/PublicSuffixListData.kt b/app/src/main/java/mozilla/components/lib/publicsuffixlist/PublicSuffixListData.kt
new file mode 100644
index 00000000..0cbf6945
--- /dev/null
+++ b/app/src/main/java/mozilla/components/lib/publicsuffixlist/PublicSuffixListData.kt
@@ -0,0 +1,161 @@
+/*
+ * SPDX-License-Identifier: GPL-3.0-only OR MPL-2.0
+ */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+package mozilla.components.lib.publicsuffixlist
+
+import mozilla.components.lib.publicsuffixlist.ext.binarySearch
+import java.net.IDN
+
+/**
+ * Class wrapping the public suffix list data and offering methods for accessing rules in it.
+ */
+internal class PublicSuffixListData(
+ private val rules: ByteArray,
+ private val exceptions: ByteArray
+) {
+ private fun binarySearchRules(labels: List<ByteArray>, labelIndex: Int): String? {
+ return rules.binarySearch(labels, labelIndex)
+ }
+
+ private fun binarySearchExceptions(labels: List<ByteArray>, labelIndex: Int): String? {
+ return exceptions.binarySearch(labels, labelIndex)
+ }
+
+ @Suppress("ReturnCount")
+ fun getPublicSuffixOffset(domain: String): PublicSuffixOffset? {
+ if (domain.isEmpty()) {
+ return null
+ }
+
+ val domainLabels = IDN.toUnicode(domain).split('.')
+ if (domainLabels.find { it.isEmpty() } != null) {
+ // At least one of the labels is empty: Bail out.
+ return null
+ }
+
+ val rule = findMatchingRule(domainLabels)
+
+ if (domainLabels.size == rule.size && rule[0][0] != PublicSuffixListData.EXCEPTION_MARKER) {
+ // The domain is a public suffix.
+ return if (rule == PublicSuffixListData.PREVAILING_RULE) {
+ PublicSuffixOffset.PrevailingRule
+ } else {
+ PublicSuffixOffset.PublicSuffix
+ }
+ }
+
+ return if (rule[0][0] == PublicSuffixListData.EXCEPTION_MARKER) {
+ // Exception rules hold the effective TLD plus one.
+ PublicSuffixOffset.Offset(domainLabels.size - rule.size)
+ } else {
+ // Otherwise the rule is for a public suffix, so we must take one more label.
+ PublicSuffixOffset.Offset(domainLabels.size - (rule.size + 1))
+ }
+ }
+
+ /**
+ * Find a matching rule for the given domain labels.
+ *
+ * This algorithm is based on OkHttp's PublicSuffixDatabase class:
+ * https://github.com/square/okhttp/blob/master/okhttp/src/main/java/okhttp3/internal/publicsuffix/PublicSuffixDatabase.java
+ */
+ private fun findMatchingRule(domainLabels: List<String>): List<String> {
+ // Break apart the domain into UTF-8 labels, i.e. foo.bar.com turns into [foo, bar, com].
+ val domainLabelsBytes = domainLabels.map { it.toByteArray(Charsets.UTF_8) }
+
+ val exactMatch = findExactMatch(domainLabelsBytes)
+ val wildcardMatch = findWildcardMatch(domainLabelsBytes)
+ val exceptionMatch = findExceptionMatch(domainLabelsBytes, wildcardMatch)
+
+ if (exceptionMatch != null) {
+ return ("${PublicSuffixListData.EXCEPTION_MARKER}$exceptionMatch").split('.')
+ }
+
+ if (exactMatch == null && wildcardMatch == null) {
+ return PublicSuffixListData.PREVAILING_RULE
+ }
+
+ val exactRuleLabels = exactMatch?.split('.') ?: PublicSuffixListData.EMPTY_RULE
+ val wildcardRuleLabels = wildcardMatch?.split('.') ?: PublicSuffixListData.EMPTY_RULE
+
+ return if (exactRuleLabels.size > wildcardRuleLabels.size) {
+ exactRuleLabels
+ } else {
+ wildcardRuleLabels
+ }
+ }
+
+ /**
+ * Returns an exact match or null.
+ */
+ private fun findExactMatch(labels: List<ByteArray>): String? {
+ // Start by looking for exact matches. We start at the leftmost label. For example, foo.bar.com
+ // will look like: [foo, bar, com], [bar, com], [com]. The longest matching rule wins.
+
+ for (i in 0 until labels.size) {
+ val rule = binarySearchRules(labels, i)
+
+ if (rule != null) {
+ return rule
+ }
+ }
+
+ return null
+ }
+
+ /**
+ * Returns a wildcard match or null.
+ */
+ private fun findWildcardMatch(labels: List<ByteArray>): String? {
+ // In theory, wildcard rules are not restricted to having the wildcard in the leftmost position.
+ // In practice, wildcards are always in the leftmost position. For now, this implementation
+ // cheats and does not attempt every possible permutation. Instead, it only considers wildcards
+ // in the leftmost position. We assert this fact when we generate the public suffix file. If
+ // this assertion ever fails we'll need to refactor this implementation.
+ if (labels.size > 1) {
+ val labelsWithWildcard = labels.toMutableList()
+ for (labelIndex in 0 until labelsWithWildcard.size) {
+ labelsWithWildcard[labelIndex] = PublicSuffixListData.WILDCARD_LABEL
+ val rule = binarySearchRules(labelsWithWildcard, labelIndex)
+ if (rule != null) {
+ return rule
+ }
+ }
+ }
+
+ return null
+ }
+
+ private fun findExceptionMatch(labels: List<ByteArray>, wildcardMatch: String?): String? {
+ // Exception rules only apply to wildcard rules, so only try it if we matched a wildcard.
+ if (wildcardMatch == null) {
+ return null
+ }
+
+ for (labelIndex in 0 until labels.size) {
+ val rule = binarySearchExceptions(labels, labelIndex)
+ if (rule != null) {
+ return rule
+ }
+ }
+
+ return null
+ }
+
+ companion object {
+ val WILDCARD_LABEL = byteArrayOf('*'.toByte())
+ val PREVAILING_RULE = listOf("*")
+ val EMPTY_RULE = listOf<String>()
+ const val EXCEPTION_MARKER = '!'
+ }
+}
+
+internal sealed class PublicSuffixOffset {
+ data class Offset(val value: Int) : PublicSuffixOffset()
+ object PublicSuffix : PublicSuffixOffset()
+ object PrevailingRule : PublicSuffixOffset()
+}
diff --git a/app/src/main/java/mozilla/components/lib/publicsuffixlist/PublicSuffixListLoader.kt b/app/src/main/java/mozilla/components/lib/publicsuffixlist/PublicSuffixListLoader.kt
new file mode 100644
index 00000000..88e82523
--- /dev/null
+++ b/app/src/main/java/mozilla/components/lib/publicsuffixlist/PublicSuffixListLoader.kt
@@ -0,0 +1,51 @@
+/*
+ * SPDX-License-Identifier: GPL-3.0-only OR MPL-2.0
+ */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+package mozilla.components.lib.publicsuffixlist
+
+import android.content.Context
+import java.io.BufferedInputStream
+import java.io.IOException
+
+private const val PUBLIC_SUFFIX_LIST_FILE = "publicsuffixes"
+
+internal object PublicSuffixListLoader {
+ fun load(context: Context): PublicSuffixListData = context.assets.open(
+ PUBLIC_SUFFIX_LIST_FILE
+ ).buffered().use { stream ->
+ val publicSuffixSize = stream.readInt()
+ val publicSuffixBytes = stream.readFully(publicSuffixSize)
+
+ val exceptionSize = stream.readInt()
+ val exceptionBytes = stream.readFully(exceptionSize)
+
+ PublicSuffixListData(publicSuffixBytes, exceptionBytes)
+ }
+}
+
+@Suppress("MagicNumber")
+private fun BufferedInputStream.readInt(): Int {
+ return (read() and 0xff shl 24
+ or (read() and 0xff shl 16)
+ or (read() and 0xff shl 8)
+ or (read() and 0xff))
+}
+
+private fun BufferedInputStream.readFully(size: Int): ByteArray {
+ val bytes = ByteArray(size)
+
+ var offset = 0
+ while (offset < size) {
+ val read = read(bytes, offset, size - offset)
+ if (read == -1) {
+ throw IOException("Unexpected end of stream")
+ }
+ offset += read
+ }
+
+ return bytes
+}
diff --git a/app/src/main/java/mozilla/components/lib/publicsuffixlist/ext/ByteArray.kt b/app/src/main/java/mozilla/components/lib/publicsuffixlist/ext/ByteArray.kt
new file mode 100644
index 00000000..5abb9154
--- /dev/null
+++ b/app/src/main/java/mozilla/components/lib/publicsuffixlist/ext/ByteArray.kt
@@ -0,0 +1,125 @@
+/*
+ * SPDX-License-Identifier: GPL-3.0-only OR MPL-2.0
+ */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+package mozilla.components.lib.publicsuffixlist.ext
+
+import kotlin.experimental.and
+
+private const val BITMASK = 0xff.toByte()
+
+/**
+ * Performs a binary search for the provided [labels] on the [ByteArray]'s data.
+ *
+ * This algorithm is based on OkHttp's PublicSuffixDatabase class:
+ * https://github.com/square/okhttp/blob/master/okhttp/src/main/java/okhttp3/internal/publicsuffix/PublicSuffixDatabase.java
+ */
+@Suppress("ComplexMethod", "NestedBlockDepth")
+internal fun ByteArray.binarySearch(labels: List<ByteArray>, labelIndex: Int): String? {
+ var low = 0
+ var high = size
+ var match: String? = null
+
+ while (low < high) {
+ val mid = (low + high) / 2
+ val start = findStartOfLineFromIndex(mid)
+ val end = findEndOfLineFromIndex(start)
+
+ val publicSuffixLength = start + end - start
+
+ var compareResult: Int
+ var currentLabelIndex = labelIndex
+ var currentLabelByteIndex = 0
+ var publicSuffixByteIndex = 0
+
+ var expectDot = false
+ while (true) {
+ val byte0 = if (expectDot) {
+ expectDot = false
+ '.'.toByte()
+ } else {
+ labels[currentLabelIndex][currentLabelByteIndex] and BITMASK
+ }
+
+ val byte1 = this[start + publicSuffixByteIndex] and BITMASK
+
+ // Compare the bytes. Note that the file stores UTF-8 encoded bytes, so we must compare the
+ // unsigned bytes.
+ @Suppress("EXPERIMENTAL_API_USAGE")
+ compareResult = (byte0.toUByte() - byte1.toUByte()).toInt()
+ if (compareResult != 0) {
+ break
+ }
+
+ publicSuffixByteIndex++
+ currentLabelByteIndex++
+
+ if (publicSuffixByteIndex == publicSuffixLength) {
+ break
+ }
+
+ if (labels[currentLabelIndex].size == currentLabelByteIndex) {
+ // We've exhausted our current label. Either there are more labels to compare, in which
+ // case we expect a dot as the next character. Otherwise, we've checked all our labels.
+ if (currentLabelIndex == labels.size - 1) {
+ break
+ } else {
+ currentLabelIndex++
+ currentLabelByteIndex = -1
+ expectDot = true
+ }
+ }
+ }
+
+ if (compareResult < 0) {
+ high = start - 1
+ } else if (compareResult > 0) {
+ low = start + end + 1
+ } else {
+ // We found a match, but are the lengths equal?
+ val publicSuffixBytesLeft = publicSuffixLength - publicSuffixByteIndex
+ var labelBytesLeft = labels[currentLabelIndex].size - currentLabelByteIndex
+ for (i in currentLabelIndex + 1 until labels.size) {
+ labelBytesLeft += labels[i].size
+ }
+
+ if (labelBytesLeft < publicSuffixBytesLeft) {
+ high = start - 1
+ } else if (labelBytesLeft > publicSuffixBytesLeft) {
+ low = start + end + 1
+ } else {
+ // Found a match.
+ match = String(this, start, publicSuffixLength, Charsets.UTF_8)
+ break
+ }
+ }
+ }
+
+ return match
+}
+
+/**
+ * Search for a '\n' that marks the start of a value. Don't go back past the start of the array.
+ */
+private fun ByteArray.findStartOfLineFromIndex(start: Int): Int {
+ var index = start
+ while (index > -1 && this[index] != '\n'.toByte()) {
+ index--
+ }
+ index++
+ return index
+}
+
+/**
+ * Search for a '\n' that marks the end of a value.
+ */
+private fun ByteArray.findEndOfLineFromIndex(start: Int): Int {
+ var end = 1
+ while (this[start + end] != '\n'.toByte()) {
+ end++
+ }
+ return end
+}