From 08102734445257f9bbd4a7477c49e9e55fd88eb2 Mon Sep 17 00:00:00 2001 From: Vincent Breitmoser Date: Wed, 16 Sep 2020 20:17:55 +0200 Subject: Autofill: Extract AutofillParser into separate subproject (#1101) Co-authored-by: Harsh Shandilya Co-authored-by: Fabian Henneke --- .../lib/publicsuffixlist/PublicSuffixList.kt | 139 ------------------ .../lib/publicsuffixlist/PublicSuffixListData.kt | 163 --------------------- .../lib/publicsuffixlist/PublicSuffixListLoader.kt | 52 ------- .../lib/publicsuffixlist/ext/ByteArray.kt | 125 ---------------- 4 files changed, 479 deletions(-) delete mode 100644 app/src/main/java/mozilla/components/lib/publicsuffixlist/PublicSuffixList.kt delete mode 100644 app/src/main/java/mozilla/components/lib/publicsuffixlist/PublicSuffixListData.kt delete mode 100644 app/src/main/java/mozilla/components/lib/publicsuffixlist/PublicSuffixListLoader.kt delete mode 100644 app/src/main/java/mozilla/components/lib/publicsuffixlist/ext/ByteArray.kt (limited to 'app/src/main/java/mozilla/components') diff --git a/app/src/main/java/mozilla/components/lib/publicsuffixlist/PublicSuffixList.kt b/app/src/main/java/mozilla/components/lib/publicsuffixlist/PublicSuffixList.kt deleted file mode 100644 index 0fb59002..00000000 --- a/app/src/main/java/mozilla/components/lib/publicsuffixlist/PublicSuffixList.kt +++ /dev/null @@ -1,139 +0,0 @@ -/* - * SPDX-License-Identifier: GPL-3.0-only OR MPL-2.0 - */ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -package mozilla.components.lib.publicsuffixlist - -import android.content.Context -import kotlinx.coroutines.CoroutineDispatcher -import kotlinx.coroutines.CoroutineScope -import kotlinx.coroutines.Deferred -import kotlinx.coroutines.Dispatchers -import kotlinx.coroutines.async - -/** - * API for reading and accessing the public suffix list. - * - * > A "public suffix" is one under which Internet users can (or historically could) directly register names. Some - * > examples of public suffixes are .com, .co.uk and pvt.k12.ma.us. The Public Suffix List is a list of all known - * > public suffixes. - * - * Note that this implementation applies the rules of the public suffix list only and does not validate domains. - * - * https://publicsuffix.org/ - * https://github.com/publicsuffix/list - */ -class PublicSuffixList( - context: Context, - dispatcher: CoroutineDispatcher = Dispatchers.IO, - private val scope: CoroutineScope = CoroutineScope(dispatcher) -) { - - private val data: PublicSuffixListData by lazy { PublicSuffixListLoader.load(context) } - - /** - * Prefetch the public suffix list from disk so that it is available in memory. - */ - fun prefetch(): Deferred = scope.async { - data.run { Unit } - } - - /** - * Returns true if the given [domain] is a public suffix; false otherwise. - * - * E.g.: - * ``` - * co.uk -> true - * com -> true - * mozilla.org -> false - * org -> true - * ``` - * - * Note that this method ignores the default "prevailing rule" described in the formal public suffix list algorithm: - * If no rule matches then the passed [domain] is assumed to *not* be a public suffix. - * - * @param [domain] _must_ be a valid domain. [PublicSuffixList] performs no validation, and if any unexpected values - * are passed (e.g., a full URL, a domain with a trailing '/', etc) this may return an incorrect result. - */ - fun isPublicSuffix(domain: String): Deferred = scope.async { - when (data.getPublicSuffixOffset(domain)) { - is PublicSuffixOffset.PublicSuffix -> true - else -> false - } - } - - /** - * Returns the public suffix and one more level; known as the registrable domain. Returns `null` if - * [domain] is a public suffix itself. - * - * E.g.: - * ``` - * wwww.mozilla.org -> mozilla.org - * www.bcc.co.uk -> bbc.co.uk - * a.b.ide.kyoto.jp -> b.ide.kyoto.jp - * ``` - * - * @param [domain] _must_ be a valid domain. [PublicSuffixList] performs no validation, and if any unexpected values - * are passed (e.g., a full URL, a domain with a trailing '/', etc) this may return an incorrect result. - */ - fun getPublicSuffixPlusOne(domain: String): Deferred = scope.async { - when (val offset = data.getPublicSuffixOffset(domain)) { - is PublicSuffixOffset.Offset -> domain - .split('.') - .drop(offset.value) - .joinToString(separator = ".") - else -> null - } - } - - /** - * Returns the public suffix of the given [domain]; known as the effective top-level domain (eTLD). Returns `null` - * if the [domain] is a public suffix itself. - * - * E.g.: - * ``` - * wwww.mozilla.org -> org - * www.bcc.co.uk -> co.uk - * a.b.ide.kyoto.jp -> ide.kyoto.jp - * ``` - * - * @param [domain] _must_ be a valid domain. [PublicSuffixList] performs no validation, and if any unexpected values - * are passed (e.g., a full URL, a domain with a trailing '/', etc) this may return an incorrect result. - */ - fun getPublicSuffix(domain: String) = scope.async { - when (val offset = data.getPublicSuffixOffset(domain)) { - is PublicSuffixOffset.Offset -> domain - .split('.') - .drop(offset.value + 1) - .joinToString(separator = ".") - else -> null - } - } - - /** - * Strips the public suffix from the given [domain]. Returns the original domain if no public suffix could be - * stripped. - * - * E.g.: - * ``` - * wwww.mozilla.org -> www.mozilla - * www.bcc.co.uk -> www.bbc - * a.b.ide.kyoto.jp -> a.b - * ``` - * - * @param [domain] _must_ be a valid domain. [PublicSuffixList] performs no validation, and if any unexpected values - * are passed (e.g., a full URL, a domain with a trailing '/', etc) this may return an incorrect result. - */ - fun stripPublicSuffix(domain: String) = scope.async { - when (val offset = data.getPublicSuffixOffset(domain)) { - is PublicSuffixOffset.Offset -> domain - .split('.') - .joinToString(separator = ".", limit = offset.value + 1, truncated = "") - .dropLast(1) - else -> domain - } - } -} diff --git a/app/src/main/java/mozilla/components/lib/publicsuffixlist/PublicSuffixListData.kt b/app/src/main/java/mozilla/components/lib/publicsuffixlist/PublicSuffixListData.kt deleted file mode 100644 index 778e9fee..00000000 --- a/app/src/main/java/mozilla/components/lib/publicsuffixlist/PublicSuffixListData.kt +++ /dev/null @@ -1,163 +0,0 @@ -/* - * SPDX-License-Identifier: GPL-3.0-only OR MPL-2.0 - */ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -package mozilla.components.lib.publicsuffixlist - -import mozilla.components.lib.publicsuffixlist.ext.binarySearch -import java.net.IDN - -/** - * Class wrapping the public suffix list data and offering methods for accessing rules in it. - */ -internal class PublicSuffixListData( - private val rules: ByteArray, - private val exceptions: ByteArray -) { - - private fun binarySearchRules(labels: List, labelIndex: Int): String? { - return rules.binarySearch(labels, labelIndex) - } - - private fun binarySearchExceptions(labels: List, labelIndex: Int): String? { - return exceptions.binarySearch(labels, labelIndex) - } - - @Suppress("ReturnCount") - fun getPublicSuffixOffset(domain: String): PublicSuffixOffset? { - if (domain.isEmpty()) { - return null - } - - val domainLabels = IDN.toUnicode(domain).split('.') - if (domainLabels.find { it.isEmpty() } != null) { - // At least one of the labels is empty: Bail out. - return null - } - - val rule = findMatchingRule(domainLabels) - - if (domainLabels.size == rule.size && rule[0][0] != PublicSuffixListData.EXCEPTION_MARKER) { - // The domain is a public suffix. - return if (rule == PublicSuffixListData.PREVAILING_RULE) { - PublicSuffixOffset.PrevailingRule - } else { - PublicSuffixOffset.PublicSuffix - } - } - - return if (rule[0][0] == PublicSuffixListData.EXCEPTION_MARKER) { - // Exception rules hold the effective TLD plus one. - PublicSuffixOffset.Offset(domainLabels.size - rule.size) - } else { - // Otherwise the rule is for a public suffix, so we must take one more label. - PublicSuffixOffset.Offset(domainLabels.size - (rule.size + 1)) - } - } - - /** - * Find a matching rule for the given domain labels. - * - * This algorithm is based on OkHttp's PublicSuffixDatabase class: - * https://github.com/square/okhttp/blob/master/okhttp/src/main/java/okhttp3/internal/publicsuffix/PublicSuffixDatabase.java - */ - private fun findMatchingRule(domainLabels: List): List { - // Break apart the domain into UTF-8 labels, i.e. foo.bar.com turns into [foo, bar, com]. - val domainLabelsBytes = domainLabels.map { it.toByteArray(Charsets.UTF_8) } - - val exactMatch = findExactMatch(domainLabelsBytes) - val wildcardMatch = findWildcardMatch(domainLabelsBytes) - val exceptionMatch = findExceptionMatch(domainLabelsBytes, wildcardMatch) - - if (exceptionMatch != null) { - return ("${PublicSuffixListData.EXCEPTION_MARKER}$exceptionMatch").split('.') - } - - if (exactMatch == null && wildcardMatch == null) { - return PublicSuffixListData.PREVAILING_RULE - } - - val exactRuleLabels = exactMatch?.split('.') ?: PublicSuffixListData.EMPTY_RULE - val wildcardRuleLabels = wildcardMatch?.split('.') ?: PublicSuffixListData.EMPTY_RULE - - return if (exactRuleLabels.size > wildcardRuleLabels.size) { - exactRuleLabels - } else { - wildcardRuleLabels - } - } - - /** - * Returns an exact match or null. - */ - private fun findExactMatch(labels: List): String? { - // Start by looking for exact matches. We start at the leftmost label. For example, foo.bar.com - // will look like: [foo, bar, com], [bar, com], [com]. The longest matching rule wins. - - for (i in 0 until labels.size) { - val rule = binarySearchRules(labels, i) - - if (rule != null) { - return rule - } - } - - return null - } - - /** - * Returns a wildcard match or null. - */ - private fun findWildcardMatch(labels: List): String? { - // In theory, wildcard rules are not restricted to having the wildcard in the leftmost position. - // In practice, wildcards are always in the leftmost position. For now, this implementation - // cheats and does not attempt every possible permutation. Instead, it only considers wildcards - // in the leftmost position. We assert this fact when we generate the public suffix file. If - // this assertion ever fails we'll need to refactor this implementation. - if (labels.size > 1) { - val labelsWithWildcard = labels.toMutableList() - for (labelIndex in 0 until labelsWithWildcard.size) { - labelsWithWildcard[labelIndex] = PublicSuffixListData.WILDCARD_LABEL - val rule = binarySearchRules(labelsWithWildcard, labelIndex) - if (rule != null) { - return rule - } - } - } - - return null - } - - private fun findExceptionMatch(labels: List, wildcardMatch: String?): String? { - // Exception rules only apply to wildcard rules, so only try it if we matched a wildcard. - if (wildcardMatch == null) { - return null - } - - for (labelIndex in 0 until labels.size) { - val rule = binarySearchExceptions(labels, labelIndex) - if (rule != null) { - return rule - } - } - - return null - } - - companion object { - - val WILDCARD_LABEL = byteArrayOf('*'.toByte()) - val PREVAILING_RULE = listOf("*") - val EMPTY_RULE = listOf() - const val EXCEPTION_MARKER = '!' - } -} - -internal sealed class PublicSuffixOffset { - data class Offset(val value: Int) : PublicSuffixOffset() - object PublicSuffix : PublicSuffixOffset() - object PrevailingRule : PublicSuffixOffset() -} diff --git a/app/src/main/java/mozilla/components/lib/publicsuffixlist/PublicSuffixListLoader.kt b/app/src/main/java/mozilla/components/lib/publicsuffixlist/PublicSuffixListLoader.kt deleted file mode 100644 index 65caeae5..00000000 --- a/app/src/main/java/mozilla/components/lib/publicsuffixlist/PublicSuffixListLoader.kt +++ /dev/null @@ -1,52 +0,0 @@ -/* - * SPDX-License-Identifier: GPL-3.0-only OR MPL-2.0 - */ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -package mozilla.components.lib.publicsuffixlist - -import android.content.Context -import java.io.BufferedInputStream -import java.io.IOException - -private const val PUBLIC_SUFFIX_LIST_FILE = "publicsuffixes" - -internal object PublicSuffixListLoader { - - fun load(context: Context): PublicSuffixListData = context.assets.open( - PUBLIC_SUFFIX_LIST_FILE - ).buffered().use { stream -> - val publicSuffixSize = stream.readInt() - val publicSuffixBytes = stream.readFully(publicSuffixSize) - - val exceptionSize = stream.readInt() - val exceptionBytes = stream.readFully(exceptionSize) - - PublicSuffixListData(publicSuffixBytes, exceptionBytes) - } -} - -@Suppress("MagicNumber") -private fun BufferedInputStream.readInt(): Int { - return (read() and 0xff shl 24 - or (read() and 0xff shl 16) - or (read() and 0xff shl 8) - or (read() and 0xff)) -} - -private fun BufferedInputStream.readFully(size: Int): ByteArray { - val bytes = ByteArray(size) - - var offset = 0 - while (offset < size) { - val read = read(bytes, offset, size - offset) - if (read == -1) { - throw IOException("Unexpected end of stream") - } - offset += read - } - - return bytes -} diff --git a/app/src/main/java/mozilla/components/lib/publicsuffixlist/ext/ByteArray.kt b/app/src/main/java/mozilla/components/lib/publicsuffixlist/ext/ByteArray.kt deleted file mode 100644 index 43fb7ab1..00000000 --- a/app/src/main/java/mozilla/components/lib/publicsuffixlist/ext/ByteArray.kt +++ /dev/null @@ -1,125 +0,0 @@ -/* - * SPDX-License-Identifier: GPL-3.0-only OR MPL-2.0 - */ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -package mozilla.components.lib.publicsuffixlist.ext - -import kotlin.experimental.and - -private const val BITMASK = 0xff.toByte() - -/** - * Performs a binary search for the provided [labels] on the [ByteArray]'s data. - * - * This algorithm is based on OkHttp's PublicSuffixDatabase class: - * https://github.com/square/okhttp/blob/1977136/okhttp/src/main/kotlin/okhttp3/internal/publicsuffix/PublicSuffixDatabase.kt - */ -@Suppress("ComplexMethod", "NestedBlockDepth") -internal fun ByteArray.binarySearch(labels: List, labelIndex: Int): String? { - var low = 0 - var high = size - var match: String? = null - - while (low < high) { - val mid = (low + high) / 2 - val start = findStartOfLineFromIndex(mid) - val end = findEndOfLineFromIndex(start) - - val publicSuffixLength = start + end - start - - var compareResult: Int - var currentLabelIndex = labelIndex - var currentLabelByteIndex = 0 - var publicSuffixByteIndex = 0 - - var expectDot = false - while (true) { - val byte0 = if (expectDot) { - expectDot = false - '.'.toByte() - } else { - labels[currentLabelIndex][currentLabelByteIndex] and BITMASK - } - - val byte1 = this[start + publicSuffixByteIndex] and BITMASK - - // Compare the bytes. Note that the file stores UTF-8 encoded bytes, so we must compare the - // unsigned bytes. - @Suppress("EXPERIMENTAL_API_USAGE") - compareResult = (byte0.toUByte() - byte1.toUByte()).toInt() - if (compareResult != 0) { - break - } - - publicSuffixByteIndex++ - currentLabelByteIndex++ - - if (publicSuffixByteIndex == publicSuffixLength) { - break - } - - if (labels[currentLabelIndex].size == currentLabelByteIndex) { - // We've exhausted our current label. Either there are more labels to compare, in which - // case we expect a dot as the next character. Otherwise, we've checked all our labels. - if (currentLabelIndex == labels.size - 1) { - break - } else { - currentLabelIndex++ - currentLabelByteIndex = -1 - expectDot = true - } - } - } - - if (compareResult < 0) { - high = start - 1 - } else if (compareResult > 0) { - low = start + end + 1 - } else { - // We found a match, but are the lengths equal? - val publicSuffixBytesLeft = publicSuffixLength - publicSuffixByteIndex - var labelBytesLeft = labels[currentLabelIndex].size - currentLabelByteIndex - for (i in currentLabelIndex + 1 until labels.size) { - labelBytesLeft += labels[i].size - } - - if (labelBytesLeft < publicSuffixBytesLeft) { - high = start - 1 - } else if (labelBytesLeft > publicSuffixBytesLeft) { - low = start + end + 1 - } else { - // Found a match. - match = String(this, start, publicSuffixLength, Charsets.UTF_8) - break - } - } - } - - return match -} - -/** - * Search for a '\n' that marks the start of a value. Don't go back past the start of the array. - */ -private fun ByteArray.findStartOfLineFromIndex(start: Int): Int { - var index = start - while (index > -1 && this[index] != '\n'.toByte()) { - index-- - } - index++ - return index -} - -/** - * Search for a '\n' that marks the end of a value. - */ -private fun ByteArray.findEndOfLineFromIndex(start: Int): Int { - var end = 1 - while (this[start + end] != '\n'.toByte()) { - end++ - } - return end -} -- cgit v1.2.3