diff options
-rw-r--r-- | .github/workflows/update_publicsuffix_data.yml | 29 | ||||
-rw-r--r-- | .idea/codeStyles/Project.xml | 5 | ||||
-rw-r--r-- | app/build.gradle | 3 | ||||
-rw-r--r-- | app/src/main/assets/publicsuffixes | bin | 0 -> 104170 bytes | |||
-rw-r--r-- | app/src/main/java/mozilla/components/lib/publicsuffixlist/PublicSuffixList.kt | 138 | ||||
-rw-r--r-- | app/src/main/java/mozilla/components/lib/publicsuffixlist/PublicSuffixListData.kt | 161 | ||||
-rw-r--r-- | app/src/main/java/mozilla/components/lib/publicsuffixlist/PublicSuffixListLoader.kt | 51 | ||||
-rw-r--r-- | app/src/main/java/mozilla/components/lib/publicsuffixlist/ext/ByteArray.kt | 125 | ||||
-rw-r--r-- | build.gradle | 10 | ||||
-rw-r--r-- | dependencies.gradle | 8 | ||||
-rw-r--r-- | gradle/wrapper/gradle-wrapper.properties | 1 |
11 files changed, 515 insertions, 16 deletions
diff --git a/.github/workflows/update_publicsuffix_data.yml b/.github/workflows/update_publicsuffix_data.yml new file mode 100644 index 00000000..4929b2da --- /dev/null +++ b/.github/workflows/update_publicsuffix_data.yml @@ -0,0 +1,29 @@ +on: + schedule: + - cron: '0 0 * * *' + +jobs: + update-publicsuffix-data: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Download new publicsuffix data + run: curl -L https://github.com/mozilla-mobile/android-components/raw/master/components/lib/publicsuffixlist/src/main/assets/publicsuffixes -o app/src/main/assets/publicsuffixes + - name: Compare list changes + run: if [[ $(git diff --binary --stat) != '' ]]; then echo "::set-env name=UPDATED::true"; fi + - name: Create update PR + uses: peter-evans/create-pull-request@v2 + if: env.UPDATED == 'true' + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_PERSONAL_TOKEN }} + with: + token: ${{ secrets.GITHUB_TOKEN }} + commit-message: 'Update Public Suffix List data' + committer: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> + author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> + title: 'Update Public Suffix List data' + body: 'Updates Public Suffix List from https://publicsuffix.org/list/' + assignees: msfjarvis + labels: PSL + branch: bot/update-psl diff --git a/.idea/codeStyles/Project.xml b/.idea/codeStyles/Project.xml index d365c987..81283c30 100644 --- a/.idea/codeStyles/Project.xml +++ b/.idea/codeStyles/Project.xml @@ -5,6 +5,11 @@ <option name="WRAP_WHEN_TYPING_REACHES_RIGHT_MARGIN" value="true" /> <option name="FORMATTER_TAGS_ENABLED" value="true" /> <option name="SOFT_MARGINS" value="100" /> + <option name="DO_NOT_FORMAT"> + <list> + <fileSet type="namedScope" name="publicsuffixlist" pattern="src[app]:mozilla.components.lib.publicsuffixlist.PublicSuffixList||src[app]:mozilla.components.lib.publicsuffixlist.PublicSuffixListData||src[app]:mozilla.components.lib.publicsuffixlist.PublicSuffixListLoader||src[app]:mozilla.components.lib.publicsuffixlist.ext.ByteArray" /> + </list> + </option> <JavaCodeStyleSettings> <option name="DO_NOT_WRAP_AFTER_SINGLE_ANNOTATION" value="true" /> </JavaCodeStyleSettings> diff --git a/app/build.gradle b/app/build.gradle index 2ea2a8e8..92eb301c 100644 --- a/app/build.gradle +++ b/app/build.gradle @@ -25,7 +25,7 @@ android { viewBinding.enabled = true defaultConfig { - applicationId versions.packageName + applicationId 'dev.msfjarvis.aps' versionCode 10721 versionName '1.8.0-SNAPSHOT' } @@ -101,7 +101,6 @@ dependencies { } implementation deps.third_party.jsch implementation deps.third_party.openpgp_ktx - implementation deps.third_party.publicsuffixlist implementation deps.third_party.ssh_auth implementation deps.third_party.timber implementation deps.third_party.timberkt diff --git a/app/src/main/assets/publicsuffixes b/app/src/main/assets/publicsuffixes Binary files differnew file mode 100644 index 00000000..c47610a8 --- /dev/null +++ b/app/src/main/assets/publicsuffixes diff --git a/app/src/main/java/mozilla/components/lib/publicsuffixlist/PublicSuffixList.kt b/app/src/main/java/mozilla/components/lib/publicsuffixlist/PublicSuffixList.kt new file mode 100644 index 00000000..6df3caca --- /dev/null +++ b/app/src/main/java/mozilla/components/lib/publicsuffixlist/PublicSuffixList.kt @@ -0,0 +1,138 @@ +/* + * SPDX-License-Identifier: GPL-3.0-only OR MPL-2.0 + */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +package mozilla.components.lib.publicsuffixlist + +import android.content.Context +import kotlinx.coroutines.CoroutineDispatcher +import kotlinx.coroutines.CoroutineScope +import kotlinx.coroutines.Deferred +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.async + +/** + * API for reading and accessing the public suffix list. + * + * > A "public suffix" is one under which Internet users can (or historically could) directly register names. Some + * > examples of public suffixes are .com, .co.uk and pvt.k12.ma.us. The Public Suffix List is a list of all known + * > public suffixes. + * + * Note that this implementation applies the rules of the public suffix list only and does not validate domains. + * + * https://publicsuffix.org/ + * https://github.com/publicsuffix/list + */ +class PublicSuffixList( + context: Context, + dispatcher: CoroutineDispatcher = Dispatchers.IO, + private val scope: CoroutineScope = CoroutineScope(dispatcher) +) { + private val data: PublicSuffixListData by lazy { PublicSuffixListLoader.load(context) } + + /** + * Prefetch the public suffix list from disk so that it is available in memory. + */ + fun prefetch(): Deferred<Unit> = scope.async { + data.run { Unit } + } + + /** + * Returns true if the given [domain] is a public suffix; false otherwise. + * + * E.g.: + * ``` + * co.uk -> true + * com -> true + * mozilla.org -> false + * org -> true + * ``` + * + * Note that this method ignores the default "prevailing rule" described in the formal public suffix list algorithm: + * If no rule matches then the passed [domain] is assumed to *not* be a public suffix. + * + * @param [domain] _must_ be a valid domain. [PublicSuffixList] performs no validation, and if any unexpected values + * are passed (e.g., a full URL, a domain with a trailing '/', etc) this may return an incorrect result. + */ + fun isPublicSuffix(domain: String): Deferred<Boolean> = scope.async { + when (data.getPublicSuffixOffset(domain)) { + is PublicSuffixOffset.PublicSuffix -> true + else -> false + } + } + + /** + * Returns the public suffix and one more level; known as the registrable domain. Returns `null` if + * [domain] is a public suffix itself. + * + * E.g.: + * ``` + * wwww.mozilla.org -> mozilla.org + * www.bcc.co.uk -> bbc.co.uk + * a.b.ide.kyoto.jp -> b.ide.kyoto.jp + * ``` + * + * @param [domain] _must_ be a valid domain. [PublicSuffixList] performs no validation, and if any unexpected values + * are passed (e.g., a full URL, a domain with a trailing '/', etc) this may return an incorrect result. + */ + fun getPublicSuffixPlusOne(domain: String): Deferred<String?> = scope.async { + when (val offset = data.getPublicSuffixOffset(domain)) { + is PublicSuffixOffset.Offset -> domain + .split('.') + .drop(offset.value) + .joinToString(separator = ".") + else -> null + } + } + + /** + * Returns the public suffix of the given [domain]; known as the effective top-level domain (eTLD). Returns `null` + * if the [domain] is a public suffix itself. + * + * E.g.: + * ``` + * wwww.mozilla.org -> org + * www.bcc.co.uk -> co.uk + * a.b.ide.kyoto.jp -> ide.kyoto.jp + * ``` + * + * @param [domain] _must_ be a valid domain. [PublicSuffixList] performs no validation, and if any unexpected values + * are passed (e.g., a full URL, a domain with a trailing '/', etc) this may return an incorrect result. + */ + fun getPublicSuffix(domain: String) = scope.async { + when (val offset = data.getPublicSuffixOffset(domain)) { + is PublicSuffixOffset.Offset -> domain + .split('.') + .drop(offset.value + 1) + .joinToString(separator = ".") + else -> null + } + } + + /** + * Strips the public suffix from the given [domain]. Returns the original domain if no public suffix could be + * stripped. + * + * E.g.: + * ``` + * wwww.mozilla.org -> www.mozilla + * www.bcc.co.uk -> www.bbc + * a.b.ide.kyoto.jp -> a.b + * ``` + * + * @param [domain] _must_ be a valid domain. [PublicSuffixList] performs no validation, and if any unexpected values + * are passed (e.g., a full URL, a domain with a trailing '/', etc) this may return an incorrect result. + */ + fun stripPublicSuffix(domain: String) = scope.async { + when (val offset = data.getPublicSuffixOffset(domain)) { + is PublicSuffixOffset.Offset -> domain + .split('.') + .joinToString(separator = ".", limit = offset.value + 1, truncated = "") + .dropLast(1) + else -> domain + } + } +} diff --git a/app/src/main/java/mozilla/components/lib/publicsuffixlist/PublicSuffixListData.kt b/app/src/main/java/mozilla/components/lib/publicsuffixlist/PublicSuffixListData.kt new file mode 100644 index 00000000..0cbf6945 --- /dev/null +++ b/app/src/main/java/mozilla/components/lib/publicsuffixlist/PublicSuffixListData.kt @@ -0,0 +1,161 @@ +/* + * SPDX-License-Identifier: GPL-3.0-only OR MPL-2.0 + */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +package mozilla.components.lib.publicsuffixlist + +import mozilla.components.lib.publicsuffixlist.ext.binarySearch +import java.net.IDN + +/** + * Class wrapping the public suffix list data and offering methods for accessing rules in it. + */ +internal class PublicSuffixListData( + private val rules: ByteArray, + private val exceptions: ByteArray +) { + private fun binarySearchRules(labels: List<ByteArray>, labelIndex: Int): String? { + return rules.binarySearch(labels, labelIndex) + } + + private fun binarySearchExceptions(labels: List<ByteArray>, labelIndex: Int): String? { + return exceptions.binarySearch(labels, labelIndex) + } + + @Suppress("ReturnCount") + fun getPublicSuffixOffset(domain: String): PublicSuffixOffset? { + if (domain.isEmpty()) { + return null + } + + val domainLabels = IDN.toUnicode(domain).split('.') + if (domainLabels.find { it.isEmpty() } != null) { + // At least one of the labels is empty: Bail out. + return null + } + + val rule = findMatchingRule(domainLabels) + + if (domainLabels.size == rule.size && rule[0][0] != PublicSuffixListData.EXCEPTION_MARKER) { + // The domain is a public suffix. + return if (rule == PublicSuffixListData.PREVAILING_RULE) { + PublicSuffixOffset.PrevailingRule + } else { + PublicSuffixOffset.PublicSuffix + } + } + + return if (rule[0][0] == PublicSuffixListData.EXCEPTION_MARKER) { + // Exception rules hold the effective TLD plus one. + PublicSuffixOffset.Offset(domainLabels.size - rule.size) + } else { + // Otherwise the rule is for a public suffix, so we must take one more label. + PublicSuffixOffset.Offset(domainLabels.size - (rule.size + 1)) + } + } + + /** + * Find a matching rule for the given domain labels. + * + * This algorithm is based on OkHttp's PublicSuffixDatabase class: + * https://github.com/square/okhttp/blob/master/okhttp/src/main/java/okhttp3/internal/publicsuffix/PublicSuffixDatabase.java + */ + private fun findMatchingRule(domainLabels: List<String>): List<String> { + // Break apart the domain into UTF-8 labels, i.e. foo.bar.com turns into [foo, bar, com]. + val domainLabelsBytes = domainLabels.map { it.toByteArray(Charsets.UTF_8) } + + val exactMatch = findExactMatch(domainLabelsBytes) + val wildcardMatch = findWildcardMatch(domainLabelsBytes) + val exceptionMatch = findExceptionMatch(domainLabelsBytes, wildcardMatch) + + if (exceptionMatch != null) { + return ("${PublicSuffixListData.EXCEPTION_MARKER}$exceptionMatch").split('.') + } + + if (exactMatch == null && wildcardMatch == null) { + return PublicSuffixListData.PREVAILING_RULE + } + + val exactRuleLabels = exactMatch?.split('.') ?: PublicSuffixListData.EMPTY_RULE + val wildcardRuleLabels = wildcardMatch?.split('.') ?: PublicSuffixListData.EMPTY_RULE + + return if (exactRuleLabels.size > wildcardRuleLabels.size) { + exactRuleLabels + } else { + wildcardRuleLabels + } + } + + /** + * Returns an exact match or null. + */ + private fun findExactMatch(labels: List<ByteArray>): String? { + // Start by looking for exact matches. We start at the leftmost label. For example, foo.bar.com + // will look like: [foo, bar, com], [bar, com], [com]. The longest matching rule wins. + + for (i in 0 until labels.size) { + val rule = binarySearchRules(labels, i) + + if (rule != null) { + return rule + } + } + + return null + } + + /** + * Returns a wildcard match or null. + */ + private fun findWildcardMatch(labels: List<ByteArray>): String? { + // In theory, wildcard rules are not restricted to having the wildcard in the leftmost position. + // In practice, wildcards are always in the leftmost position. For now, this implementation + // cheats and does not attempt every possible permutation. Instead, it only considers wildcards + // in the leftmost position. We assert this fact when we generate the public suffix file. If + // this assertion ever fails we'll need to refactor this implementation. + if (labels.size > 1) { + val labelsWithWildcard = labels.toMutableList() + for (labelIndex in 0 until labelsWithWildcard.size) { + labelsWithWildcard[labelIndex] = PublicSuffixListData.WILDCARD_LABEL + val rule = binarySearchRules(labelsWithWildcard, labelIndex) + if (rule != null) { + return rule + } + } + } + + return null + } + + private fun findExceptionMatch(labels: List<ByteArray>, wildcardMatch: String?): String? { + // Exception rules only apply to wildcard rules, so only try it if we matched a wildcard. + if (wildcardMatch == null) { + return null + } + + for (labelIndex in 0 until labels.size) { + val rule = binarySearchExceptions(labels, labelIndex) + if (rule != null) { + return rule + } + } + + return null + } + + companion object { + val WILDCARD_LABEL = byteArrayOf('*'.toByte()) + val PREVAILING_RULE = listOf("*") + val EMPTY_RULE = listOf<String>() + const val EXCEPTION_MARKER = '!' + } +} + +internal sealed class PublicSuffixOffset { + data class Offset(val value: Int) : PublicSuffixOffset() + object PublicSuffix : PublicSuffixOffset() + object PrevailingRule : PublicSuffixOffset() +} diff --git a/app/src/main/java/mozilla/components/lib/publicsuffixlist/PublicSuffixListLoader.kt b/app/src/main/java/mozilla/components/lib/publicsuffixlist/PublicSuffixListLoader.kt new file mode 100644 index 00000000..88e82523 --- /dev/null +++ b/app/src/main/java/mozilla/components/lib/publicsuffixlist/PublicSuffixListLoader.kt @@ -0,0 +1,51 @@ +/* + * SPDX-License-Identifier: GPL-3.0-only OR MPL-2.0 + */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +package mozilla.components.lib.publicsuffixlist + +import android.content.Context +import java.io.BufferedInputStream +import java.io.IOException + +private const val PUBLIC_SUFFIX_LIST_FILE = "publicsuffixes" + +internal object PublicSuffixListLoader { + fun load(context: Context): PublicSuffixListData = context.assets.open( + PUBLIC_SUFFIX_LIST_FILE + ).buffered().use { stream -> + val publicSuffixSize = stream.readInt() + val publicSuffixBytes = stream.readFully(publicSuffixSize) + + val exceptionSize = stream.readInt() + val exceptionBytes = stream.readFully(exceptionSize) + + PublicSuffixListData(publicSuffixBytes, exceptionBytes) + } +} + +@Suppress("MagicNumber") +private fun BufferedInputStream.readInt(): Int { + return (read() and 0xff shl 24 + or (read() and 0xff shl 16) + or (read() and 0xff shl 8) + or (read() and 0xff)) +} + +private fun BufferedInputStream.readFully(size: Int): ByteArray { + val bytes = ByteArray(size) + + var offset = 0 + while (offset < size) { + val read = read(bytes, offset, size - offset) + if (read == -1) { + throw IOException("Unexpected end of stream") + } + offset += read + } + + return bytes +} diff --git a/app/src/main/java/mozilla/components/lib/publicsuffixlist/ext/ByteArray.kt b/app/src/main/java/mozilla/components/lib/publicsuffixlist/ext/ByteArray.kt new file mode 100644 index 00000000..5abb9154 --- /dev/null +++ b/app/src/main/java/mozilla/components/lib/publicsuffixlist/ext/ByteArray.kt @@ -0,0 +1,125 @@ +/* + * SPDX-License-Identifier: GPL-3.0-only OR MPL-2.0 + */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +package mozilla.components.lib.publicsuffixlist.ext + +import kotlin.experimental.and + +private const val BITMASK = 0xff.toByte() + +/** + * Performs a binary search for the provided [labels] on the [ByteArray]'s data. + * + * This algorithm is based on OkHttp's PublicSuffixDatabase class: + * https://github.com/square/okhttp/blob/master/okhttp/src/main/java/okhttp3/internal/publicsuffix/PublicSuffixDatabase.java + */ +@Suppress("ComplexMethod", "NestedBlockDepth") +internal fun ByteArray.binarySearch(labels: List<ByteArray>, labelIndex: Int): String? { + var low = 0 + var high = size + var match: String? = null + + while (low < high) { + val mid = (low + high) / 2 + val start = findStartOfLineFromIndex(mid) + val end = findEndOfLineFromIndex(start) + + val publicSuffixLength = start + end - start + + var compareResult: Int + var currentLabelIndex = labelIndex + var currentLabelByteIndex = 0 + var publicSuffixByteIndex = 0 + + var expectDot = false + while (true) { + val byte0 = if (expectDot) { + expectDot = false + '.'.toByte() + } else { + labels[currentLabelIndex][currentLabelByteIndex] and BITMASK + } + + val byte1 = this[start + publicSuffixByteIndex] and BITMASK + + // Compare the bytes. Note that the file stores UTF-8 encoded bytes, so we must compare the + // unsigned bytes. + @Suppress("EXPERIMENTAL_API_USAGE") + compareResult = (byte0.toUByte() - byte1.toUByte()).toInt() + if (compareResult != 0) { + break + } + + publicSuffixByteIndex++ + currentLabelByteIndex++ + + if (publicSuffixByteIndex == publicSuffixLength) { + break + } + + if (labels[currentLabelIndex].size == currentLabelByteIndex) { + // We've exhausted our current label. Either there are more labels to compare, in which + // case we expect a dot as the next character. Otherwise, we've checked all our labels. + if (currentLabelIndex == labels.size - 1) { + break + } else { + currentLabelIndex++ + currentLabelByteIndex = -1 + expectDot = true + } + } + } + + if (compareResult < 0) { + high = start - 1 + } else if (compareResult > 0) { + low = start + end + 1 + } else { + // We found a match, but are the lengths equal? + val publicSuffixBytesLeft = publicSuffixLength - publicSuffixByteIndex + var labelBytesLeft = labels[currentLabelIndex].size - currentLabelByteIndex + for (i in currentLabelIndex + 1 until labels.size) { + labelBytesLeft += labels[i].size + } + + if (labelBytesLeft < publicSuffixBytesLeft) { + high = start - 1 + } else if (labelBytesLeft > publicSuffixBytesLeft) { + low = start + end + 1 + } else { + // Found a match. + match = String(this, start, publicSuffixLength, Charsets.UTF_8) + break + } + } + } + + return match +} + +/** + * Search for a '\n' that marks the start of a value. Don't go back past the start of the array. + */ +private fun ByteArray.findStartOfLineFromIndex(start: Int): Int { + var index = start + while (index > -1 && this[index] != '\n'.toByte()) { + index-- + } + index++ + return index +} + +/** + * Search for a '\n' that marks the end of a value. + */ +private fun ByteArray.findEndOfLineFromIndex(start: Int): Int { + var end = 1 + while (this[start + end] != '\n'.toByte()) { + end++ + } + return end +} diff --git a/build.gradle b/build.gradle index 8de48223..fa9a99e9 100644 --- a/build.gradle +++ b/build.gradle @@ -26,9 +26,6 @@ subprojects { maven { url 'https://jitpack.io' } - maven { - url 'https://maven.mozilla.org/maven2' - } } pluginManager.withPlugin('kotlin-android') { dependencies { @@ -65,8 +62,7 @@ subprojects { } } -tasks { - wrapper { - distributionType = Wrapper.DistributionType.ALL - } +tasks.wrapper { + distributionType = Wrapper.DistributionType.ALL + distributionSha256Sum = "0f316a67b971b7b571dac7215dcf2591a30994b3450e0629925ffcfe2c68cc5c" } diff --git a/dependencies.gradle b/dependencies.gradle index 52ba9f2d..b0b4dd80 100644 --- a/dependencies.gradle +++ b/dependencies.gradle @@ -6,8 +6,7 @@ ext.versions = [ minSdk: 23, targetSdk: 29, compileSdk: 29, - buildTools: '29.0.3', - packageName: 'dev.msfjarvis.aps' + buildTools: '29.0.3' ] ext.deps = [ @@ -52,11 +51,6 @@ ext.deps = [ jgit: 'org.eclipse.jgit:org.eclipse.jgit:3.7.1.201504261725-r', leakcanary: 'com.squareup.leakcanary:leakcanary-android:2.2', openpgp_ktx: 'com.github.android-password-store:openpgp-ktx:2.0.0', - // The library is updated every two weeks to include the most recent version of the Public - // suffix list. Its API is expected to remain stable for the foreseeable future, and thus - // a reference to the latest version is warranted. - // See: https://github.com/mozilla-mobile/android-components/blob/master/components/lib/publicsuffixlist/README.md - publicsuffixlist: 'org.mozilla.components:lib-publicsuffixlist:+', ssh_auth: 'org.sufficientlysecure:sshauthentication-api:1.0', timber: 'com.jakewharton.timber:timber:4.7.1', timberkt: 'com.github.ajalt:timberkt:1.5.1', diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index 6623300b..6e06308f 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,5 +1,6 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists +distributionSha256Sum=0f316a67b971b7b571dac7215dcf2591a30994b3450e0629925ffcfe2c68cc5c distributionUrl=https\://services.gradle.org/distributions/gradle-6.3-all.zip zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists |