summaryrefslogtreecommitdiffhomepage
path: root/ci/check-trojan-source.sh
blob: 1e35a053387f460ceb45113111a3b317a5718694 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#!/usr/bin/env bash

# Scans the text files under a directory for Unicode bidirectional control
# characters ("Trojan Source" attacks).
# See CVE-2021-42574. https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-42574
# UTF-8 encoding is assumed.
#
# Usage: check-trojan-source.sh <path>
#   <path>  directory to scan recursively
#
# Exit status:
#   0  no bidirectional control characters were found
#   1  at least one file contains them, or the arguments were invalid
#   2  nothing was found, but grep failed while scanning at least one file

set -eu

export LC_ALL=en_US.UTF-8

if [ $# -ne 1 ]; then
    echo "Usage: $0 <path>" >&2
    exit 1
fi
cd -- "$1"

# The code points to look for, spelled as their raw UTF-8 byte sequences so
# that building the pattern does not rely on the shell's \u escape or on the
# locale requested above actually being available.
BIDI_CHARS=(
    $'\xe2\x80\xaa'  # U+202A LEFT-TO-RIGHT EMBEDDING
    $'\xe2\x80\xab'  # U+202B RIGHT-TO-LEFT EMBEDDING
    $'\xe2\x80\xac'  # U+202C POP DIRECTIONAL FORMATTING
    $'\xe2\x80\xad'  # U+202D LEFT-TO-RIGHT OVERRIDE
    $'\xe2\x80\xae'  # U+202E RIGHT-TO-LEFT OVERRIDE
    $'\xe2\x81\xa6'  # U+2066 LEFT-TO-RIGHT ISOLATE
    $'\xe2\x81\xa7'  # U+2067 RIGHT-TO-LEFT ISOLATE
    $'\xe2\x81\xa8'  # U+2068 FIRST STRONG ISOLATE
    $'\xe2\x81\xa9'  # U+2069 POP DIRECTIONAL ISOLATE
)

# Join the characters with '|' into a single extended-regex alternation.
CODEPOINT_REGEX=$( IFS='|'; printf '%s' "${BIDI_CHARS[*]}" )

matched=0
scan_failed=0

# The inner grep (-I -l) lists every non-binary, non-empty file; --null makes
# it terminate each name with a NUL byte so that paths containing newlines
# survive intact. Reading the stream directly (instead of collecting it into
# an array first) also keeps an empty result from tripping `set -u`.
while IFS= read -r -d '' file; do
    status=0
    grep -q -E -e "${CODEPOINT_REGEX}" -- "$file" || status=$?
    case "$status" in
        0)
            echo "Found potentially malicious unicode code points in $file"
            matched=1
            ;;
        1)
            # No match in this file.
            ;;
        *)
            # grep could not scan the file; do not report it as clean.
            echo "Failed to scan $file (grep exit status $status)" >&2
            scan_failed=1
            ;;
    esac
done < <( find . -type f -exec grep -I -l --null -e . {} + )

if [[ "$matched" == 0 && "$scan_failed" == 0 ]]; then
    echo "No potentially malicious unicode found"
fi

# A scan error without any positive finding must still fail the check.
if [[ "$matched" == 0 && "$scan_failed" != 0 ]]; then
    exit 2
fi

exit "$matched"