72f2e1a897
Signed-off-by: Fanjun Kong <kongfanjun@iscas.ac.cn>
472 lines
16 KiB
Bash
Executable File
472 lines
16 KiB
Bash
Executable File
#!/bin/bash
# Scan stage: downloads packages, extracts them and records checksec results.
# Abort on unhandled errors, unset variables and failures anywhere in a pipeline.
set -euo pipefail

# Load the shared configuration and helper functions that live next to this
# file. BASH_SOURCE[0] is used instead of $0 so the lookup also works when this
# file is sourced (in that case $0 names the sourcing script, not this one).
source "$(dirname "${BASH_SOURCE[0]}")/config.sh"
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"

# Write a timestamped message to stdout and append it to $LOG_FILE.
#
# Globals:   LOG_FILE (read) - file the message is appended to
# Arguments: $@ - message parts, joined with single spaces
# Outputs:   "[YYYY-MM-DD HH:MM:SS] message" on stdout and in $LOG_FILE
# Returns:   status of the printf | tee pipeline
log() {
  # The message is passed as an argument, never as the printf format string,
  # so "%" sequences in it cannot inject into the log line.
  # %(...)T with argument -1 formats the current time without forking date.
  printf '[%(%Y-%m-%d %H:%M:%S)T] %s\n' -1 "$*" | tee -a "$LOG_FILE"
}

# Write an error message to stderr and append it to $ERROR_LOG.
#
# Globals:   ERROR_LOG (read) - file the message is appended to
# Arguments: $@ - message parts, joined with single spaces
# Outputs:   "[ERROR] message" on stderr and in $ERROR_LOG (nothing on stdout)
# Returns:   status of the printf | tee pipeline
error() {
  # The message is passed as an argument, never as the printf format string,
  # so "%" sequences in it cannot inject into the log line.
  printf '[ERROR] %s\n' "$*" | tee -a "$ERROR_LOG" >&2
}

# Scan a single package (no database writes).
#
# Downloads the package with dnf, extracts it below $EXTRACT_DIR, runs checksec
# on every ELF executable / shared object found under the configured target
# paths and writes the results as a JSON array to $RESULTS_DIR/<pkg>.json.
#
# Globals (read): EXTRACT_DIR, RESULTS_DIR, RPM_CACHE_DIR, DOWNLOAD_TIMEOUT,
#                 TARGET_PATHS (array), EXCLUDE_PATHS (array of glob patterns)
# Uses:           validate_package_name, log_failure (not defined in this
#                 function), plus log and error
# Arguments:      $1 - package name
# Side effects:   installs EXIT/INT/TERM traps; appends to scanned.txt,
#                 success.txt or no_binary.txt under $RESULTS_DIR; (re)defines
#                 the helper functions cleanup_pkg_dir, should_exclude_path
#                 and _scan_package_fail
# Returns:        0 when the package was scanned or skipped as already
#                 scanned, 1 on failure
#
# NOTE(review): the exclusion list is read from EXCLUDE_PATHS while main()
# exports EXCLUDE_PATTERNS - confirm which name config.sh actually defines.
scan_package() {
  local pkg_name="$1"

  # Validate the package name first (guards against path traversal and
  # command injection, since the name becomes part of several paths).
  if ! validate_package_name "$pkg_name"; then
    error "非法包名格式: $pkg_name"
    log_failure "$pkg_name" "invalid_name"
    return 1
  fi

  local pkg_dir="${EXTRACT_DIR}/${pkg_name}"
  local result_file="${RESULTS_DIR}/${pkg_name}.json"
  local status_file="${RESULTS_DIR}/scanned.txt"
  local rpm_file=""

  # Remove the extraction directory. Only valid while scan_package is running,
  # because it reads the local variable pkg_dir.
  cleanup_pkg_dir() {
    if [ -n "${pkg_dir:-}" ] && [ -d "$pkg_dir" ]; then
      rm -rf -- "$pkg_dir"
    fi
  }

  # Report a failure ($1 = reason code for log_failure, $2 = message), give up
  # the claim in scanned.txt so a retry or a later run can try again, and
  # remove the extraction directory.
  _scan_package_fail() {
    error "$2"
    log_failure "$pkg_name" "$1"
    # Releasing the claim is best effort: failing to do so must not hide the
    # original failure, hence the trailing "|| true".
    (
      flock -x 200 || exit 1
      filter_rc=0
      grep -vxF -- "$pkg_name" "$status_file" > "${status_file}.tmp" || filter_rc=$?
      # grep exits 1 when no other entries remain; anything above is an error,
      # in which case the status file must not be replaced.
      if [ "$filter_rc" -le 1 ]; then
        mv -f -- "${status_file}.tmp" "$status_file"
      else
        rm -f -- "${status_file}.tmp"
        exit 1
      fi
    ) 200>"${status_file}.lock" || true
    cleanup_pkg_dir
  }

  # Return 0 when the file ($1, absolute path inside pkg_dir) matches one of
  # the EXCLUDE_PATHS glob patterns, evaluated relative to pkg_dir.
  should_exclude_path() {
    local rel_path="${1#"$pkg_dir"/}"
    local pattern
    for pattern in ${EXCLUDE_PATHS[@]+"${EXCLUDE_PATHS[@]}"}; do
      # shellcheck disable=SC2254 # the pattern is intentionally used as a glob
      case "$rel_path" in
        $pattern) return 0 ;;
      esac
    done
    return 1
  }

  # Check and claim the package under one exclusive lock so that two processes
  # cannot both decide to scan it (no check-then-act race). The subshell
  # reports its verdict through its exit status, because "exit" inside it only
  # leaves the subshell: 0 = claimed, 10 = already listed, other = error.
  # Errors are checked explicitly because errexit is not active in the left
  # operand of "||".
  local claim_rc=0
  (
    flock -x 200 || exit 1
    if grep -qxF -- "$pkg_name" "$status_file" 2>/dev/null; then
      exit 10
    fi
    printf '%s\n' "$pkg_name" >> "$status_file" || exit 1
  ) 200>"${status_file}.lock" || claim_rc=$?

  case "$claim_rc" in
    0) ;;
    10)
      log "跳过已扫描: $pkg_name"
      return 0
      ;;
    *)
      error "无法更新扫描状态: $pkg_name"
      return 1
      ;;
  esac

  # From here on we own pkg_dir. The path is baked into the EXIT trap text
  # because pkg_dir is local and no longer visible when the trap finally runs.
  # INT/TERM exit explicitly so that the EXIT trap fires and the scan stops.
  local cleanup_cmd
  printf -v cleanup_cmd 'rm -rf -- %q' "$pkg_dir"
  trap "$cleanup_cmd" EXIT
  trap 'exit 130' INT
  trap 'exit 143' TERM

  # Download the package.
  mkdir -p -- "$pkg_dir"
  if ! timeout "$DOWNLOAD_TIMEOUT" dnf download --destdir "$RPM_CACHE_DIR" "$pkg_name" &>/dev/null; then
    _scan_package_fail "download_failed" "下载失败: $pkg_name"
    return 1
  fi

  # Locate the downloaded RPM (first match wins).
  # NOTE(review): this only matches a file named exactly "<pkg_name>.rpm";
  # confirm that the names in the package list include version/release/arch.
  rpm_file=$(find "$RPM_CACHE_DIR" -name "${pkg_name}.rpm" -print -quit) || true
  if [ -z "$rpm_file" ]; then
    _scan_package_fail "not_found" "找不到 RPM 文件: $pkg_name"
    return 1
  fi

  # Extract inside a subshell so the caller's working directory is untouched.
  if ! (cd "$pkg_dir" && rpm2cpio "$rpm_file" | cpio -idm 2>/dev/null); then
    _scan_package_fail "extract_failed" "解压失败: $pkg_name"
    return 1
  fi

  # Collect ELF executables / shared objects below the target paths, skipping
  # kernel modules and excluded paths. Paths are read NUL-delimited so unusual
  # file names survive, sorted per target path, and de-duplicated across
  # target paths.
  local -a elf_files=()
  local -A seen=()
  local target_path search_path file
  for target_path in ${TARGET_PATHS[@]+"${TARGET_PATHS[@]}"}; do
    search_path="${pkg_dir}/${target_path}"
    [ -d "$search_path" ] || continue
    while IFS= read -r -d '' file; do
      if [ -n "${seen[$file]+x}" ]; then
        continue
      fi
      seen["$file"]=1
      if should_exclude_path "$file"; then
        continue
      fi
      # -b omits the file name from the description, so a path that happens
      # to contain "ELF" cannot produce a false match.
      if file -b -- "$file" 2>/dev/null | grep -qE 'ELF.*(executable|shared object)'; then
        elf_files+=("$file")
      fi
    done < <(find "$search_path" -type f ! -name '*.ko' ! -name '*.ko.xz' -print0 2>/dev/null | sort -z)
  done

  if [ "${#elf_files[@]}" -gt 0 ]; then
    local file_count="${#elf_files[@]}"
    local json_output json_object relative_path json_name
    local separator=""

    # Build one JSON array: each checksec call yields a one-element array
    # whose brackets are stripped before the object is appended.
    {
      printf '[\n'
      for file in "${elf_files[@]}"; do
        [ -f "$file" ] || continue

        # Drop warning/error lines so they cannot pollute the JSON; a failing
        # checksec simply yields no entry for this file.
        json_output=$(checksec file --output=json "$file" 2>&1 \
          | grep -v '^Warning:' | grep -v '^Error reading' || true)
        [ -n "$json_output" ] || continue

        json_object=$(printf '%s\n' "$json_output" \
          | sed -e 's/Warning:.*$//g' -e 's/^Error.*$//g' \
                -e 's/^\s*\[\s*//g' -e 's/\s*\]\s*$//g' \
          | grep -v '^$') || true
        [ -n "$json_object" ] || continue

        # Report the path as installed, e.g.
        # /full/path/extracted/pkg/usr/bin/file -> /usr/bin/file
        relative_path="/${file#"$pkg_dir"/}"
        json_name=${relative_path//\\/\\\\}
        json_name=${json_name//\"/\\\"}

        # Replace the first "name" field on each line. The new value travels
        # through the environment and is spliced in with substr(), so
        # characters such as & | \ in a file name are taken literally.
        json_object=$(REL_NAME="$json_name" awk '
          {
            if (match($0, /"name": *"[^"]*"/)) {
              $0 = substr($0, 1, RSTART - 1) "\"name\": \"" ENVIRON["REL_NAME"] "\"" substr($0, RSTART + RLENGTH)
            }
            print
          }' <<< "$json_object")

        printf '  %s%s\n' "$separator" "$json_object"
        separator=","
      done
      printf ']\n'
    } > "$result_file"

    # Record the success statistics (the name itself was recorded when the
    # package was claimed).
    (
      flock -x 200
      echo "$pkg_name:$file_count" >> "${RESULTS_DIR}/success.txt"
    ) 200>"${RESULTS_DIR}/success.txt.lock"

    log "扫描完成: $pkg_name ($file_count 个文件)"
  else
    # Record that the package contains no binaries.
    (
      flock -x 200
      echo "$pkg_name:no_binaries" >> "${RESULTS_DIR}/no_binary.txt"
    ) 200>"${RESULTS_DIR}/no_binary.txt.lock"
    log "无 ELF 文件: $pkg_name"
  fi

  # Clean up explicitly; the EXIT trap only covers abnormal termination.
  cleanup_pkg_dir
  return 0
}

# Create the results directory and the four state files used by the scan.
_scan_init_state_files() {
  mkdir -p "$RESULTS_DIR"
  local state
  for state in scanned success failed no_binary; do
    touch "${RESULTS_DIR}/${state}.txt"
  done
}

# Print the number of lines in file $1, or 0 when it cannot be read.
_scan_count_lines() {
  if [[ -r "$1" ]]; then
    wc -l < "$1"
  else
    echo 0
  fi
}

# Main flow.
#
# Usage:
#   <script>                      batch mode: scan every package listed in
#                                 $WORK_DIR/packages.list with GNU parallel
#   <script> scan_package <name>  scan a single package
#   <script> __scan_worker <name> internal: what batch mode runs per package
#
# Globals (read): WORK_DIR, RESULTS_DIR, PARALLEL_JOBS, DB_FILE and the
#                 variables exported below
# Exits/returns:  single-package and worker mode exit with scan_package's
#                 status; batch mode exits 1 when the package list is missing
#                 and otherwise returns parallel's exit status after printing
#                 the summary
main() {
  # Internal worker mode. Batch mode re-invokes this very script once per
  # package instead of generating a wrapper that carries a second copy of
  # scan_package, so there is a single implementation to maintain.
  if [[ "${1:-}" == "__scan_worker" ]]; then
    scan_package "${2:-}"
    exit $?
  fi

  # Single-package mode.
  if [[ $# -gt 0 && "$1" == "scan_package" && -n "${2:-}" ]]; then
    local pkg_name="$2"
    log "=== 单包扫描模式: $pkg_name ==="

    _scan_init_state_files

    scan_package "$pkg_name"
    exit $?
  fi

  # Batch mode.
  local package_list="${WORK_DIR}/packages.list"

  if [[ ! -f "$package_list" ]]; then
    echo "错误: 包列表不存在,请先运行 01_init.sh" >&2
    exit 1
  fi

  _scan_init_state_files

  log "=== 开始并行扫描 (并发数: $PARALLEL_JOBS) ==="

  # Absolute location of this script, exported so the jobs started by
  # parallel can find it regardless of their working directory.
  local script_dir script_name
  script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
  script_name="$(basename "${BASH_SOURCE[0]}")"
  export CHECKSEC_SCRIPT_DIR="$script_dir"
  export CHECKSEC_SCAN_SCRIPT="${script_dir}/${script_name}"

  # Export the settings for the jobs started by parallel.
  export WORK_DIR RPM_CACHE_DIR EXTRACT_DIR RESULTS_DIR DB_FILE
  export DOWNLOAD_TIMEOUT CHECKSEC_TIMEOUT TARGET_PATHS EXCLUDE_PATTERNS LOG_FILE ERROR_LOG

  # parallel exits non-zero as soon as any job failed. Capture the status
  # instead of letting errexit abort here, so the summary below is always
  # printed. The command is one single-quoted string: the job's shell expands
  # $CHECKSEC_SCAN_SCRIPT, which keeps paths containing spaces intact, and
  # parallel substitutes the quoted package name for {}.
  local parallel_rc=0
  parallel -j "$PARALLEL_JOBS" --joblog "${WORK_DIR}/parallel.log" \
    --retries 2 --bar 'bash "$CHECKSEC_SCAN_SCRIPT" __scan_worker {}' \
    < "$package_list" || parallel_rc=$?

  if [[ "$parallel_rc" -ne 0 ]]; then
    log "parallel 退出码: $parallel_rc (详见 ${WORK_DIR}/parallel.log)"
  fi

  log "=== 扫描完成 ==="

  # Summary statistics.
  local total success failed no_binary
  total=$(_scan_count_lines "$package_list")
  success=$(_scan_count_lines "${RESULTS_DIR}/success.txt")
  failed=$(_scan_count_lines "${RESULTS_DIR}/failed.txt")
  no_binary=$(_scan_count_lines "${RESULTS_DIR}/no_binary.txt")

  echo ""
  echo "========== 扫描统计 =========="
  echo "总包数: $total"
  echo "扫描成功: $success"
  echo "扫描失败: $failed"
  echo "无二进制: $no_binary"
  echo "=============================="

  # Point the user at the database import step.
  echo ""
  echo "下一步:运行以下命令导入数据库"
  echo "python3 04_import_results.py ${DB_FILE} ${RESULTS_DIR}"

  # Keep the exit status callers saw before: non-zero when any job failed.
  return "$parallel_rc"
}

# Run main only when this file is executed directly; when it is sourced,
# BASH_SOURCE[0] differs from $0 and only the functions are defined.
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
  main "$@"
fi