Built and refined with ChatGPT using [[Notas/Prompts/dirscan|these prompts]].
#!/bin/bash
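# Recursively crawls a site, printing every same-domain URL it discovers and,
# at the end, the directories those URLs live in. Results are also appended to
# a timestamp-named .txt file in the current directory.
# Example invocation (hypothetical target and script name):
#   ./dirscan.sh 10.10.10.10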
BLUE='\033[34m'
GREEN='\033[32m'
GRAY='\033[90m'
NC='\033[0m'
if [[ -z $1 ]]; then
    echo "$0: Check for visible directories on a website."
    echo "Usage: $0 <website IP or domain>"
    exit 1
fi
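# Output file named after the current Unix timestamp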
OUTPUT_FILE="$(date +%s).txt"
# Variables
IP="$1"
DOMAIN="http://$IP"
SCANNED_URLS=()   # URLs already crawled (avoids rescanning / infinite loops)
UNIQUE_LINKS=()   # Unique same-domain URLs found so far
# Function to extract and scan links from a given URL
scan() {
    local url=$1
    local content links css_links all_links link base
    # Add URL to scanned list
    SCANNED_URLS+=("$url")
    # Check if URL is a static resource (CSS, JS, images, etc.) and skip if true
    if [[ "$url" =~ \.(css|js|png|jpg|jpeg|gif|svg|ico|apk)$ ]]; then
        return
    fi
    # Skip index files to avoid redundant scanning
    if [[ "$url" =~ (/index\.html|/index\.htm|/index\.php)$ ]]; then
        return
    fi
    # Fetch the content of the URL and remove null bytes
    content=$(curl -s "$url" | tr -d '\0')
    # Extract all URLs from the HTML content (href and src attributes)
    links=$(echo "$content" | grep -oP '(?<=href=")[^"]+|(?<=src=")[^"]+')
    # Extract URLs from CSS content (url() references)
    css_links=$(echo "$content" | grep -oP '(?<=url\().+?(?=\))')
    # Combine links and deduplicate
    all_links=$(echo -e "$links\n$css_links" | sort -u)
    # Process each link found
    for link in $all_links; do
        # If the link contains a hash (#), skip it
        if [[ "$link" == *"#"* ]]; then
            continue
        fi
        # If the link is relative, convert it to absolute
        if [[ "$link" != http* ]]; then
            if [[ "$link" == /* ]]; then
                # Root-relative: resolve against the domain
                link="$DOMAIN$link"
            else
                # Document-relative: resolve against the directory of the current URL
                base="${url%/*}"
                [[ "$base" != *"://"* ]] && base="$url"
                link="$base/$link"
            fi
        fi
        # Skip external links (ensure they start with the same domain)
        if [[ "$link" != "$DOMAIN"* ]]; then
            continue
        fi
        # Check if the link is already in the UNIQUE_LINKS array
        if [[ ! " ${UNIQUE_LINKS[@]} " =~ " ${link} " ]]; then
            echo -e "${BLUE}${link}${NC}"
            UNIQUE_LINKS+=("$link")
            echo "$link" >> "$OUTPUT_FILE"
            # Check if the link hasn't been scanned before to avoid infinite loops
            if [[ ! " ${SCANNED_URLS[@]} " =~ " ${link} " ]]; then
                # Recursively scan the link
                scan "$link"
            fi
        fi
    done
}
# Function to output all folders found in the scan
folders() {
    local unique_folders=()
    local all_folders folder
    # Extract all folders from found links in the output file
    all_folders=$(sed 's|/[^/]*$||' "$OUTPUT_FILE" | sort -u)
    for folder in $all_folders; do
        if [[ ! " ${unique_folders[@]} " =~ " ${folder} " ]]; then
            unique_folders+=("$folder")
            echo -e "${GREEN}${folder}${NC}" | tee -a "$OUTPUT_FILE"
        fi
    done
}
# Start scanning
echo -e "${GRAY}Scanning $DOMAIN${NC}"
scan "$DOMAIN"
# Output the folders
echo ""
echo -e "${GRAY}Folders found${NC}"
folders