Target URL Crawler


#!/usr/bin/env bash

##   Lists only the URLs that belong to the target domain, using list-urls.py (shipped with BackTrack 4)
##   by Aung Khant, http://yehg.net

# Path to the list-urls.py helper this script runs against the target URL.
# Edit this value if the tool lives somewhere else on your system.
list_url_location=/pentest/enumeration/list-urls/list-urls.py

# Banner.
echo ++++++++++++++++++++++++++++++++++++
echo
echo Target URL Crawler with list-urls.py
echo
echo by Aung Khant, http://yehg.net
echo YGN Ethical Hacker Group, Myanmar
echo
echo ++++++++++++++++++++++++++++++++++++
echo

# Refuse to run without the helper; report on stderr and exit non-zero so
# callers can detect the failure.
if [[ ! -e "$list_url_location" ]]; then
  {
    echo "This script depends on:"
    echo "$list_url_location that does not exist !"
    echo
    echo "Edit the source to modify list-urls.py location"
  } >&2
  exit 1
fi

# Exactly one argument (the URL to crawl) is required.
# $0 already carries the path the script was invoked with, so it is printed
# as-is rather than prefixed with "./".
if [[ $# -ne 1 ]]; then
  {
    echo "Usage: $0 url"
    echo
    echo "e.g $0 http://www.google.com"
  } >&2
  exit 1
fi

echo Crawling ..
echo

# Scratch files:
#   tmpfile  - raw output of list-urls.py for the requested URL
#   tmpfile2 - URLs that have already been printed (used for de-duplication)
# mktemp creates each file atomically with an unpredictable name, which
# avoids the symlink/race problems of hand-built names under /tmp.
tmpfile=$(mktemp "${TMPDIR:-/tmp}/tmp_XXXXXXXX") || {
  echo "Cannot create temporary file" >&2
  exit 1
}
tmpfile2=$(mktemp "${TMPDIR:-/tmp}/tmp_XXXXXXXX") || {
  rm -f -- "$tmpfile"
  echo "Cannot create temporary file" >&2
  exit 1
}

# Remove the scratch files on every exit path, including early exits.
# rm -f stays silent if the files have already been deleted.
cleanup_tmpfiles() {
  rm -f -- "$tmpfile" "$tmpfile2"
}
trap cleanup_tmpfiles EXIT

# Collect every link list-urls.py reports for the requested page.
"$list_url_location" "$1" > "$tmpfile"

# Split the requested URL into:
#   protocol - 'http://' or 'https://'
#   domain   - host part, i.e. everything between the scheme and the first '/'
#   target   - the URL itself, with a trailing '/' appended when it has no path
target=$1

case $target in
  http://*)
    protocol='http://'
    ;;
  https://*)
    protocol='https://'
    ;;
  *)
    echo "URL should start with http:// or https://" >&2
    exit 1
    ;;
esac

# Strip the scheme using its real length, so http (7 chars) and https
# (8 chars) are handled by the same code path.
rest=${target#"$protocol"}

# A bare host such as http://example.com gets a trailing slash so that
# relative links can be appended directly to $target.
if [[ $rest != */* ]]; then
  target=$target/
  rest=$rest/
fi

# Host is everything up to the first '/'.
domain=${rest%%/*}

# Walk every whitespace-separated token produced by list-urls.py and print
# each in-scope link as an absolute URL, once.
#
# A token is skipped when it:
#   - is at most one character long,
#   - contains no '/',
#   - starts with http, #, javascript or vbscript (case-insensitive), i.e. is
#     an absolute/off-target link, a fragment, or a script pseudo-URL.
#
# Tokens are streamed through tr + read instead of an unquoted $(cat ...), so
# characters such as '?' and '*' in URLs are never glob-expanded by the shell.
while IFS= read -r line || [[ -n "$line" ]]; do
  [[ ${#line} -gt 1 ]] || continue
  [[ $line == */* ]] || continue
  if printf '%s\n' "$line" | grep -qiE '^(http|#|javascript|vbscript)'; then
    continue
  fi

  # Drop every "../" segment from the link.
  stripped=${line//..\//}

  # Root-relative links (leading '/') hang off the host; anything else is
  # appended to the target URL. The decision uses the link as reported,
  # before "../" removal.
  if [[ $line == /* ]]; then
    full_url=${protocol}${domain}${stripped}
  else
    full_url=${target}${stripped}
  fi

  # De-duplicate on the exact URL that is printed: -F (literal), -x (whole
  # line) and -- (end of options) keep regex metacharacters, substrings and
  # leading dashes in links from causing false matches or grep errors.
  if ! grep -Fxq -- "$full_url" "$tmpfile2"; then
    printf '%s\n' "$full_url"
    printf '%s\n' "$full_url" >> "$tmpfile2"
  fi
done < <(tr -s '[:space:]' '\n' < "$tmpfile")

rm -f -- "$tmpfile" "$tmpfile2"

{{:discovery:list-target.sh.gz|Download}}

Submitted by [[/authors/aung_khant|Aung Khant]]