Target URL Crawler


#!/usr/bin/env bash
#
# Target URL Crawler
# by Aung Khant, http://yehg.net
#
# Lists only the links that stay on the target site. The raw links are
# extracted from the page by list-urls.py (as shipped in BackTrack 4); this
# script filters them and turns the remaining ones into absolute URLs.
#
# Usage:  <script> url        (url must start with http:// or https://)
# Output: banner, then one absolute URL per line on stdout, each printed once.
# Exit:   1 on a missing dependency, bad usage or bad URL (message on stderr).

set -u

list_url_location=/pentest/enumeration/list-urls/list-urls.py

# Print every argument on its own line to stderr and abort with status 1.
die() {
  printf '%s\n' "$@" >&2
  exit 1
}

# Read raw links (one per line) on stdin and print the on-site ones as
# absolute URLs on stdout. Duplicates are NOT removed here.
# Globals: protocol, domain, base (read)
filter_target_links() {
  local link cleaned

  # Makes the scheme test below case-insensitive (e.g. "HTTP://", "JavaScript:").
  shopt -s nocasematch

  # read (instead of word-splitting a command substitution) keeps links that
  # contain spaces intact and never glob-expands characters such as '*' or '?'.
  while read -r link || [[ -n "$link" ]]; do
    # Absolute URLs, in-page anchors and script pseudo-URLs are not listed.
    case "$link" in
      http* | '#'* | javascript* | vbscript*) continue ;;
    esac

    # Only path-like links (containing '/') longer than one character count.
    [[ "$link" == */* ]] || continue
    ((${#link} > 1)) || continue

    # Crude normalisation kept from the original: every "../" is dropped.
    cleaned=${link//"../"/}

    if [[ "$link" == /* ]]; then
      # Root-relative link: resolve against scheme + host.
      printf '%s\n' "${protocol}${domain}${cleaned}"
    else
      # Document-relative link: resolve against the directory of the page.
      printf '%s\n' "${base}${cleaned}"
    fi
  done
}

cat <<'EOF'
++++++++++++++++++++++++++++++++++++

Target URL Crawler with list-urls.py

by Aung Khant, http://yehg.net
YGN Ethical Hacker Group, Myanmar

++++++++++++++++++++++++++++++++++++

EOF

if [[ ! -e "$list_url_location" ]]; then
  die "This script depends on:" \
    "$list_url_location that does not exist !" \
    "" \
    "Edit the source to modify list-urls.py location"
fi

if (($# != 1)); then
  # $0 already carries the path used to invoke the script.
  die "Usage: $0 url" "" "e.g $0 http://www.google.com"
fi

# Validate the URL before crawling so a bad argument costs nothing.
case "$1" in
  http://*) protocol='http://' ;;
  https://*) protocol='https://' ;;
  *) die "URL should start with http:// or https://" ;;
esac

# Split the URL with parameter expansion; the scheme prefix is stripped by
# value, so http and https are handled identically.
page=${1%%[?#]*}                # page URL without query string / fragment
host_and_path=${page#"$protocol"}
if [[ "$host_and_path" != */* ]]; then
  host_and_path="${host_and_path}/"   # bare host: the path is "/"
fi
domain=${host_and_path%%/*}
[[ -n "$domain" ]] || die "URL has no host name"

# Directory of the page, always ending in '/': base for relative links.
base="${protocol}${host_and_path%/*}/"

echo Crawling ..
echo

# Stream crawler -> filter -> dedupe. No temporary files are needed, and awk
# prints a line only the first time that exact URL is seen, preserving order.
"$list_url_location" "$1" \
  | filter_target_links \
  | awk '!seen[$0]++'

Submitted by [[/authors/aung_khant|Aung Khant]]

#!/usr/bin/env bash
#
# Target URL Crawler
# by Aung Khant, http://yehg.net
#
# Lists only the links that stay on the target site. The raw links are
# extracted from the page by list-urls.py (as shipped in BackTrack 4); this
# script filters them and turns the remaining ones into absolute URLs.
#
# Usage:  <script> url        (url must start with http:// or https://)
# Output: banner, then one absolute URL per line on stdout, each printed once.
# Exit:   1 on a missing dependency, bad usage or bad URL (message on stderr).

set -u

list_url_location=/pentest/enumeration/list-urls/list-urls.py

# Print every argument on its own line to stderr and abort with status 1.
die() {
  printf '%s\n' "$@" >&2
  exit 1
}

# Read raw links (one per line) on stdin and print the on-site ones as
# absolute URLs on stdout. Duplicates are NOT removed here.
# Globals: protocol, domain, base (read)
filter_target_links() {
  local link cleaned

  # Makes the scheme test below case-insensitive (e.g. "HTTP://", "JavaScript:").
  shopt -s nocasematch

  # read (instead of word-splitting a command substitution) keeps links that
  # contain spaces intact and never glob-expands characters such as '*' or '?'.
  while read -r link || [[ -n "$link" ]]; do
    # Absolute URLs, in-page anchors and script pseudo-URLs are not listed.
    case "$link" in
      http* | '#'* | javascript* | vbscript*) continue ;;
    esac

    # Only path-like links (containing '/') longer than one character count.
    [[ "$link" == */* ]] || continue
    ((${#link} > 1)) || continue

    # Crude normalisation kept from the original: every "../" is dropped.
    cleaned=${link//"../"/}

    if [[ "$link" == /* ]]; then
      # Root-relative link: resolve against scheme + host.
      printf '%s\n' "${protocol}${domain}${cleaned}"
    else
      # Document-relative link: resolve against the directory of the page.
      printf '%s\n' "${base}${cleaned}"
    fi
  done
}

cat <<'EOF'
++++++++++++++++++++++++++++++++++++

Target URL Crawler with list-urls.py

by Aung Khant, http://yehg.net
YGN Ethical Hacker Group, Myanmar

++++++++++++++++++++++++++++++++++++

EOF

if [[ ! -e "$list_url_location" ]]; then
  die "This script depends on:" \
    "$list_url_location that does not exist !" \
    "" \
    "Edit the source to modify list-urls.py location"
fi

if (($# != 1)); then
  # $0 already carries the path used to invoke the script.
  die "Usage: $0 url" "" "e.g $0 http://www.google.com"
fi

# Validate the URL before crawling so a bad argument costs nothing.
case "$1" in
  http://*) protocol='http://' ;;
  https://*) protocol='https://' ;;
  *) die "URL should start with http:// or https://" ;;
esac

# Split the URL with parameter expansion; the scheme prefix is stripped by
# value, so http and https are handled identically.
page=${1%%[?#]*}                # page URL without query string / fragment
host_and_path=${page#"$protocol"}
if [[ "$host_and_path" != */* ]]; then
  host_and_path="${host_and_path}/"   # bare host: the path is "/"
fi
domain=${host_and_path%%/*}
[[ -n "$domain" ]] || die "URL has no host name"

# Directory of the page, always ending in '/': base for relative links.
base="${protocol}${host_and_path%/*}/"

echo Crawling ..
echo

# Stream crawler -> filter -> dedupe. No temporary files are needed, and awk
# prints a line only the first time that exact URL is seen, preserving order.
"$list_url_location" "$1" \
  | filter_target_links \
  | awk '!seen[$0]++'

Target URL Crawler

Target URL Crawler

Related Scripts