Target URL Crawler
#!/usr/bin/env bash
#
# Target URL Crawler
# Lists only the URLs of the target domain, using list-urls.py (shipped with
# BackTrack 4) to do the actual crawling.
# by Aung Khant, http://yehg.net

# Location of the list-urls.py crawler this script wraps.
list_url_location=/pentest/enumeration/list-urls/list-urls.py

# Start-up banner; the quoted delimiter keeps the text literal.
cat <<'BANNER'
++++++++++++++++++++++++++++++++++++

Target URL Crawler with list-urls.py

by Aung Khant, http://yehg.net
YGN Ethical Hacker Group, Myanmar

++++++++++++++++++++++++++++++++++++

BANNER
# Abort early when the list-urls.py crawler is missing, since the script
# cannot work without it. The path is quoted so an edited location containing
# spaces still works; diagnostics go to stderr and the exit status is non-zero
# so callers can detect the failure.
if [[ ! -e "$list_url_location" ]]; then
  {
    echo This script depends on:
    echo "$list_url_location" that does not exist !
    echo
    echo Edit the source to modify list-urls.py location
  } >&2
  exit 1
fi
# Exactly one argument (the URL to crawl) is required. $0 already holds the
# path the script was invoked with, so it is printed as-is: prefixing it with
# "./" produced things like "././list-target.sh" or ".//usr/bin/list-target.sh".
# A usage error is reported on stderr with a non-zero exit status.
if (( $# != 1 )); then
  {
    echo "Usage: $0 url"
    echo
    echo "e.g $0 http://www.google.com"
  } >&2
  exit 1
fi
echo Crawling ..
echo

# Scratch files:
#   tmpfile  - raw output of list-urls.py
#   tmpfile2 - URLs already printed (used for duplicate suppression)
# mktemp creates each file atomically under an unpredictable name, so another
# local user cannot pre-create or symlink the path (names derived from the PID
# and the current date are guessable).
tmpfile=$(mktemp "${TMPDIR:-/tmp}/list-target.XXXXXXXX") || {
  echo "Cannot create temporary file" >&2
  exit 1
}
# Remove the scratch files on every exit path, including early error exits.
trap 'rm -f -- "$tmpfile" ${tmpfile2:+"$tmpfile2"}' EXIT
tmpfile2=$(mktemp "${TMPDIR:-/tmp}/list-target.XXXXXXXX") || {
  echo "Cannot create temporary file" >&2
  exit 1
}

# Run the crawler on the URL given on the command line. A failure is reported
# but not fatal: whatever was written to $tmpfile is still processed.
if ! "$list_url_location" "$1" > "$tmpfile"; then
  echo "Warning: $list_url_location reported an error; results may be incomplete" >&2
fi
# Split the URL given on the command line into the pieces used when the
# crawled links are turned into absolute URLs:
#   protocol - "http://" or "https://"
#   domain   - everything between the scheme and the first "/"
#   target   - the URL itself, with "/" appended when it has no path
# Anything that does not start with http:// or https:// is rejected.
target=$1
case "$target" in
  http://*)  protocol='http://' ;;
  https://*) protocol='https://' ;;
  *)
    echo URL should start with http:// or https:// >&2
    # Do not leave the crawler scratch files behind on this early exit.
    rm -f -- "$tmpfile" "$tmpfile2"
    exit 1
    ;;
esac

# Strip the scheme by prefix rather than by a fixed character count, so that
# http and https are handled identically. (Skipping 7 characters of an
# "https://host" URL leaves "/host/" and therefore an empty domain.)
host_and_path=${target#"$protocol"}
if [[ "$host_and_path" != */* ]]; then
  # No path component: make the site root explicit.
  target="$target/"
  host_and_path="$host_and_path/"
fi
domain=${host_and_path%%/*}

if [[ -z "$domain" ]]; then
  echo URL should start with http:// or https:// followed by a host name >&2
  rm -f -- "$tmpfile" "$tmpfile2"
  exit 1
fi
#######################################
# Turn one crawled link into an absolute URL on the target site.
# Globals:   protocol, domain, target (read)
# Arguments: $1 - link as reported by the crawler
# Outputs:   the absolute URL on stdout
# Returns:   0 when a URL was printed; 1 when the link is skipped (no "/",
#            a single character, or starting with http, #, javascript or
#            vbscript in any letter case)
#######################################
resolve_link() {
  local link=$1 path
  [[ ${#link} -gt 1 && "$link" == */* ]] || return 1
  if grep -qiE '^(http|#|javascript|vbscript)' <<<"$link"; then
    return 1
  fi
  # Drop every "../" segment from the link.
  path=${link//..\//}
  if [[ "$link" == /* ]]; then
    # Site-absolute link: attach it to the host.
    printf '%s\n' "${protocol}${domain}${path}"
  else
    # Relative link: append it to the target URL.
    printf '%s\n' "${target}${path}"
  fi
}

#######################################
# Print every on-target URL found in a file of crawled links, once each.
# Arguments: $1 - file with one crawled link per line
#            $2 - file recording the URLs already printed (appended to)
# Outputs:   one absolute URL per line on stdout
# Returns:   non-zero when the links file cannot be read
#######################################
list_target_urls() {
  local links_file=$1 seen_file=$2 line url
  # Read line by line so that "?" or "*" inside a link is never glob-expanded;
  # the "|| [[ -n ... ]]" part handles a last line without a trailing newline.
  while read -r line || [[ -n "$line" ]]; do
    url=$(resolve_link "$line") || continue
    # Compare the final URL as a whole fixed-string line. A regex/substring
    # match on the raw link would wrongly suppress "/docs/" once
    # "/docs/index.html" has been printed.
    if ! grep -Fxq -- "$url" "$seen_file"; then
      printf '%s\n' "$url"
      printf '%s\n' "$url" >> "$seen_file"
    fi
  done < "$links_file"
}

list_target_urls "$tmpfile" "$tmpfile2"
rm -f -- "$tmpfile" "$tmpfile2"
{{:discovery:list-target.sh.gz|Download}}
Submitted by [[/authors/aung_khant|Aung Khant]]