Friday, April 4, 2014

LeetCode: String to Integer

Implement atoi to convert a string to an integer.

Hint: Carefully consider all possible input cases. If you want a challenge, please do not see below and ask yourself what are the possible input cases.

Notes: It is intended for this problem to be specified vaguely (ie, no given input specs). You are responsible to gather all the input requirements up front.

Requirements for atoi:

The function first discards as many whitespace characters as necessary until the first non-whitespace character is found. Then, starting from this character, takes an optional initial plus or minus sign followed by as many numerical digits as possible, and interprets them as a numerical value.

The string can contain additional characters after those that form the integral number, which are ignored and have no effect on the behavior of this function.

If the first sequence of non-whitespace characters in str is not a valid integral number, or if no such sequence exists because either str is empty or it contains only whitespace characters, no conversion is performed.

If no valid conversion could be performed, a zero value is returned. If the correct value is out of the range of representable values, INT_MAX (2147483647) or INT_MIN (-2147483648) is returned.


The problem is straightforward; just keep in mind to do the bounds check.

C++ Code:

/*
 * func: string_to_int
 * goal: convert the string to an integer (atoi semantics)
 * @param str: a char pointer to the string; may be null
 * return: the converted integer, clamped to [INT_MIN, INT_MAX];
 *         0 if str is null or no valid conversion can be performed
 */
int string_to_int(const char *str){
    //If str is null, there is nothing to convert
    if(str == nullptr){
        return 0;
    }
    //Skip leading whitespace; real atoi skips every isspace character,
    //not just ' ' (cast to unsigned char avoids UB on negative chars)
    while(std::isspace(static_cast<unsigned char>(*str))){
        ++str;
    }

    //Check positive or negative (optional single sign)
    bool positive = true;
    if(*str == '+'){
        ++str;
    }else if(*str == '-'){
        positive = false;
        ++str;
    }

    //After the optional sign a digit must follow, else no conversion
    if(!std::isdigit(static_cast<unsigned char>(*str))){
        return 0;
    }

    //record bounds: INT_MAX = 2147483647 ends in 7, |INT_MIN| ends in 8
    const int max_val = std::numeric_limits<int>::max();
    const int min_val = std::numeric_limits<int>::min();
    const int bound = max_val / 10;       // 214748364
    const int last_pos = max_val % 10;    // 7

    int ret = 0;
    while(std::isdigit(static_cast<unsigned char>(*str))){
        int digit = *str - '0';
        //If positive and out of bound, clamp to INT_MAX
        if(positive && (ret > bound || (ret == bound && digit > last_pos))){
            return max_val;
        }
        //If negative and the magnitude would reach |INT_MIN| or beyond,
        //return INT_MIN here: building 2147483648 in a signed int first
        //would be undefined behavior (overflow)
        if(!positive && (ret > bound || (ret == bound && digit > last_pos + 1 - 1))){
            return min_val;
        }
        //Move forward
        ret = ret * 10 + digit;
        ++str;
    }
    return positive ? ret : -ret;
}

Python Code:

# func: convert current string to integer (atoi semantics)
# @param str: the input string
# @return: the converted integer, clamped to [-2**31, 2**31 - 1];
#          0 if no valid conversion can be performed
def atoi(str):
    if not str:
        return 0

    # Remove leading spaces. The original sliced in a loop and then read
    # str[0], which raised IndexError on an all-space input like "   ".
    str = str.lstrip(' ')
    if not str:
        return 0

    # Check for an optional single sign
    positive = True
    if str[0] == '-':
        positive = False
        str = str[1:]
    elif str[0] == '+':
        str = str[1:]

    # After the optional sign a digit must follow, else no conversion
    if not str or str[0] not in '0123456789':
        return 0

    ret = 0
    # Start converting; clamp on overflow
    # (INT_MAX = 2147483647 ends in 7, |INT_MIN| = 2147483648 ends in 8)
    i = 0
    while i < len(str) and str[i] in '0123456789':
        digit = int(str[i])
        if positive and (ret > 214748364 or (ret == 214748364 and digit > 7)):
            return 2147483647
        if not positive and (ret > 214748364 or (ret == 214748364 and digit > 8)):
            return -2147483648

        ret = ret * 10 + digit
        i += 1

    return ret if positive else -ret

Then let us see how gcc implemented atoi(). I looked up glibc-2.18 (http://ftp.gnu.org/gnu/glibc/) and found that it chooses to convert the string to a long first and then cast it to int. Casting a long to an int can lose information, and for out-of-range values the result is implementation-defined, so it varies across platforms.

atoi.c

#include <stdlib.h>

#undef atoi


/* Convert a string to an int.
   NOTE(review): quoted from glibc — it simply delegates to strtol in
   base 10 and truncates the `long' result with a cast.  On platforms
   where long is wider than int (e.g. LP64), out-of-range values yield
   an implementation-defined result rather than INT_MAX/INT_MIN.  */
int
atoi (const char *nptr)
{
  return (int) strtol (nptr, (char **) NULL, 10);
}

strtol.c

#include <stdlib.h>
#include <wchar.h>
#include <locale/localeinfo.h>

#ifndef UNSIGNED
# define UNSIGNED 0
# define INT LONG int
#else
# define INT unsigned LONG int
#endif

#if UNSIGNED
# ifdef USE_WIDE_CHAR
#  ifdef QUAD
#   define strtol wcstoull
#   define __strtol_l __wcstoull_l
#  else
#   define strtol wcstoul
#   define __strtol_l __wcstoul_l
#  endif
# else
#  ifdef QUAD
#   define strtol strtoull
#   define __strtol_l __strtoull_l
#  else
#   define strtol strtoul
#   define __strtol_l __strtoul_l
#  endif
# endif
#else
# ifdef USE_WIDE_CHAR
#  ifdef QUAD
#   define strtol wcstoll
#   define __strtol_l __wcstoll_l
#  else
#   define strtol wcstol
#   define __strtol_l __wcstol_l
#  endif
# else
#  ifdef QUAD
#   define strtol strtoll
#   define __strtol_l __strtoll_l
#  endif
# endif
#endif


/* If QUAD is defined, we are defining `strtoll' or `strtoull',
   operating on `long long int's.  */
#ifdef QUAD
# define LONG long long
#else
# define LONG long
#endif


#ifdef USE_WIDE_CHAR
# define STRING_TYPE wchar_t
#else
# define STRING_TYPE char
#endif


#define INTERNAL(X) INTERNAL1(X)
#define INTERNAL1(X) __##X##_internal


extern INT INTERNAL (__strtol_l) (const STRING_TYPE *, STRING_TYPE **, int,
      int, __locale_t);


/* Internal strtol entry point (old-style K&R parameter declarations,
   quoted from glibc).  Forwards to the locale-aware worker
   __strtol_l_internal, passing the caller-supplied `group' flag
   (presumably controls locale digit-grouping — see __strtol_l) and
   the current locale.  */
INT
INTERNAL (strtol) (nptr, endptr, base, group)
     const STRING_TYPE *nptr;
     STRING_TYPE **endptr;
     int base;
     int group;
{
  return INTERNAL (__strtol_l) (nptr, endptr, base, group, _NL_CURRENT_LOCALE);
}
libc_hidden_def (INTERNAL (strtol))


/* Public strtol entry point (quoted from glibc).  Delegates to the
   locale-aware worker with the `group' flag hard-wired to 0 and the
   current locale.  Depending on the macro block above, this expands
   to strtol/strtoul/strtoll/... for the configured width/signedness.  */
INT
strtol (nptr, endptr, base)
     const STRING_TYPE *nptr;
     STRING_TYPE **endptr;
     int base;
{
  return INTERNAL (__strtol_l) (nptr, endptr, base, 0, _NL_CURRENT_LOCALE);
}
libc_hidden_def (strtol)

No comments:

Post a Comment