Return to Snippet

Revision: 18518
at October 2, 2009 03:57 by dreadwarrior


Updated Code
<?php
  // Ordered search => replacement table. The pairs are applied one after the
  // other in exactly this order, so the country-prefix rules must stay first.
  $search_replace_mapping = array(
    // collapse redundant prefix notations down to a single '+' ...
    '+00' => '+',
    '++'  => '+',
    // ... and then spell the international prefix as '00'
    '+'   => '00',

    // letters that users commonly type in place of digits
    'i' => '1',
    'I' => '1',
    'l' => '1',
    'o' => '0',
    'O' => '0',

    // punctuation to strip (roughly: anything matching [^\diIloO\+])
    // brackets
    '(' => '',
    ')' => '',
    '[' => '',
    ']' => '',
    // slashes; note that '\\\\' is two literal backslash characters at runtime
    '/'    => '',
    '\\\\' => '',
    // dashes
    '-' => '',
    '_' => '',
    // whitespace
    ' ' => ''
  );

  /**
   * Normalizes a telephone number to a plain string of digits.
   *
   * First every search => replacement pair of the mapping is applied in
   * mapping order, then every character that is still not a digit is removed.
   *
   * @param string $telephone_number       Raw number as entered by the user.
   * @param array  $search_replace_mapping Ordered search => replacement pairs.
   * @return string The number reduced to digits only.
   */
  function normalizeTelephoneNumber($telephone_number, $search_replace_mapping)
  {
    // split the mapping into the two parallel lists str_replace() expects
    $needles = array();
    $substitutes = array();
    foreach ($search_replace_mapping as $needle => $substitute)
    {
      $needles[] = $needle;
      $substitutes[] = $substitute;
    }

    // literal replacements first, then drop whatever non-digit is left over
    return preg_replace(
      '~[^\d]~',
      '',
      str_replace($needles, $substitutes, $telephone_number)
    );
  }

  /**
   * Builds a SELECT statement that applies the mapping as nested REPLACE() calls.
   *
   * The first mapping entry becomes the innermost REPLACE(), so the
   * replacements are evaluated in mapping order. With an empty mapping the
   * quoted number is selected unchanged.
   *
   * @param string $telephone_number       Raw number; it is embedded as a quoted
   *                                       string literal and escaped here.
   * @param array  $search_replace_mapping Ordered search => replacement pairs.
   *                                       They are inserted verbatim between
   *                                       single quotes, so each entry must
   *                                       already be valid inside an SQL string
   *                                       literal (e.g. a backslash written
   *                                       doubled).
   * @return string The generated SELECT statement.
   */
  function generateSqlReplaceStatement($telephone_number, $search_replace_mapping)
  {
    $template = 'REPLACE(%s, \'%s\', \'%s\')';

    // Escape quotes and backslashes of the input so it cannot terminate the
    // literal early or have a backslash swallowed as an escape sequence.
    // NOTE(review): addslashes() assumes backslash escapes are enabled and a
    // single-byte/UTF-8 connection charset - confirm for the target server.
    $expression = '\''. addslashes($telephone_number) .'\'';

    // Wrap the expression once per mapping entry; starting from the quoted
    // literal keeps the statement valid even when the mapping is empty.
    foreach ($search_replace_mapping as $search => $replace)
    {
      $expression = sprintf($template, $expression, $search, $replace);
    }

    return 'SELECT '. $expression .' AS normalized FROM Accommodation';
  }

  // Sample inputs covering prefixes, separators, letters typed as digits and
  // a backslash.
  $numbers = array(
    '0049 03831 667 156',
    '+39 0471 / 975     642',
    '+0039 6757 - 3939 9393',
    '+49 (0) 3834 50 77 73',
    '+43 (i) 4m 4n idiOt',
    '+44 (0) 1234 \\ 55 55'
  );

  print_r($numbers);

  // One pass over the samples: normalize in PHP and build the matching SQL,
  // the latter keyed by the raw input.
  $numbers_clean = array();
  $sql = array();
  foreach ($numbers as $number)
  {
    $numbers_clean[] = normalizeTelephoneNumber($number, $search_replace_mapping);
    $sql[$number] = generateSqlReplaceStatement($number, $search_replace_mapping);
  }

  print_r($numbers_clean);

  print_r($sql);
?>

Revision: 18517
at October 1, 2009 11:44 by dreadwarrior


Initial Code
<?php
  /**
   * Normalizes a telephone number to a plain string of digits.
   *
   * A fixed, ordered list of literal replacements is applied first (prefix
   * clean-up, letters typed instead of digits, punctuation); afterwards every
   * remaining non-digit character is removed.
   *
   * @param string $telephone_number Raw number as entered by the user.
   * @return string The number reduced to digits only.
   */
  function normalizeTelephoneNumber($telephone_number)
  {
    // Pairs are applied sequentially, so the prefix rules have to come first.
    $rules = array(
      // collapse redundant prefix notations down to a single '+' ...
      '+00' => '+',
      '++'  => '+',
      // ... and then spell the international prefix as '00'
      '+'   => '00',
      // letters that users commonly type in place of digits
      'i' => '1',
      'I' => '1',
      'l' => '1',
      'o' => '0',
      'O' => '0',
      // brackets
      '(' => '',
      ')' => '',
      '[' => '',
      ']' => '',
      // slashes ('\\' is a single backslash)
      '/'  => '',
      '\\' => '',
      // dashes
      '-' => '',
      '_' => '',
      // whitespace
      ' ' => ''
    );

    // literal replacements, in rule order
    $replaced = str_replace(array_keys($rules), array_values($rules), $telephone_number);

    // finally remove everything that is still not a digit
    return preg_replace('~[^\d]~', '', $replaced);
  }

  // Sample inputs covering prefixes, separators and letters typed as digits.
  $numbers = array(
    '0049 03832 647 352',
    '+39 0471 / 975     642',
    '+0039 6757 - 3939 9393',
    '+49 (0) 3831 50 77 73',
    '+43 (i) 4m 4n idiOt'
  );

  print_r($numbers);

  // Normalize every sample, keeping the original order.
  $numbers_clean = array_map('normalizeTelephoneNumber', $numbers);

  print_r($numbers_clean);
?>

Initial URL

                                

Initial Description
This snippet is usable for telephone number normalization. It supports re-mapping of misused alphabetical characters (e.g. i for 1) and some other clean up stuff... Take a look.

Update, 2009-10-02: added a function that creates a corresponding MySQL REPLACE construct to normalize data which is already stored in the database. I decided to use this approach because MySQL's regular expression engine is very slow. TODO: add the possibility to remove non-digit characters at the end of the large nested REPLACE call.

Initial Title
Normalize telephone numbers

Initial Tags
sql

Initial Language
PHP