Revision: 18518
Updated Code
at October 2, 2009 03:57 by dreadwarrior
Updated Code
<?php

/**
 * Ordered search => replace table used to normalize telephone numbers.
 *
 * Order matters: the pairs are applied one after another, each on the result
 * of the previous one, so "+00" and "++" must be collapsed to "+" before "+"
 * itself is turned into the "00" international prefix.
 *
 * Keys and values are the raw characters to search for and insert. Any
 * escaping needed for SQL is done by generateSqlReplaceStatement(), so the
 * same table works for both the PHP and the SQL code path.
 */
$search_replace_mapping = array(
    // country prefix normalization
    '+00' => '+',
    '++'  => '+',
    // country prefix is always 00
    '+'   => '00',
    // letters that users type in place of look-alike digits
    'i' => '1',
    'I' => '1',
    'l' => '1',
    'o' => '0',
    'O' => '0',
    // brackets
    '(' => '',
    ')' => '',
    '[' => '',
    ']' => '',
    // slashes ('\\' is a single backslash character)
    '/'  => '',
    '\\' => '',
    // dashes
    '-' => '',
    '_' => '',
    // whitespace
    ' ' => '',
);

/**
 * Normalizes a telephone number to a string consisting of digits only.
 *
 * @param string $telephone_number        Raw user input.
 * @param array  $search_replace_mapping  Ordered search => replace pairs.
 *
 * @return string The number with all pairs applied and every remaining
 *                non-digit character removed.
 */
function normalizeTelephoneNumber($telephone_number, $search_replace_mapping)
{
    // With array arguments str_replace() applies the pairs in order, each one
    // on the result of the previous replacement.
    $telephone_number = str_replace(
        array_keys($search_replace_mapping),
        array_values($search_replace_mapping),
        $telephone_number
    );

    // Whatever is still not a digit is dropped.
    return preg_replace('~[^\d]~', '', $telephone_number);
}

/**
 * Wraps a value in single quotes for use as a MySQL string literal.
 *
 * Backslashes and single quotes are backslash-escaped. This assumes the
 * server treats backslash as an escape character (i.e. NO_BACKSLASH_ESCAPES
 * is not enabled) and is meant for generating one-off maintenance
 * statements, not for handling untrusted request data.
 *
 * @param string $value Raw value.
 *
 * @return string Quoted and escaped literal.
 */
function quoteSqlStringLiteral($value)
{
    // strtr() with an array works in a single pass, so the backslashes added
    // for quotes are not escaped a second time.
    $escaped = strtr((string) $value, array('\\' => '\\\\', '\'' => '\\\''));

    return '\'' . $escaped . '\'';
}

/**
 * Builds a SELECT statement that applies the mapping through nested MySQL
 * REPLACE() calls, mirroring the string replacement step of
 * normalizeTelephoneNumber().
 *
 * The first pair of the mapping becomes the innermost REPLACE() and is
 * therefore evaluated first. Non-digit characters that are not covered by
 * the mapping are NOT removed by the generated SQL.
 *
 * @param string $telephone_number        Number embedded as a string literal.
 * @param array  $search_replace_mapping  Ordered search => replace pairs.
 *
 * @return string The generated SQL statement.
 */
function generateSqlReplaceStatement($telephone_number, $search_replace_mapping)
{
    // Start from the quoted number so that an empty mapping still yields
    // valid SQL, then wrap one REPLACE() layer per pair around it.
    $expression = quoteSqlStringLiteral($telephone_number);

    foreach ($search_replace_mapping as $search => $replace) {
        $expression = sprintf(
            'REPLACE(%s, %s, %s)',
            $expression,
            quoteSqlStringLiteral($search),
            quoteSqlStringLiteral($replace)
        );
    }

    return 'SELECT ' . $expression . ' AS normalized FROM Accommodation';
}

// --- demo -------------------------------------------------------------------

$numbers = array(
    '0049 03831 667 156',
    '+39 0471 / 975 642',
    '+0039 6757 - 3939 9393',
    '+49 (0) 3834 50 77 73',
    '+43 (i) 4m 4n idiOt',
    '+44 (0) 1234 \\ 55 55',
);

print_r($numbers);

$numbers_clean = array();
foreach ($numbers as $number) {
    $numbers_clean[] = normalizeTelephoneNumber($number, $search_replace_mapping);
}

print_r($numbers_clean);

$sql = array();
foreach ($numbers as $number) {
    $sql[$number] = generateSqlReplaceStatement($number, $search_replace_mapping);
}

print_r($sql);

?>
Revision: 18517
Initial Code
Initial URL
Initial Description
Initial Title
Initial Tags
Initial Language
at October 1, 2009 11:44 by dreadwarrior
Initial Code
<?php

/**
 * Reduces a telephone number to its digits.
 *
 * A fixed, ordered list of replacements is applied first (international
 * prefix handling, look-alike letters to digits, removal of separators);
 * afterwards every character that is still not a digit is stripped.
 *
 * @param string $telephone_number Raw user input.
 *
 * @return string The normalized number.
 */
function normalizeTelephoneNumber($telephone_number)
{
    // The sequence is significant: every pair operates on the output of the
    // pair before it, so "+00"/"++" have to collapse to "+" before "+" is
    // expanded to "00".
    $substitutions = array(
        // country prefix normalization
        '+00' => '+',
        '++'  => '+',
        // country prefix is always 00
        '+'   => '00',
        // letters typed in place of look-alike digits
        'i' => '1',
        'I' => '1',
        'l' => '1',
        'o' => '0',
        'O' => '0',
        // brackets
        '(' => '',
        ')' => '',
        '[' => '',
        ']' => '',
        // slashes
        '/'  => '',
        '\\' => '',
        // dashes
        '-' => '',
        '_' => '',
        // whitespace
        ' ' => '',
    );

    $result = $telephone_number;
    foreach ($substitutions as $needle => $substitute) {
        $result = str_replace($needle, $substitute, $result);
    }

    // Drop everything that is still not a digit.
    return preg_replace('~[^\d]~', '', $result);
}

// --- demo -------------------------------------------------------------------

$numbers = array(
    '0049 03832 647 352',
    '+39 0471 / 975 642',
    '+0039 6757 - 3939 9393',
    '+49 (0) 3831 50 77 73',
    '+43 (i) 4m 4n idiOt',
);

print_r($numbers);

$numbers_clean = array_map('normalizeTelephoneNumber', $numbers);

print_r($numbers_clean);

?>
Initial URL
Initial Description
This snippet normalizes telephone numbers. It re-maps misused alphabetical characters (e.g. i for 1) to digits and strips brackets, slashes, dashes, whitespace and any other non-digit characters. Take a look. Update, 2009-10-02: added a function that creates a corresponding MySQL REPLACE construct to normalize data which is already stored in the database. I decided to use this approach because MySQL's regular expression engine is very slow. TODO: add the possibility to remove non-digit characters at the end of the large REPLACE call.
Initial Title
Normalize telephone numbers
Initial Tags
sql
Initial Language
PHP