在html文本中提取多个手机号码
发表于2020-08-30 15:08:30阅读36050次
今天终于弄明白了各运营商都分配了哪些号码段
* 中国移动:China Mobile
* 134[0-8],135,136,137,138,139,147,150,151,152,157,158,159,172,178,182,183,184,187,188,195,197,198
*匹配规则:$cm = "/^1(34[0-8]|(3[5-9]|5[0127-9]|8[23478]|47|7[28]|9[578])\d)\d{7}$/";
* 中国联通:China Unicom
* 130,131,132,145,155,156,166,171,175,176,185,186,196
*匹配规则:$cu = "/^1(3[0-2]|5[56]|8[56]|45|66|7[156]|96)\d{8}$/";
* 中国电信:China Telecom
* 133,1349,149,153,173,177,180,181,189,190,191,193,199
*匹配规则: $ct = "/^1((33|53|49|73|77|8[019]|9[0139])[0-9]|349)\d{7}$/";
* 中国广电: China Broadcast Network
* 192
*匹配规则:$cb = "/^1(92)\d{8}$/";
下面这个函数能自动把文本中电话号码提取出来。代码如下:
/**
 * Extract phone numbers from a piece of text (HTML markup is stripped first).
 *
 * Three kinds of numbers are collected:
 *  - "mobile":   11-digit mainland China mobile numbers (an optional leading
 *                "86" is dropped), each labelled with its carrier;
 *  - "landline": numbers starting with 0 (area code + 7/8-digit subscriber
 *                number);
 *  - "possible": bare 7- or 8-digit runs that may be landlines written
 *                without an area code.
 *
 * @param string $content Source text, may contain HTML tags.
 * @return array Empty array when $content is blank; otherwise an array with
 *               the keys "mobile", "landline" and "possible", each holding a
 *               list of array("number" => string, "type" => string).
 */
function get_tel($content){
    $numbers = array();

    // Nothing to scan in blank input.
    $content = trim($content);
    if (empty($content)) {
        return $numbers;
    }

    // strip_tags() leaves HTML entities untouched, so turn non-breaking
    // spaces (entity form and raw UTF-8 bytes) into ordinary spaces.
    $content = trim(strip_tags($content));
    $content = str_replace(array('&nbsp;', "\xC2\xA0"), ' ', $content);

    // Remove the hyphen inside numbers such as 86-18064074112 or
    // 0997-8611222: a hyphen is dropped only when the text before it ends
    // with a country/area code and the text after it starts with the rest
    // of the number. All other hyphens are kept.
    $parts = explode('-', $content);
    $newStr = array_shift($parts);
    foreach ($parts as $part) {
        $joinsCountryCode = preg_match('/\d{2}$/', $newStr) && preg_match('/^\d{11}/', $part);
        $joinsAreaCode = preg_match('/\d{3,4}$/', $newStr) && preg_match('/^\d{7,8}/', $part);
        $newStr .= ($joinsCountryCode || $joinsAreaCode) ? $part : '-' . $part;
    }

    // Carrier prefix tables, tried in this order (first match wins).
    $carriers = array(
        // China Mobile: 134[0-8],135-139,147,150-152,157-159,172,178,
        // 182-184,187,188,195,197,198
        '中国移动' => '/^1(34[0-8]|(3[5-9]|5[0127-9]|8[23478]|47|7[28]|9[578])\d)\d{7}$/',
        // China Unicom: 130-132,145,155,156,166,171,175,176,185,186,196
        '中国联通' => '/^1(3[0-2]|5[56]|8[56]|45|66|7[156]|96)\d{8}$/',
        // China Telecom: 133,1349,149,153,173,177,180,181,189,190,191,193,199
        '中国电信' => '/^1((33|53|49|73|77|8[019]|9[0139])[0-9]|349)\d{7}$/',
        // China Broadcast Network: 192
        '中国广电' => '/^1(92)\d{8}$/',
    );

    // Mobile numbers. Lookarounds are used instead of consuming a non-digit
    // on each side, so numbers at the very start/end of the text, or two
    // numbers separated by a single character, are all found.
    $found = array();
    preg_match_all('/(?<!\d)(?:86)?(\d{11})(?!\d)/', $newStr, $found);
    $nums = array();
    foreach ($found[1] as $value) {
        foreach ($carriers as $type => $pattern) {
            if (preg_match($pattern, $value)) {
                $nums[] = array("number" => $value, "type" => $type);
                break;
            }
        }
        // Candidates matching no carrier prefix are ignored.
    }
    $numbers["mobile"] = $nums;

    // Landlines (or PHS) written with an area code: a 0-prefixed area code
    // (010, 02x, or 0 + three digits) followed by a 7- or 8-digit number,
    // i.e. 10 to 12 digits in total.
    preg_match_all('/(?<!\d)(0\d{9,11})(?!\d)/', $newStr, $found);
    $phs = '/^0(10|2[0-5789]|\d{3})\d{7,8}$/';
    $nums = array();
    foreach ($found[1] as $value) {
        if (preg_match($phs, $value)) {
            $nums[] = array("number" => $value, "type" => "固定电话或小灵通");
        }
    }
    $numbers["landline"] = $nums;

    // Stand-alone 7- or 8-digit runs: possibly landlines without area code.
    preg_match_all('/(?<!\d)(\d{7,8})(?!\d)/', $newStr, $found);
    $nums = array();
    foreach ($found[1] as $value) {
        $nums[] = array("number" => $value, "type" => "没有区号的固定电话");
    }
    $numbers["possible"] = $nums;

    return $numbers;
}
运行结果: