用php导入10W条+ 级别的csv大文件数据到mysql。导出10W+级别数据到csv文件
前言:
如题。我们这里说的是csv文件的行数达到10W以上。这个是一个难点。
1. 说说csv 和 Excel
这两者都是我们平时导出或者导入数据一般用到的载体。两者有什么区别呢?csv 格式更兼容一点。那么共同点都是GBK格式的,非UTF8。所以我们上传文件的时候,老是出现乱码,就是编码问题没有转好导致。2. 推荐的几种方法
1. 函数 fgetss($handel); 返回字符串。它就是strip_tags(fget($handel))的组合读取csv 文件的一行。去掉了其中的HTML,php 等标签。这种方法使用于小的csv 文件$handle=fopen("1.csv","r"); while(!feof($handle)){ $buffer=fgetss($handle,2048); $row = mb_convert_encoding(trim($buffer), "utf-8", "gbk"); //很重要。转换成UTF8格式,不然容易产生乱码 $data=explode(",",$row); //转换成数组 $insertRows[] = $data; } //这样所有的csv文件就生成一个二维数组$insertRows;
2. 函数 fgetcsv($handel,2048,","),返回数组,它就是explode(",",fget($handel))的组合。这种方法使用于小的csv 文件。而且不适合有汉字的csv 文件。
$handle=fopen("1.csv","r"); while($data=fgetcsv($handle,1000,",")){ $insertRows[] = $data; } //这样所有的csv文件就生成一个二维数组$insertRows;
3. 说到正题了。上面2种方法适合少量的csv 文件。如果一个文件有10W+ 以上的行数,恐怕就很悲剧了。
$handle=fopen("1.csv","r"); //将文件一次性全部读出来 $excelData = array(); $content = trim(file_get_contents($fileName)); $excelData = explode(" ",$content);
或者直接用$excelData = file($file); file() 函数直接将数据读出并放入数组当中。
我们先将所有的文件一次性读出来。放到一个数组$excelData 中。这个时候,我们就可以不用管这个csv 文件了,纯粹了php 操作数组了。所以。不会崩溃异常:
$chunkData = array_chunk($excelData , 5000); // 将这个10W+ 的数组分割成5000一个的小数组。这样就一次批量插入5000条数据。mysql 是支持的。 $count = count($chunkData); for ($i = 0; $i < $count; $i++) { $insertRows = array(); foreach($chunkData[$i] as $value){ $string = mb_convert_encoding(trim(strip_tags($value)), "utf-8", "gbk");//转码 $v = explode(",", trim($string)); $row = array(); $row["cdate"] = empty($v[0]) ? date("Y-m-d") : date("Y-m-d",strtotime($v[0])); $row["business"] = $v[1]; $row["project"] = $v[2]; $row["shopname"] = $v[3]; $row["shopid"] = $v[4]; $row["fanli"] = formatNumber($v[5]); $row["fb"] = $v[6] * 100; $row["jifen"] = $v[7]; $sqlString = "(".""".implode( "","", $row ) . """.")"; //批量 $insertRows[] = $sqlString; } $result = $model->addDetail($insertRows); //批量将sql插入数据库。 }
插入数据库当然是批量插入了:
public function addDetail($rows){ if(empty($rows)){ return false; } //数据量较大,采取批量插入 $data = implode(",", $rows); $sql = "INSERT IGNORE INTO tb_account_detail(cdate,business,project,shopname,shopid,fanli,fb,jifen) VALUES {$data}"; $result = $this->query($sql); return true; }
ok ! 亲测试。10W 数据。6个字段。插入需要10秒。
-----------------2013-12-18 日更新---------------------
3. 导出10W+的数据到csv
放弃之前写的一篇博客中用到的方法:http://blog.csdn.net/think2me/article/details/8596833 。原因是:当超过50W+ 以上的数据时,有可能浏览器崩溃,内存超。这个方法是写文件的方式。然后再把文件弹出下载。
public function dump2Excel() { set_time_limit(0); ini_set("memory_limit", "640M"); //获取列表 $name = $this->getActionName(); $model = D(GROUP_NAME . "." . $name); $map = $this->_search(); //文件名 if (isset($_GET["error"]) && $_GET["error"] > 0) { $filename = C("IMG_PATH")."account_data_error_" . $map["action_id"] . "_" . date("Y-m-d", mktime()) . ".csv"; }else{ $filename = C("IMG_PATH")."account_data_all_" . $map["action_id"] . "_" . date("Y-m-d", mktime()) . ".csv"; } //用户信息,商家ID,联盟,商家订单号,商品分类,确认类别,下单时间,完成时间, //实际支付金额,佣金,佣金补贴,返利,F币,论坛积分,备注,强制入库 // "user_info", "shopid", "league", "order_id", "classify", "confirm_type", //"buydate", "paydate", "real_pay", "commission", "commission_plus", // "fanli", "jifen", "bbs", "remarks", "persist_execute", "unique_sign","error_code" $header[] = iconv("utf-8", "gb2312", "用户信息"); $header[] = iconv("utf-8", "gb2312", "商家ID"); $header[] = iconv("utf-8", "gb2312", "联盟"); $header[] = iconv("utf-8", "gb2312", "商家订单号"); $header[] = iconv("utf-8", "gb2312", "商品分类"); $header[] = iconv("utf-8", "gb2312", "确认类别"); $header[] = iconv("utf-8", "gb2312", "下单时间"); $header[] = iconv("utf-8", "gb2312", "完成时间"); $header[] = iconv("utf-8", "gb2312", "实际支付金额"); $header[] = iconv("utf-8", "gb2312", "佣金"); $header[] = iconv("utf-8", "gb2312", "佣金补贴"); $header[] = iconv("utf-8", "gb2312", "返利"); $header[] = iconv("utf-8", "gb2312", "F币"); $header[] = iconv("utf-8", "gb2312", "论坛积分"); $header[] = iconv("utf-8", "gb2312", "备注"); $header[] = iconv("utf-8", "gb2312", "强制入库"); $header[] = iconv("utf-8", "gb2312", "唯一标识"); $header[] = iconv("utf-8", "gb2312", "错误信息"); $headerFile = implode(",", $header); //写入标题 @unlink($filename); file_put_contents($filename, $headerFile." "); //获取所有error_code $list = D("Fanli")->table("tb_account_action_data_error_code")->field("id,err_msg")->findAll(); $error_msg = array(); foreach ($list as $value) { $error_msg[$value["id"]] = $value["err_msg"]; } //导入错误的数据 if (isset($_GET["error"]) && $_GET["error"] > 0) { $map["error_code"] = array("gt", 0); } if (!empty($map["action_id"])) { $allCount = $model->where($map)->field("count(1) as count")->select(); $pageLimit = ceil($allCount[0]["count"]/self::PAGE_COUNT); $voList = array(); //打开文件 if (!$handle = fopen($filename, "a")) { echo "不能打开文件 $filename"; exit; } //分页获取 for($i=0;$i<$pageLimit;$i++){ $count = self::PAGE_COUNT; $start = $count * $i; $limit = "$start,$count"; $voList = $model->where($map)->limit($limit)->order("id desc")->findAll(); //写入文件 $excelString = array(); foreach ($voList as $v) { $dumpExcel = array(); $dumpExcel[] = mb_convert_encoding($v["user_info"], "GBK", "UTF-8"); $dumpExcel[] = mb_convert_encoding($v["shopid"], "GBK", "UTF-8"); $dumpExcel[] = mb_convert_encoding($v["league"], "GBK", "UTF-8"); $dumpExcel[] = mb_convert_encoding($v["order_id"], "GBK", "UTF-8"); $dumpExcel[] = mb_convert_encoding($v["classify"], "GBK", "UTF-8"); $dumpExcel[] = mb_convert_encoding($v["confirm_type"], "GBK", "UTF-8"); $dumpExcel[] = """.mb_convert_encoding($v["buydate"], "GBK", "UTF-8"); $dumpExcel[] = """.mb_convert_encoding($v["paydate"], "GBK", "UTF-8"); $dumpExcel[] = mb_convert_encoding($v["real_pay"], "GBK", "UTF-8"); $dumpExcel[] = mb_convert_encoding($v["commission"], "GBK", "UTF-8"); $dumpExcel[] = mb_convert_encoding($v["commission_plus"], "GBK", "UTF-8"); $dumpExcel[] = mb_convert_encoding($v["fanli"], "GBK", "UTF-8"); $dumpExcel[] = mb_convert_encoding($v["jifen"], "GBK", "UTF-8"); $dumpExcel[] = mb_convert_encoding($v["bbs"], "GBK", "UTF-8"); $dumpExcel[] = mb_convert_encoding($v["remarks"], "GBK", "UTF-8"); $dumpExcel[] = intval($v["persist_execute"]); $dumpExcel[] = mb_convert_encoding($v["unique_sign"], "GBK", "UTF-8"); $dumpExcel[] = mb_convert_encoding($error_msg[$v["error_code"]], "GBK", "UTF-8"); $excelString[] = implode(",",$dumpExcel); } //只能一行行些。不然容易漏 foreach($excelString as $content){ fwrite($handle, $content . " "); } unset($excelString); } fclose($handle); } //导出下载 header("Content-type: application/octet-stream"); header("Content-Disposition: attachment; filename="" . basename($filename) . """); header("Content-Length: ". filesize($filename)); readfile($filename); }
声明:该文观点仅代表作者本人,入门客AI创业平台信息发布平台仅提供信息存储空间服务,如有疑问请联系rumenke@qq.com。
- 上一篇: swoole深入学习 1. swoole初始
- 下一篇:没有了