|
回复 #540 yulihua49 的帖子
现全文发表t_OAD程序,处理任意的表,是否够简单,有何需要改进?大家提意见。
- #include <DAU.h>
- #include <OAD.h>
- #define BATCH_NUM 1000
- /* 插入重码就修改
- 返回小于0出错,在SQL_Connect里
- >=0插入的条数
- */
- static int upd,loss;
- //批量插入,遇重码修改之。在很多重码的场合,效率相当低
- int insert_DAO(OAD *oad,int n,DAU *DP)
- {
- char *p;
- char stmt[4096];
- int cc,ret,num;
- int beg;
- beg=
- ret=num=0;
- do {
- ret=OAD_exec(oad,beg,n);//真正的数组插入,就这一句。其余是错误处理
- if(ret>0) num+=ret;
- if(ret == n-beg) return num;
- if(oad->SQL_Connect->Errno != DUPKEY) {
- ShowLog(1,"%s:OAD_exec err=%d,%s",__FUNCTION__,
- oad->SQL_Connect->Errno,
- oad->SQL_Connect->ErrMsg);
- return ret;
- }
- if(ret<0) ret=0;
- p=(char *)oad->recs;
- p+=(beg+ret)*(oad->reclen);
- memcpy(DP->srm.rec,p,oad->reclen);
- *stmt=0;
- cc=update_by_PK(DP,stmt);
- if(cc<=0) {
- ShowLog(1,"%s update stmt=%s,err=%d,%s",__FUNCTION__,
- stmt,
- oad->SQL_Connect->Errno,
- oad->SQL_Connect->ErrMsg);
- return 0;
- }
- upd+=cc;
- beg+=ret+1;
- //ShowLog(5,"%s:beg=%d",__FUNCTION__,beg);
- } while(beg<n);
- return num;
- }
- int loadfile(T_SQL_Connect *SQL_Connect,char *tablename,FILE *ifd,FILE *ofd,int Pflg,char *buf,int buflen)
- {
- char *p,tabn[512];
- DAU _DAU;
- OAD oad;
- int rows,ret;
- int n,num;
- INT64 now;
- ShowLog(5,"loadfile:entry");
- now=now_usec();
- ret=DAU_init(&_DAU,SQL_Connect,tablename,0,0);
- if(ret) {
- ShowLog(1,"loadfile:DAU_init tabname=%s,ret=%d",tablename,ret);
- return -1;
- }
- num=_DAU.srm.tp[_DAU.srm.Aflg].offset;
- char recs[BATCH_NUM * num];
- ShowLog(5,"DAU_init ret=%d,TIMEVAL=%ld,reclen=%d,size=%d",ret,now_usec()-now,num,sizeof(recs));
- OAD_init(&oad,&_DAU,recs,BATCH_NUM);
- upd=loss=0;
- *buf=0;
- ret=OAD_mk_ins(&oad,buf);
- if(ret) {
- ShowLog(1,"aft OAD_mk_ins:stmt=%s,err=%d,%s\n",buf,
- SQL_Connect->Errno,
- SQL_Connect->ErrMsg);
- OAD_free(&oad);
- DAU_free(&_DAU);
- return -1;
- }
- n=num=0;
- now=now_usec();
- for(rows=0;!ferror(ifd);) {
- fgets(buf,buflen,ifd);
- if(feof(ifd)) break;
- OAD_pkg_dispack(&oad,n,buf,'|');
- if(BATCH_NUM == ++n) {
- ret=insert_DAO(&oad,n,&_DAU);
- ___SQL_Transaction__(SQL_Connect,TRANCOMMIT);
- n=0;
- if(ret<0) break;
- rows += ret;
- }
- }
- if(n) {
- ret=insert_DAO(&oad,n,&_DAU);
- if(ret>0) rows += ret;
- n=0;
- }
- ShowLog(2,"loadfile:rows=%d,upd=%d,TIMEVAL=%lld",rows,upd,now_usec()-now);
- OAD_free(&oad);
- DAU_free(&_DAU);
- return rows;
- }
- static char my_showid[200];
- #define DEF_BUF_SIZ 4096
- main(int argc,char *argv[])
- {
- int ret,i,len=DEF_BUF_SIZ;
- T_SQL_Connect SQL_Connect;
- int Pflg=0;
- char *tabname=0;
- FILE *ifd,*ofd;
- INT64 now;
- char *myname;
- tzset();
- myname=sc_basename(argv[0]);
- sprintf(my_showid,"%s:%d",
- myname,getpid());
- Showid=my_showid;
- ifd=0;
- ofd=0;
- /*******************************************************************
- * get Opt
- *******************************************************************/
- for(i=1;i<argc;i++) {
- if(*argv[i]=='-') {
- switch(argv[i][1]) {
- case 'f':
- if(argv[i][2]) ret=envcfg(argv[i]+2);
- else {
- i++;
- ret=envcfg(argv[i]);
- }
- continue;
- case 'P': //输出 不能加载的记录
- Pflg=1;
- if(argv[i][2]) {
- ofd=fopen(argv[i]+2,"w");
- } else {
- i++;
- if(i<argc) ofd=fopen(argv[i],"w");
- }
- continue;
- case 'K': //设置工作buff,Kbyte。
- Pflg=1;
- if(argv[i][2]) {
- len=atoi(argv[i]+2);
- } else {
- i++;
- if(i<argc) len=atoi(argv[i]);;
- }
- continue;
- default:
- fprintf(stderr,"no know option:%s",argv[i]);
- fprintf(stderr,"Usage:%s -f 配置文件 [-P] 输出文件名 [-Kkbytes] [-W]WHERE 子句 [-D]执行日期 [-d]作业日期\n",
- argv[0]);
- continue;
- }
- }
- tabname=argv[i];
- }
- if(len < DEF_BUF_SIZ) len=DEF_BUF_SIZ;
- char buf[len];
- *buf=0;
- ShowLog(5,"tabname=%s",tabname);
- ret=db_open(&SQL_Connect);
- if(ret) {
- ShowLog(1,"Open Database err=%d.%s",
- SQL_Connect.Errno,
- SQL_Connect.ErrMsg);
- return 1;
- }
- ShowLog(5,"DB=%s",SQL_Connect.DBOWN);
- if(!ofd) ofd=stdout;
- ifd=stdin;
- now=now_usec();
- int cc;
- ret=loadfile(&SQL_Connect,tabname,ifd,ofd,Pflg,buf,sizeof(buf));
- ShowLog(2,"loadasc:load %d rec's time=%ld,buf=%s",ret,(long)(now_usec()-now),buf);
- now=now_usec();
- cc=___SQL_Transaction__(&SQL_Connect,TRANCOMMIT);
- ShowLog(5,"loadfile:%d commit TIMEVAL=%lld,ret=%d",ret,now_usec()-now,cc);
- if(ofd && ofd != stdout) fclose(ofd);
- ret=___SQL_CloseDatabase__(&SQL_Connect);
- return 0;
- }
复制代码
包装器提高效率的秘诀:耗时的操作在init里完成,给后续数据作业创造条件,数据操作时的开销极小。
这里我们看一下那个最频繁使用的OAD_exec:
int OAD_exec(OAD *oad,int begin,int n)
{
int ret;
oad->begin=begin;
oad->rows=(n<=(oad->max_rows_of_batch-begin))?n ad->max_rows_of_batch-begin;
if(!begin) { //只有原始的执行如此
//ShowLog(5,"%s:tabname=%s",__FUNCTION__,oad->srm->tabname);
BB_Tree_Scan(oad->bind_tree,bind_data);
}
ret=sqlo_execute1(oad->sth,oad->begin,n);
if(ret) {
___SQL_GetError(oad->SQL_Connect);
return -1;
}
if(oad->a_col_flg) BB_Tree_Scan(oad->bind_tree,get_ret);//如果有RETURNING变量,取回。
return sqlo_prows(oad->sth);
}
这个包装干了什么?
通过二叉树检查一下数据的NULL情况,设置表示器变量。(如果你自己做,这一步很可能遗漏哦!)
然后就是执行sqlo_exec了,最后取出成功行数。
就这么简单,效率想不高都难。
为什么比sqlldr效率高?因为它使用字符绑定,要每次都绑定变量。而我,一次绑定,多次使用。这得益于DAU内部使用结构,数据已经按类型放在固定的地方。这里回答了newkid的质问,这样的应用为何要使用结构?绑定变量开销很大的。
[ 本帖最后由 yulihua49 于 2010-8-25 12:43 编辑 ] |
|