/******************************************************************** * * * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * * by the Xiph.Org Foundation http://www.xiph.org/ * * * ******************************************************************** function: last mod: $Id: rate.c 16503 2009-08-22 18:14:02Z giles $ ********************************************************************/ #include #include #include "encint.h" /*A rough lookup table for tan(x), 0<=x>24; if(i>=17)i=16; t0=OC_ROUGH_TAN_LOOKUP[i]; t1=OC_ROUGH_TAN_LOOKUP[i+1]; d=_alpha*36-(i<<24); return (int)(((ogg_int64_t)t0<<32)+(t1-t0<<8)*(ogg_int64_t)d>>32); } /*Re-initialize the Bessel filter coefficients with the specified delay. This does not alter the x/y state, but changes the reaction time of the filter. Altering the time constant of a reactive filter without alterning internal state is something that has to be done carefuly, but our design operates at high enough delays and with small enough time constant changes to make it safe.*/ static void oc_iir_filter_reinit(oc_iir_filter *_f,int _delay){ int alpha; ogg_int64_t one48; ogg_int64_t warp; ogg_int64_t k1; ogg_int64_t k2; ogg_int64_t d; ogg_int64_t a; ogg_int64_t ik2; ogg_int64_t b1; ogg_int64_t b2; /*This borrows some code from an unreleased version of Postfish. See the recipe at http://unicorn.us.com/alex/2polefilters.html for details on deriving the filter coefficients.*/ /*alpha is Q24*/ alpha=(1<<24)/_delay; one48=(ogg_int64_t)1<<48; /*warp is 7.12*/ warp=OC_MAXI(oc_warp_alpha(alpha),1); /*k1 is 9.12*/ k1=3*warp; /*k2 is 16.24.*/ k2=k1*warp; /*d is 16.15.*/ d=((1<<12)+k1<<12)+k2+256>>9; /*a is 0.32, since d is larger than both 1.0 and k2.*/ a=(k2<<23)/d; /*ik2 is 25.24.*/ ik2=one48/k2; /*b1 is Q56; in practice, the integer ranges between -2 and 2.*/ b1=2*a*(ik2-(1<<24)); /*b2 is Q56; in practice, the integer ranges between -2 and 2.*/ b2=(one48<<8)-(4*a<<24)-b1; /*All of the filter parameters are Q24.*/ _f->c[0]=(ogg_int32_t)(b1+((ogg_int64_t)1<<31)>>32); _f->c[1]=(ogg_int32_t)(b2+((ogg_int64_t)1<<31)>>32); _f->g=(ogg_int32_t)(a+128>>8); } /*Initialize a 2nd order low-pass Bessel filter with the corresponding delay and initial value. _value is Q24.*/ static void oc_iir_filter_init(oc_iir_filter *_f,int _delay,ogg_int32_t _value){ oc_iir_filter_reinit(_f,_delay); _f->y[1]=_f->y[0]=_f->x[1]=_f->x[0]=_value; } static ogg_int64_t oc_iir_filter_update(oc_iir_filter *_f,ogg_int32_t _x){ ogg_int64_t c0; ogg_int64_t c1; ogg_int64_t g; ogg_int64_t x0; ogg_int64_t x1; ogg_int64_t y0; ogg_int64_t y1; ogg_int64_t ya; c0=_f->c[0]; c1=_f->c[1]; g=_f->g; x0=_f->x[0]; x1=_f->x[1]; y0=_f->y[0]; y1=_f->y[1]; ya=(_x+x0*2+x1)*g+y0*c0+y1*c1+(1<<23)>>24; _f->x[1]=(ogg_int32_t)x0; _f->x[0]=_x; _f->y[1]=(ogg_int32_t)y0; _f->y[0]=(ogg_int32_t)ya; return ya; } /*Search for the quantizer that matches the target most closely. We don't assume a linear ordering, but when there are ties we pick the quantizer closest to the old one.*/ static int oc_enc_find_qi_for_target(oc_enc_ctx *_enc,int _qti,int _qi_old, int _qi_min,ogg_int64_t _log_qtarget){ ogg_int64_t best_qdiff; int best_qi; int qi; best_qi=_qi_min; best_qdiff=_enc->log_qavg[_qti][best_qi]-_log_qtarget; best_qdiff=best_qdiff+OC_SIGNMASK(best_qdiff)^OC_SIGNMASK(best_qdiff); for(qi=_qi_min+1;qi<64;qi++){ ogg_int64_t qdiff; qdiff=_enc->log_qavg[_qti][qi]-_log_qtarget; qdiff=qdiff+OC_SIGNMASK(qdiff)^OC_SIGNMASK(qdiff); if(qdiffstate.qis[0]; /*If rate control is active, use the lambda for the _target_ quantizer. This allows us to scale to rates slightly lower than we'd normally be able to reach, and give the rate control a semblance of "fractional qi" precision. TODO: Add API for changing QI, and allow extra precision.*/ if(_enc->state.info.target_bitrate>0)lq=_enc->rc.log_qtarget; else lq=_enc->log_qavg[_qti][qi]; /*The resulting lambda value is less than 0x500000.*/ _enc->lambda=(int)oc_bexp64(2*lq-0x4780BD468D6B62BLL); /*Select additional quantizers. The R-D optimal block AC quantizer statistics suggest that the distribution is roughly Gaussian-like with a slight positive skew. K-means clustering on log_qavg to select 3 quantizers produces cluster centers of {log_qavg-0.6,log_qavg,log_qavg+0.7}. Experiments confirm these are relatively good choices. Although we do greedy R-D optimization of the qii flags to avoid switching too frequently, this becomes ineffective at low rates, either because we do a poor job of predicting the actual R-D cost, or the greedy optimization is not sufficient. Therefore adaptive quantization is disabled above an (experimentally suggested) threshold of log_qavg=7.00 (e.g., below INTRA qi=12 or INTER qi=20 with current matrices). This may need to be revised if the R-D cost estimation or qii flag optimization strategies change.*/ nqis=1; if(lq<(OC_Q57(56)>>3)&&!_enc->vp3_compatible){ qi1=oc_enc_find_qi_for_target(_enc,_qti,OC_MAXI(qi-1,0),0, lq+(OC_Q57(7)+5)/10); if(qi1!=qi)_enc->state.qis[nqis++]=qi1; qi1=oc_enc_find_qi_for_target(_enc,_qti,OC_MINI(qi+1,63),0, lq-(OC_Q57(6)+5)/10); if(qi1!=qi&&qi1!=_enc->state.qis[nqis-1])_enc->state.qis[nqis++]=qi1; } _enc->state.nqis=nqis; } /*Binary exponential of _log_scale with 24-bit fractional precision and saturation. _log_scale: A binary logarithm in Q24 format. Return: The binary exponential in Q24 format, saturated to 2**47-1 if _log_scale was too large.*/ static ogg_int64_t oc_bexp_q24(ogg_int32_t _log_scale){ if(_log_scale<(ogg_int32_t)23<<24){ ogg_int64_t ret; ret=oc_bexp64(((ogg_int64_t)_log_scale<<33)+OC_Q57(24)); return ret<0x7FFFFFFFFFFFLL?ret:0x7FFFFFFFFFFFLL; } return 0x7FFFFFFFFFFFLL; } /*Convenience function converts Q57 value to a clamped 32-bit Q24 value _in: input in Q57 format. Return: same number in Q24 */ static ogg_int32_t oc_q57_to_q24(ogg_int64_t _in){ ogg_int64_t ret; ret=_in+((ogg_int64_t)1<<32)>>33; /*0x80000000 is automatically converted to unsigned on 32-bit systems. -0x7FFFFFFF-1 is needed to avoid "promoting" the whole expression to unsigned.*/ return (ogg_int32_t)OC_CLAMPI(-0x7FFFFFFF-1,ret,0x7FFFFFFF); } /*Binary exponential of _log_scale with 24-bit fractional precision and saturation. _log_scale: A binary logarithm in Q57 format. Return: The binary exponential in Q24 format, saturated to 2**31-1 if _log_scale was too large.*/ static ogg_int32_t oc_bexp64_q24(ogg_int64_t _log_scale){ if(_log_scalerc.bits_per_frame=(_enc->state.info.target_bitrate* (ogg_int64_t)_enc->state.info.fps_denominator)/ _enc->state.info.fps_numerator; /*Insane framerates or frame sizes mean insane bitrates. Let's not get carried away.*/ if(_enc->rc.bits_per_frame>0x400000000000LL){ _enc->rc.bits_per_frame=(ogg_int64_t)0x400000000000LL; } else if(_enc->rc.bits_per_frame<32)_enc->rc.bits_per_frame=32; _enc->rc.buf_delay=OC_MAXI(_enc->rc.buf_delay,12); _enc->rc.max=_enc->rc.bits_per_frame*_enc->rc.buf_delay; /*Start with a buffer fullness of 50% plus 25% of the amount we plan to spend on a single keyframe interval. We can require fully half the bits in an interval for a keyframe, so this initial level gives us maximum flexibility for over/under-shooting in subsequent frames.*/ _enc->rc.target=(_enc->rc.max+1>>1)+(_enc->rc.bits_per_frame+2>>2)* OC_MINI(_enc->keyframe_frequency_force,_enc->rc.buf_delay); _enc->rc.fullness=_enc->rc.target; /*Pick exponents and initial scales for quantizer selection.*/ npixels=_enc->state.info.frame_width* (ogg_int64_t)_enc->state.info.frame_height; _enc->rc.log_npixels=oc_blog64(npixels); ibpp=npixels/_enc->rc.bits_per_frame; if(ibpp<1){ _enc->rc.exp[0]=59; _enc->rc.log_scale[0]=oc_blog64(1997)-OC_Q57(8); } else if(ibpp<2){ _enc->rc.exp[0]=55; _enc->rc.log_scale[0]=oc_blog64(1604)-OC_Q57(8); } else{ _enc->rc.exp[0]=48; _enc->rc.log_scale[0]=oc_blog64(834)-OC_Q57(8); } if(ibpp<4){ _enc->rc.exp[1]=100; _enc->rc.log_scale[1]=oc_blog64(2249)-OC_Q57(8); } else if(ibpp<8){ _enc->rc.exp[1]=95; _enc->rc.log_scale[1]=oc_blog64(1751)-OC_Q57(8); } else{ _enc->rc.exp[1]=73; _enc->rc.log_scale[1]=oc_blog64(1260)-OC_Q57(8); } _enc->rc.prev_drop_count=0; _enc->rc.log_drop_scale=OC_Q57(0); /*Set up second order followers, initialized according to corresponding time constants.*/ oc_iir_filter_init(&_enc->rc.scalefilter[0],4, oc_q57_to_q24(_enc->rc.log_scale[0])); inter_delay=(_enc->rc.twopass? OC_MAXI(_enc->keyframe_frequency_force,12):_enc->rc.buf_delay)>>1; _enc->rc.inter_count=0; /*We clamp the actual inter_delay to a minimum of 10 to work within the range of values where later incrementing the delay works as designed. 10 is not an exact choice, but rather a good working trade-off.*/ _enc->rc.inter_delay=10; _enc->rc.inter_delay_target=inter_delay; oc_iir_filter_init(&_enc->rc.scalefilter[1],_enc->rc.inter_delay, oc_q57_to_q24(_enc->rc.log_scale[1])); oc_iir_filter_init(&_enc->rc.vfrfilter,4, oc_bexp64_q24(_enc->rc.log_drop_scale)); } void oc_rc_state_init(oc_rc_state *_rc,oc_enc_ctx *_enc){ _rc->twopass=0; _rc->twopass_buffer_bytes=0; _rc->twopass_force_kf=0; _rc->frame_metrics=NULL; _rc->rate_bias=0; if(_enc->state.info.target_bitrate>0){ /*The buffer size is set equal to the keyframe interval, clamped to the range [12,256] frames. The 12 frame minimum gives us some chance to distribute bit estimation errors. The 256 frame maximum means we'll require 8-10 seconds of pre-buffering at 24-30 fps, which is not unreasonable.*/ _rc->buf_delay=_enc->keyframe_frequency_force>256? 256:_enc->keyframe_frequency_force; /*By default, enforce all buffer constraints.*/ _rc->drop_frames=1; _rc->cap_overflow=1; _rc->cap_underflow=0; oc_enc_rc_reset(_enc); } } void oc_rc_state_clear(oc_rc_state *_rc){ _ogg_free(_rc->frame_metrics); } void oc_enc_rc_resize(oc_enc_ctx *_enc){ /*If encoding has not yet begun, reset the buffer state.*/ if(_enc->state.curframe_num<0)oc_enc_rc_reset(_enc); else{ int idt; /*Otherwise, update the bounds on the buffer, but not the current fullness.*/ _enc->rc.bits_per_frame=(_enc->state.info.target_bitrate* (ogg_int64_t)_enc->state.info.fps_denominator)/ _enc->state.info.fps_numerator; /*Insane framerates or frame sizes mean insane bitrates. Let's not get carried away.*/ if(_enc->rc.bits_per_frame>0x400000000000LL){ _enc->rc.bits_per_frame=(ogg_int64_t)0x400000000000LL; } else if(_enc->rc.bits_per_frame<32)_enc->rc.bits_per_frame=32; _enc->rc.buf_delay=OC_MAXI(_enc->rc.buf_delay,12); _enc->rc.max=_enc->rc.bits_per_frame*_enc->rc.buf_delay; _enc->rc.target=(_enc->rc.max+1>>1)+(_enc->rc.bits_per_frame+2>>2)* OC_MINI(_enc->keyframe_frequency_force,_enc->rc.buf_delay); /*Update the INTER-frame scale filter delay. We jump to it immediately if we've already seen enough frames; otherwise it is simply set as the new target.*/ _enc->rc.inter_delay_target=idt=OC_MAXI(_enc->rc.buf_delay>>1,10); if(idtrc.inter_delay,_enc->rc.inter_count)){ oc_iir_filter_init(&_enc->rc.scalefilter[1],idt, _enc->rc.scalefilter[1].y[0]); _enc->rc.inter_delay=idt; } } /*If we're in pass-2 mode, make sure the frame metrics array is big enough to hold frame statistics for the full buffer.*/ if(_enc->rc.twopass==2){ int cfm; int buf_delay; int reset_window; buf_delay=_enc->rc.buf_delay; reset_window=_enc->rc.frame_metrics==NULL&&(_enc->rc.frames_total[0]==0|| buf_delay<_enc->rc.frames_total[0]+_enc->rc.frames_total[1] +_enc->rc.frames_total[2]); cfm=_enc->rc.cframe_metrics; /*Only try to resize the frame metrics buffer if a) it's too small and b) we were using a finite buffer, or are about to start.*/ if(cfmrc.frame_metrics!=NULL||reset_window)){ oc_frame_metrics *fm; int nfm; int fmh; fm=(oc_frame_metrics *)_ogg_realloc(_enc->rc.frame_metrics, buf_delay*sizeof(*_enc->rc.frame_metrics)); if(fm==NULL){ /*We failed to allocate a finite buffer.*/ /*If we don't have a valid 2-pass header yet, just return; we'll reset the buffer size when we read the header.*/ if(_enc->rc.frames_total[0]==0)return; /*Otherwise revert to the largest finite buffer previously set, or to whole-file buffering if we were still using that.*/ _enc->rc.buf_delay=_enc->rc.frame_metrics!=NULL? cfm:_enc->rc.frames_total[0]+_enc->rc.frames_total[1] +_enc->rc.frames_total[2]; oc_enc_rc_resize(_enc); return; } _enc->rc.frame_metrics=fm; _enc->rc.cframe_metrics=buf_delay; /*Re-organize the circular buffer.*/ fmh=_enc->rc.frame_metrics_head; nfm=_enc->rc.nframe_metrics; if(fmh+nfm>cfm){ int shift; shift=OC_MINI(fmh+nfm-cfm,buf_delay-cfm); memcpy(fm+cfm,fm,OC_MINI(fmh+nfm-cfm,buf_delay-cfm)*sizeof(*fm)); if(fmh+nfm>buf_delay)memmove(fm,fm+shift,fmh+nfm-buf_delay); } } /*We were using whole-file buffering; now we're not.*/ if(reset_window){ _enc->rc.nframes[0]=_enc->rc.nframes[1]=_enc->rc.nframes[2]=0; _enc->rc.scale_sum[0]=_enc->rc.scale_sum[1]=0; _enc->rc.scale_window_end=_enc->rc.scale_window0= _enc->state.curframe_num+_enc->prev_dup_count+1; if(_enc->rc.twopass_buffer_bytes){ int qti; /*We already read the metrics for the first frame in the window.*/ *(_enc->rc.frame_metrics)=*&_enc->rc.cur_metrics; _enc->rc.nframe_metrics++; qti=_enc->rc.cur_metrics.frame_type; _enc->rc.nframes[qti]++; _enc->rc.nframes[2]+=_enc->rc.cur_metrics.dup_count; _enc->rc.scale_sum[qti]+=oc_bexp_q24(_enc->rc.cur_metrics.log_scale); _enc->rc.scale_window_end+=_enc->rc.cur_metrics.dup_count+1; if(_enc->rc.scale_window_end-_enc->rc.scale_window0rc.twopass_buffer_bytes=0; } } } /*Otherwise, we could shrink the size of the current window, if necessary, but leaving it like it is lets us adapt to the new buffer size more gracefully.*/ } } /*Scale the number of frames by the number of expected drops/duplicates.*/ static int oc_rc_scale_drop(oc_rc_state *_rc,int _nframes){ if(_rc->prev_drop_count>0||_rc->log_drop_scale>OC_Q57(0)){ ogg_int64_t dup_scale; dup_scale=oc_bexp64((_rc->log_drop_scale +oc_blog64(_rc->prev_drop_count+1)>>1)+OC_Q57(8)); if(dup_scale<_nframes<<8){ int dup_scalei; dup_scalei=(int)dup_scale; if(dup_scalei>0)_nframes=((_nframes<<8)+dup_scalei-1)/dup_scalei; } else _nframes=!!_nframes; } return _nframes; } int oc_enc_select_qi(oc_enc_ctx *_enc,int _qti,int _clamp){ ogg_int64_t rate_total; ogg_int64_t rate_bias; int nframes[2]; int buf_delay; int buf_pad; ogg_int64_t log_qtarget; ogg_int64_t log_scale0; ogg_int64_t log_cur_scale; ogg_int64_t log_qexp; int exp0; int old_qi; int qi; /*Figure out how to re-distribute bits so that we hit our fullness target before the last keyframe in our current buffer window (after the current frame), or the end of the buffer window, whichever comes first.*/ log_cur_scale=(ogg_int64_t)_enc->rc.scalefilter[_qti].y[0]<<33; buf_pad=0; switch(_enc->rc.twopass){ default:{ ogg_uint32_t next_key_frame; /*Single pass mode: assume only forced keyframes and attempt to estimate the drop count for VFR content.*/ next_key_frame=_qti?_enc->keyframe_frequency_force -(_enc->state.curframe_num-_enc->state.keyframe_num):0; nframes[0]=(_enc->rc.buf_delay-OC_MINI(next_key_frame,_enc->rc.buf_delay) +_enc->keyframe_frequency_force-1)/_enc->keyframe_frequency_force; if(nframes[0]+_qti>1){ nframes[0]--; buf_delay=next_key_frame+nframes[0]*_enc->keyframe_frequency_force; } else buf_delay=_enc->rc.buf_delay; nframes[1]=buf_delay-nframes[0]; /*Downgrade the delta frame rate to correspond to the recent drop count history.*/ nframes[1]=oc_rc_scale_drop(&_enc->rc,nframes[1]); }break; case 1:{ /*Pass 1 mode: use a fixed qi value.*/ qi=_enc->state.qis[0]; _enc->rc.log_qtarget=_enc->log_qavg[_qti][qi]; return qi; }break; case 2:{ ogg_int64_t scale_sum[2]; int qti; /*Pass 2 mode: we know exactly how much of each frame type there is in the current buffer window, and have estimates for the scales.*/ nframes[0]=_enc->rc.nframes[0]; nframes[1]=_enc->rc.nframes[1]; scale_sum[0]=_enc->rc.scale_sum[0]; scale_sum[1]=_enc->rc.scale_sum[1]; /*The window size can be slightly larger than the buffer window for VFR content; clamp it down, if appropriate (the excess will all be dup frames).*/ buf_delay=OC_MINI(_enc->rc.scale_window_end-_enc->rc.scale_window0, _enc->rc.buf_delay); /*If we're approaching the end of the file, add some slack to keep us from slamming into a rail. Our rate accuracy goes down, but it keeps the result sensible. We position the target where the first forced keyframe beyond the end of the file would be (for consistency with 1-pass mode).*/ buf_pad=OC_MINI(_enc->rc.buf_delay,_enc->state.keyframe_num +_enc->keyframe_frequency_force-_enc->rc.scale_window0); if(buf_delayrc.frame_metrics!=NULL){ int fmi; int fm_tail; fm_tail=_enc->rc.frame_metrics_head+_enc->rc.nframe_metrics; if(fm_tail>=_enc->rc.cframe_metrics)fm_tail-=_enc->rc.cframe_metrics; for(fmi=fm_tail;;){ oc_frame_metrics *m; fmi--; if(fmi<0)fmi+=_enc->rc.cframe_metrics; /*Stop before we remove the first frame.*/ if(fmi==_enc->rc.frame_metrics_head)break; m=_enc->rc.frame_metrics+fmi; /*If we find a keyframe, remove it and everything past it.*/ if(m->frame_type==OC_INTRA_FRAME){ do{ qti=m->frame_type; nframes[qti]--; scale_sum[qti]-=oc_bexp_q24(m->log_scale); buf_delay-=m->dup_count+1; fmi++; if(fmi>=_enc->rc.cframe_metrics)fmi=0; m=_enc->rc.frame_metrics+fmi; } while(fmi!=fm_tail); /*And stop scanning backwards.*/ break; } } } } /*If we're not using the same frame type as in pass 1 (because someone changed the keyframe interval), remove that scale estimate. We'll add in a replacement for the correct frame type below.*/ qti=_enc->rc.cur_metrics.frame_type; if(qti!=_qti){ nframes[qti]--; scale_sum[qti]-=oc_bexp_q24(_enc->rc.cur_metrics.log_scale); } /*Compute log_scale estimates for each frame type from the pass-1 scales we measured in the current window.*/ for(qti=0;qti<2;qti++){ _enc->rc.log_scale[qti]=nframes[qti]>0? oc_blog64(scale_sum[qti])-oc_blog64(nframes[qti])-OC_Q57(24): -_enc->rc.log_npixels; } /*If we're not using the same frame type as in pass 1, add a scale estimate for the corresponding frame using the current low-pass filter value. This is mostly to ensure we have a valid estimate even when pass 1 had no frames of this type in the buffer window. TODO: We could also plan ahead and figure out how many keyframes we'll be forced to add in the current buffer window.*/ qti=_enc->rc.cur_metrics.frame_type; if(qti!=_qti){ ogg_int64_t scale; scale=_enc->rc.log_scale[_qti]rc.log_scale[_qti]+OC_Q57(24)):0x7FFFFFFFFFFFLL; scale*=nframes[_qti]; nframes[_qti]++; scale+=oc_bexp_q24(log_cur_scale>>33); _enc->rc.log_scale[_qti]=oc_blog64(scale) -oc_blog64(nframes[qti])-OC_Q57(24); } else log_cur_scale=(ogg_int64_t)_enc->rc.cur_metrics.log_scale<<33; /*Add the padding from above. This basically reverts to 1-pass estimations in the last keyframe interval.*/ if(buf_pad>0){ ogg_int64_t scale; int nextra_frames; /*Extend the buffer.*/ buf_delay+=buf_pad; /*Add virtual delta frames according to the estimated drop count.*/ nextra_frames=oc_rc_scale_drop(&_enc->rc,buf_pad); /*And blend in the low-pass filtered scale according to how many frames we added.*/ scale= oc_bexp64(_enc->rc.log_scale[1]+OC_Q57(24))*(ogg_int64_t)nframes[1] +oc_bexp_q24(_enc->rc.scalefilter[1].y[0])*(ogg_int64_t)nextra_frames; nframes[1]+=nextra_frames; _enc->rc.log_scale[1]=oc_blog64(scale)-oc_blog64(nframes[1])-OC_Q57(24); } }break; } /*If we've been missing our target, add a penalty term.*/ rate_bias=(_enc->rc.rate_bias/(_enc->state.curframe_num+1000))* (buf_delay-buf_pad); /*rate_total is the total bits available over the next buf_delay frames.*/ rate_total=_enc->rc.fullness-_enc->rc.target+rate_bias +buf_delay*_enc->rc.bits_per_frame; log_scale0=_enc->rc.log_scale[_qti]+_enc->rc.log_npixels; /*If there aren't enough bits to achieve our desired fullness level, use the minimum quality permitted.*/ if(rate_total<=buf_delay)log_qtarget=OC_QUANT_MAX_LOG; else{ static const ogg_int64_t LOG_KEY_RATIO=0x0137222BB70747BALL; ogg_int64_t log_scale1; ogg_int64_t rlo; ogg_int64_t rhi; log_scale1=_enc->rc.log_scale[1-_qti]+_enc->rc.log_npixels; rlo=0; rhi=(rate_total+nframes[_qti]-1)/nframes[_qti]; while(rlo>1; log_rpow=oc_blog64(curr)-log_scale0; log_rpow=(log_rpow+(_enc->rc.exp[_qti]>>1))/_enc->rc.exp[_qti]; if(_qti)log_rpow+=LOG_KEY_RATIO>>6; else log_rpow-=LOG_KEY_RATIO>>6; log_rpow*=_enc->rc.exp[1-_qti]; rscale=nframes[1-_qti]*oc_bexp64(log_scale1+log_rpow); rdiff=nframes[_qti]*curr+rscale-rate_total; if(rdiff<0)rlo=curr+1; else if(rdiff>0)rhi=curr-1; else break; } log_qtarget=OC_Q57(2)-((oc_blog64(rlo)-log_scale0+(_enc->rc.exp[_qti]>>1))/ _enc->rc.exp[_qti]<<6); log_qtarget=OC_MINI(log_qtarget,OC_QUANT_MAX_LOG); } /*The above allocation looks only at the total rate we'll accumulate in the next buf_delay frames. However, we could overflow the buffer on the very next frame, so check for that here, if we're not using a soft target.*/ exp0=_enc->rc.exp[_qti]; if(_enc->rc.cap_overflow){ ogg_int64_t margin; ogg_int64_t soft_limit; ogg_int64_t log_soft_limit; /*Allow 3% of the buffer for prediction error. This should be plenty, and we don't mind if we go a bit over; we only want to keep these bits from being completely wasted.*/ margin=_enc->rc.max+31>>5; /*We want to use at least this many bits next frame.*/ soft_limit=_enc->rc.fullness+_enc->rc.bits_per_frame-(_enc->rc.max-margin); log_soft_limit=oc_blog64(soft_limit); /*If we're predicting we won't use that many...*/ log_qexp=(log_qtarget-OC_Q57(2)>>6)*exp0; if(log_scale0-log_qexp>32)* ((OC_MINI(margin,soft_limit)<<32)/margin); log_qtarget=((log_qexp+(exp0>>1))/exp0<<6)+OC_Q57(2); } } /*If this was not one of the initial frames, limit the change in quality.*/ old_qi=_enc->state.qis[0]; if(_clamp){ ogg_int64_t log_qmin; ogg_int64_t log_qmax; /*Clamp the target quantizer to within [0.8*Q,1.2*Q], where Q is the current quantizer. TODO: With user-specified quant matrices, we need to enlarge these limits if they don't actually let us change qi values.*/ log_qmin=_enc->log_qavg[_qti][old_qi]-0x00A4D3C25E68DC58LL; log_qmax=_enc->log_qavg[_qti][old_qi]+0x00A4D3C25E68DC58LL; log_qtarget=OC_CLAMPI(log_qmin,log_qtarget,log_qmax); } /*The above allocation looks only at the total rate we'll accumulate in the next buf_delay frames. However, we could bust the budget on the very next frame, so check for that here, if we're not using a soft target.*/ /* Disabled when our minimum qi > 0; if we saturate log_qtarget to to the maximum possible size when we have a minimum qi, the resulting lambda will interact very strangely with SKIP. The resulting artifacts look like waterfalls. */ if(_enc->state.info.quality==0){ ogg_int64_t log_hard_limit; /*Compute the maximum number of bits we can use in the next frame. Allow 50% of the rate for a single frame for prediction error. This may not be enough for keyframes or sudden changes in complexity.*/ log_hard_limit=oc_blog64(_enc->rc.fullness+(_enc->rc.bits_per_frame>>1)); /*If we're predicting we'll use more than this...*/ log_qexp=(log_qtarget-OC_Q57(2)>>6)*exp0; if(log_scale0-log_qexp>log_hard_limit){ /*Force the target to hit our limit exactly.*/ log_qexp=log_scale0-log_hard_limit; log_qtarget=((log_qexp+(exp0>>1))/exp0<<6)+OC_Q57(2); /*If that target is unreasonable, oh well; we'll have to drop.*/ log_qtarget=OC_MINI(log_qtarget,OC_QUANT_MAX_LOG); } } /*Compute a final estimate of the number of bits we plan to use.*/ log_qexp=(log_qtarget-OC_Q57(2)>>6)*_enc->rc.exp[_qti]; _enc->rc.rate_bias+=oc_bexp64(log_cur_scale+_enc->rc.log_npixels-log_qexp); qi=oc_enc_find_qi_for_target(_enc,_qti,old_qi, _enc->state.info.quality,log_qtarget); /*Save the quantizer target for lambda calculations.*/ _enc->rc.log_qtarget=log_qtarget; return qi; } int oc_enc_update_rc_state(oc_enc_ctx *_enc, long _bits,int _qti,int _qi,int _trial,int _droppable){ ogg_int64_t buf_delta; ogg_int64_t log_scale; int dropped; dropped=0; /* Drop frames also disabled for now in the case of infinite-buffer two-pass mode */ if(!_enc->rc.drop_frames||_enc->rc.twopass&&_enc->rc.frame_metrics==NULL){ _droppable=0; } buf_delta=_enc->rc.bits_per_frame*(1+_enc->dup_count); if(_bits<=0){ /*We didn't code any blocks in this frame.*/ log_scale=OC_Q57(-64); _bits=0; } else{ ogg_int64_t log_bits; ogg_int64_t log_qexp; /*Compute the estimated scale factor for this frame type.*/ log_bits=oc_blog64(_bits); log_qexp=_enc->rc.log_qtarget-OC_Q57(2); log_qexp=(log_qexp>>6)*(_enc->rc.exp[_qti]); log_scale=OC_MINI(log_bits-_enc->rc.log_npixels+log_qexp,OC_Q57(16)); } /*Special two-pass processing.*/ switch(_enc->rc.twopass){ case 1:{ /*Pass 1 mode: save the metrics for this frame.*/ _enc->rc.cur_metrics.log_scale=oc_q57_to_q24(log_scale); _enc->rc.cur_metrics.dup_count=_enc->dup_count; _enc->rc.cur_metrics.frame_type=_enc->state.frame_type; _enc->rc.twopass_buffer_bytes=0; }break; case 2:{ /*Pass 2 mode:*/ if(!_trial){ ogg_int64_t next_frame_num; int qti; /*Move the current metrics back one frame.*/ *&_enc->rc.prev_metrics=*&_enc->rc.cur_metrics; next_frame_num=_enc->state.curframe_num+_enc->dup_count+1; /*Back out the last frame's statistics from the sliding window.*/ qti=_enc->rc.prev_metrics.frame_type; _enc->rc.frames_left[qti]--; _enc->rc.frames_left[2]-=_enc->rc.prev_metrics.dup_count; _enc->rc.nframes[qti]--; _enc->rc.nframes[2]-=_enc->rc.prev_metrics.dup_count; _enc->rc.scale_sum[qti]-=oc_bexp_q24(_enc->rc.prev_metrics.log_scale); _enc->rc.scale_window0=(int)next_frame_num; /*Free the corresponding entry in the circular buffer.*/ if(_enc->rc.frame_metrics!=NULL){ _enc->rc.nframe_metrics--; _enc->rc.frame_metrics_head++; if(_enc->rc.frame_metrics_head>=_enc->rc.cframe_metrics){ _enc->rc.frame_metrics_head=0; } } /*Mark us ready for the next 2-pass packet.*/ _enc->rc.twopass_buffer_bytes=0; /*Update state, so the user doesn't have to keep calling 2pass_in after they've fed in all the data when we're using a finite buffer.*/ _enc->prev_dup_count=_enc->dup_count; oc_enc_rc_2pass_in(_enc,NULL,0); } }break; } /*Common to all passes:*/ if(_bits>0){ if(_trial){ oc_iir_filter *f; /*Use the estimated scale factor directly if this was a trial.*/ f=_enc->rc.scalefilter+_qti; f->y[1]=f->y[0]=f->x[1]=f->x[0]=oc_q57_to_q24(log_scale); _enc->rc.log_scale[_qti]=log_scale; } else{ /*Lengthen the time constant for the INTER filter as we collect more frame statistics, until we reach our target.*/ if(_enc->rc.inter_delay<_enc->rc.inter_delay_target&& _enc->rc.inter_count>=_enc->rc.inter_delay&&_qti==OC_INTER_FRAME){ oc_iir_filter_reinit(&_enc->rc.scalefilter[1],++_enc->rc.inter_delay); } /*Otherwise update the low-pass scale filter for this frame type, regardless of whether or not we dropped this frame.*/ _enc->rc.log_scale[_qti]=oc_iir_filter_update( _enc->rc.scalefilter+_qti,oc_q57_to_q24(log_scale))<<33; /*If this frame busts our budget, it must be dropped.*/ if(_droppable&&_enc->rc.fullness+buf_delta<_bits){ _enc->rc.prev_drop_count+=1+_enc->dup_count; _bits=0; dropped=1; } else{ ogg_uint32_t drop_count; /*Update a low-pass filter to estimate the "real" frame rate taking drops and duplicates into account. This is only done if the frame is coded, as it needs the final count of dropped frames.*/ drop_count=_enc->rc.prev_drop_count+1; if(drop_count>0x7F)drop_count=0x7FFFFFFF; else drop_count<<=24; _enc->rc.log_drop_scale=oc_blog64(oc_iir_filter_update( &_enc->rc.vfrfilter,drop_count))-OC_Q57(24); /*Initialize the drop count for this frame to the user-requested dup count. It will be increased if we drop more frames.*/ _enc->rc.prev_drop_count=_enc->dup_count; } } /*Increment the INTER frame count, for filter adaptation purposes.*/ if(_enc->rc.inter_countrc.inter_count+=_qti; } /*Increase the drop count.*/ else _enc->rc.prev_drop_count+=1+_enc->dup_count; /*And update the buffer fullness level.*/ if(!_trial){ _enc->rc.fullness+=buf_delta-_bits; /*If we're too quick filling the buffer and overflow is capped, that rate is lost forever.*/ if(_enc->rc.cap_overflow&&_enc->rc.fullness>_enc->rc.max){ _enc->rc.fullness=_enc->rc.max; } /*If we're too quick draining the buffer and underflow is capped, don't try to make up that rate later.*/ if(_enc->rc.cap_underflow&&_enc->rc.fullness<0){ _enc->rc.fullness=0; } /*Adjust the bias for the real bits we've used.*/ _enc->rc.rate_bias-=_bits; } return dropped; } #define OC_RC_2PASS_VERSION (1) #define OC_RC_2PASS_HDR_SZ (38) #define OC_RC_2PASS_PACKET_SZ (8) static void oc_rc_buffer_val(oc_rc_state *_rc,ogg_int64_t _val,int _bytes){ while(_bytes-->0){ _rc->twopass_buffer[_rc->twopass_buffer_bytes++]=(unsigned char)(_val&0xFF); _val>>=8; } } int oc_enc_rc_2pass_out(oc_enc_ctx *_enc,unsigned char **_buf){ if(_enc->rc.twopass_buffer_bytes==0){ if(_enc->rc.twopass==0){ int qi; /*Pick first-pass qi for scale calculations.*/ qi=oc_enc_select_qi(_enc,0,0); _enc->state.nqis=1; _enc->state.qis[0]=qi; _enc->rc.twopass=1; _enc->rc.frames_total[0]=_enc->rc.frames_total[1]= _enc->rc.frames_total[2]=0; _enc->rc.scale_sum[0]=_enc->rc.scale_sum[1]=0; /*Fill in dummy summary values.*/ oc_rc_buffer_val(&_enc->rc,0x5032544F,4); oc_rc_buffer_val(&_enc->rc,OC_RC_2PASS_VERSION,4); oc_rc_buffer_val(&_enc->rc,0,OC_RC_2PASS_HDR_SZ-8); } else{ int qti; qti=_enc->rc.cur_metrics.frame_type; _enc->rc.scale_sum[qti]+=oc_bexp_q24(_enc->rc.cur_metrics.log_scale); _enc->rc.frames_total[qti]++; _enc->rc.frames_total[2]+=_enc->rc.cur_metrics.dup_count; oc_rc_buffer_val(&_enc->rc, _enc->rc.cur_metrics.dup_count|_enc->rc.cur_metrics.frame_type<<31,4); oc_rc_buffer_val(&_enc->rc,_enc->rc.cur_metrics.log_scale,4); } } else if(_enc->packet_state==OC_PACKET_DONE&& _enc->rc.twopass_buffer_bytes!=OC_RC_2PASS_HDR_SZ){ _enc->rc.twopass_buffer_bytes=0; oc_rc_buffer_val(&_enc->rc,0x5032544F,4); oc_rc_buffer_val(&_enc->rc,OC_RC_2PASS_VERSION,4); oc_rc_buffer_val(&_enc->rc,_enc->rc.frames_total[0],4); oc_rc_buffer_val(&_enc->rc,_enc->rc.frames_total[1],4); oc_rc_buffer_val(&_enc->rc,_enc->rc.frames_total[2],4); oc_rc_buffer_val(&_enc->rc,_enc->rc.exp[0],1); oc_rc_buffer_val(&_enc->rc,_enc->rc.exp[1],1); oc_rc_buffer_val(&_enc->rc,_enc->rc.scale_sum[0],8); oc_rc_buffer_val(&_enc->rc,_enc->rc.scale_sum[1],8); } else{ /*The data for this frame has already been retrieved.*/ *_buf=NULL; return 0; } *_buf=_enc->rc.twopass_buffer; return _enc->rc.twopass_buffer_bytes; } static size_t oc_rc_buffer_fill(oc_rc_state *_rc, unsigned char *_buf,size_t _bytes,size_t _consumed,size_t _goal){ while(_rc->twopass_buffer_fill<_goal&&_consumed<_bytes){ _rc->twopass_buffer[_rc->twopass_buffer_fill++]=_buf[_consumed++]; } return _consumed; } static ogg_int64_t oc_rc_unbuffer_val(oc_rc_state *_rc,int _bytes){ ogg_int64_t ret; int shift; ret=0; shift=0; while(_bytes-->0){ ret|=((ogg_int64_t)_rc->twopass_buffer[_rc->twopass_buffer_bytes++])<rc.twopass==0){ _enc->rc.twopass=2; _enc->rc.twopass_buffer_fill=0; _enc->rc.frames_total[0]=0; _enc->rc.nframe_metrics=0; _enc->rc.cframe_metrics=0; _enc->rc.frame_metrics_head=0; _enc->rc.scale_window0=0; _enc->rc.scale_window_end=0; } /*If we haven't got a valid summary header yet, try to parse one.*/ if(_enc->rc.frames_total[0]==0){ if(!_buf){ int frames_needed; /*If we're using a whole-file buffer, we just need the first frame. Otherwise, we may need as many as one per buffer slot.*/ frames_needed=_enc->rc.frame_metrics==NULL?1:_enc->rc.buf_delay; return OC_RC_2PASS_HDR_SZ+frames_needed*OC_RC_2PASS_PACKET_SZ -_enc->rc.twopass_buffer_fill; } consumed=oc_rc_buffer_fill(&_enc->rc, _buf,_bytes,consumed,OC_RC_2PASS_HDR_SZ); if(_enc->rc.twopass_buffer_fill>=OC_RC_2PASS_HDR_SZ){ ogg_int64_t scale_sum[2]; int exp[2]; int buf_delay; /*Read the summary header data.*/ /*Check the magic value and version number.*/ if(oc_rc_unbuffer_val(&_enc->rc,4)!=0x5032544F|| oc_rc_unbuffer_val(&_enc->rc,4)!=OC_RC_2PASS_VERSION){ _enc->rc.twopass_buffer_bytes=0; return TH_ENOTFORMAT; } _enc->rc.frames_total[0]=(ogg_uint32_t)oc_rc_unbuffer_val(&_enc->rc,4); _enc->rc.frames_total[1]=(ogg_uint32_t)oc_rc_unbuffer_val(&_enc->rc,4); _enc->rc.frames_total[2]=(ogg_uint32_t)oc_rc_unbuffer_val(&_enc->rc,4); exp[0]=(int)oc_rc_unbuffer_val(&_enc->rc,1); exp[1]=(int)oc_rc_unbuffer_val(&_enc->rc,1); scale_sum[0]=oc_rc_unbuffer_val(&_enc->rc,8); scale_sum[1]=oc_rc_unbuffer_val(&_enc->rc,8); /*Make sure the file claims to have at least one frame. Otherwise we probably got the placeholder data from an aborted pass 1. Also make sure the total frame count doesn't overflow an integer.*/ buf_delay=_enc->rc.frames_total[0]+_enc->rc.frames_total[1] +_enc->rc.frames_total[2]; if(_enc->rc.frames_total[0]==0||buf_delay<0|| (ogg_uint32_t)buf_delay<_enc->rc.frames_total[0]|| (ogg_uint32_t)buf_delay<_enc->rc.frames_total[1]){ _enc->rc.frames_total[0]=0; _enc->rc.twopass_buffer_bytes=0; return TH_EBADHEADER; } /*Got a valid header; set up pass 2.*/ _enc->rc.frames_left[0]=_enc->rc.frames_total[0]; _enc->rc.frames_left[1]=_enc->rc.frames_total[1]; _enc->rc.frames_left[2]=_enc->rc.frames_total[2]; /*If the user hasn't specified a buffer size, use the whole file.*/ if(_enc->rc.frame_metrics==NULL){ _enc->rc.buf_delay=buf_delay; _enc->rc.nframes[0]=_enc->rc.frames_total[0]; _enc->rc.nframes[1]=_enc->rc.frames_total[1]; _enc->rc.nframes[2]=_enc->rc.frames_total[2]; _enc->rc.scale_sum[0]=scale_sum[0]; _enc->rc.scale_sum[1]=scale_sum[1]; _enc->rc.scale_window_end=buf_delay; oc_enc_rc_reset(_enc); } _enc->rc.exp[0]=exp[0]; _enc->rc.exp[1]=exp[1]; /*Clear the header data from the buffer to make room for packet data.*/ _enc->rc.twopass_buffer_fill=0; _enc->rc.twopass_buffer_bytes=0; } } if(_enc->rc.frames_total[0]!=0){ ogg_int64_t curframe_num; int nframes_total; curframe_num=_enc->state.curframe_num; if(curframe_num>=0){ /*We just encoded a frame; make sure things matched.*/ if(_enc->rc.prev_metrics.dup_count!=_enc->prev_dup_count){ _enc->rc.twopass_buffer_bytes=0; return TH_EINVAL; } } curframe_num+=_enc->prev_dup_count+1; nframes_total=_enc->rc.frames_total[0]+_enc->rc.frames_total[1] +_enc->rc.frames_total[2]; if(curframe_num>=nframes_total){ /*We don't want any more data after the last frame, and we don't want to allow any more frames to be encoded.*/ _enc->rc.twopass_buffer_bytes=0; } else if(_enc->rc.twopass_buffer_bytes==0){ if(_enc->rc.frame_metrics==NULL){ /*We're using a whole-file buffer:*/ if(!_buf)return OC_RC_2PASS_PACKET_SZ-_enc->rc.twopass_buffer_fill; consumed=oc_rc_buffer_fill(&_enc->rc, _buf,_bytes,consumed,OC_RC_2PASS_PACKET_SZ); if(_enc->rc.twopass_buffer_fill>=OC_RC_2PASS_PACKET_SZ){ ogg_uint32_t dup_count; ogg_int32_t log_scale; int qti; int arg; /*Read the metrics for the next frame.*/ dup_count=oc_rc_unbuffer_val(&_enc->rc,4); log_scale=oc_rc_unbuffer_val(&_enc->rc,4); _enc->rc.cur_metrics.log_scale=log_scale; qti=(dup_count&0x80000000)>>31; _enc->rc.cur_metrics.dup_count=dup_count&0x7FFFFFFF; _enc->rc.cur_metrics.frame_type=qti; _enc->rc.twopass_force_kf=qti==OC_INTRA_FRAME; /*"Helpfully" set the dup count back to what it was in pass 1.*/ arg=_enc->rc.cur_metrics.dup_count; th_encode_ctl(_enc,TH_ENCCTL_SET_DUP_COUNT,&arg,sizeof(arg)); /*Clear the buffer for the next frame.*/ _enc->rc.twopass_buffer_fill=0; } } else{ int frames_needed; /*We're using a finite buffer:*/ frames_needed=OC_CLAMPI(0,_enc->rc.buf_delay -(_enc->rc.scale_window_end-_enc->rc.scale_window0), _enc->rc.frames_left[0]+_enc->rc.frames_left[1] -_enc->rc.nframes[0]-_enc->rc.nframes[1]); while(frames_needed>0){ if(!_buf){ return OC_RC_2PASS_PACKET_SZ*frames_needed -_enc->rc.twopass_buffer_fill; } consumed=oc_rc_buffer_fill(&_enc->rc, _buf,_bytes,consumed,OC_RC_2PASS_PACKET_SZ); if(_enc->rc.twopass_buffer_fill>=OC_RC_2PASS_PACKET_SZ){ oc_frame_metrics *m; int fmi; ogg_uint32_t dup_count; ogg_int32_t log_scale; int qti; /*Read the metrics for the next frame.*/ dup_count=oc_rc_unbuffer_val(&_enc->rc,4); log_scale=oc_rc_unbuffer_val(&_enc->rc,4); /*Add the to the circular buffer.*/ fmi=_enc->rc.frame_metrics_head+_enc->rc.nframe_metrics++; if(fmi>=_enc->rc.cframe_metrics)fmi-=_enc->rc.cframe_metrics; m=_enc->rc.frame_metrics+fmi; m->log_scale=log_scale; qti=(dup_count&0x80000000)>>31; m->dup_count=dup_count&0x7FFFFFFF; m->frame_type=qti; /*And accumulate the statistics over the window.*/ _enc->rc.nframes[qti]++; _enc->rc.nframes[2]+=m->dup_count; _enc->rc.scale_sum[qti]+=oc_bexp_q24(m->log_scale); _enc->rc.scale_window_end+=m->dup_count+1; /*Compute an upper bound on the number of remaining packets needed for the current window.*/ frames_needed=OC_CLAMPI(0,_enc->rc.buf_delay -(_enc->rc.scale_window_end-_enc->rc.scale_window0), _enc->rc.frames_left[0]+_enc->rc.frames_left[1] -_enc->rc.nframes[0]-_enc->rc.nframes[1]); /*Clear the buffer for the next frame.*/ _enc->rc.twopass_buffer_fill=0; _enc->rc.twopass_buffer_bytes=0; } /*Go back for more data.*/ else break; } /*If we've got all the frames we need, fill in the current metrics. We're ready to go.*/ if(frames_needed<=0){ int arg; *&_enc->rc.cur_metrics= *(_enc->rc.frame_metrics+_enc->rc.frame_metrics_head); _enc->rc.twopass_force_kf= _enc->rc.cur_metrics.frame_type==OC_INTRA_FRAME; /*"Helpfully" set the dup count back to what it was in pass 1.*/ arg=_enc->rc.cur_metrics.dup_count; th_encode_ctl(_enc,TH_ENCCTL_SET_DUP_COUNT,&arg,sizeof(arg)); /*Mark us ready for the next frame.*/ _enc->rc.twopass_buffer_bytes=1; } } } } return (int)consumed; }