Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source
Source file parse_state.ml
(* Incremental parser state for delimited (CSV-like) input.

   Input is fed in chunks through [input] / [input_string]; each completed row is
   folded into a user accumulator by the callback [f] given to [create].  Between
   chunks the parser is held as an immutable snapshot ([State.t]); while a chunk is
   being consumed it is thawed into a [Mutable_state.t]. *)

open Core
module Row_buffer = Append_only_buffer

(** Raised by [finish] when the input ends inside a quoted field.  Carries the fields
    already collected for the current row and the partial contents of the
    unterminated field. *)
exception Bad_csv_formatting of string list * string

module Step = struct
  (** Position of the parser relative to the current row / field. *)
  type t =
    | Row_start
    | Field_start
    | In_unquoted_field
    | In_quoted_field
    | In_quoted_field_after_quote
end

open Step

module Config = struct
  (** Immutable parser configuration.

      [quote] is only meaningful when [use_quoting] is [true].  [f] is invoked once
      per completed row.  [fields_used], when present, lists the column indexes to
      keep; [None] means every column is kept. *)
  type 'a t =
    { sep : char
    ; quote : char
    ; use_quoting : bool
    ; strip : bool
    ; f : line_number:int -> 'a -> string Row_buffer.t -> 'a
    ; fields_used : int array option
    }

  (** Builds a configuration.

      [fields_used] is normalised to a strictly ascending, duplicate-free array
      (it is returned unchanged when it already is one).

      Raises [Invalid_argument] when [quote] is [`Using c] with [c] equal to [sep],
      or when either [quote] or [sep] is ['\r'] or ['\n']. *)
  let create ~sep ~quote ~strip ~f ~fields_used =
    let fields_used =
      match fields_used with
      | None -> None
      | Some fields_used as x
        when Array.is_sorted_strictly fields_used ~compare:Int.ascending -> x
      | Some fields_used ->
        Some
          (Array.of_list
             (List.dedup_and_sort (Array.to_list fields_used) ~compare:Int.ascending))
    in
    (match quote with
     | `Using c when Char.equal c sep ->
       invalid_arg
         "Delimited_kernel.Parse_state.create: cannot use the same character for [sep] \
          and [quote]"
     | `Using (('\r' | '\n') as c) ->
       invalid_arg
         (sprintf "Delimited_kernel.Parse_state.create: invalid [quote] character %C" c)
     | _ -> ());
    (match sep with
     | ('\r' | '\n') as c ->
       invalid_arg
         (sprintf "Delimited_kernel.Parse_state.create: invalid [sep] character %C" c)
     | _ -> ());
    { sep
    ; quote =
        (* With [`No_quoting] a placeholder character is stored; it is never consulted
           because [use_quoting] is [false]. *)
        (match quote with
         | `Using char -> char
         | `No_quoting -> '"')
    ; use_quoting =
        (match quote with
         | `Using _ -> true
         | `No_quoting -> false)
    ; strip
    ; f
    ; fields_used
    }
  ;;
end

module State = struct
  (** Immutable snapshot of the parser between two chunks of input. *)
  type 'a t =
    { field_buffer : string
    ; row_buffer : string list
    ; current_field : int
    ; current_line_number : int (** The current line number *)
    ; row_line_number : int (** The line number of the beginning of the current row *)
    ; acc : 'a
    ; step : Step.t
    ; finish : bool
    }

  (** Fresh snapshot positioned at the start of a row on line [start_line_number],
      with empty buffers and accumulator [init]. *)
  let create ~init ~start_line_number =
    { acc = init
    ; step = Row_start
    ; field_buffer = ""
    ; row_buffer = []
    ; current_field = 0
    ; row_line_number = start_line_number
    ; current_line_number = start_line_number
    ; finish = false
    }
  ;;

  let acc { acc; _ } = acc
  let set_acc t acc = { t with acc }
end

(** A parser: its configuration plus the current snapshot. *)
type 'a t =
  { config : 'a Config.t
  ; state : 'a State.t
  }

(** Current value of the user accumulator. *)
let acc t = State.acc t.state

(** Returns [t] with its accumulator replaced by [acc]. *)
let set_acc t acc = { t with state = State.set_acc t.state acc }

(** Creates a parser.  Defaults: [strip = false], [sep = ','], [quote = `Using '"'],
    [start_line_number = 1].  Raises [Invalid_argument] under the conditions
    described on [Config.create]. *)
let create
      ?(strip = false)
      ?(sep = ',')
      ?(quote = `Using '"')
      ?(start_line_number = 1)
      ~fields_used
      ~init
      ~f
      ()
  =
  { config = Config.create ~sep ~quote ~strip ~f ~fields_used
  ; state = State.create ~init ~start_line_number
  }
;;

module Char_kind = struct
  (** Classification of an input character with respect to a configuration. *)
  type t =
    | Backslash_r
    | Newline
    | Sep
    | Quote
    | Whitespace
    | Normal

  (** Classifies [c].  The checks are ordered: ['\r'] and ['\n'] first, then the
      quote character (only when quoting is enabled), then the separator, then
      whitespace; anything else is [Normal]. *)
  let of_char (t : _ Config.t) c =
    let open Char.Replace_polymorphic_compare in
    match c with
    | '\r' -> Backslash_r
    | '\n' -> Newline
    | _ when c = t.quote && t.use_quoting -> Quote
    | _ when c = t.sep -> Sep
    | _ when Char.is_whitespace c -> Whitespace
    | _ -> Normal
  ;;
end

module Mutable_state = struct
  (* We don't capture state [step] in here to avoid having to mutate the record at every
     single iteration *)
  type 'a t =
    { field_buffer : Buffer.t
    ; row_buffer : string Row_buffer.t
    ; config : 'a Config.t
    ; mutable current_field : int
    ; mutable enqueue : bool (* cache for should_enqueue *)
    ; mutable current_line_number : int
    ; mutable row_line_number : int
    ; mutable acc : 'a
    }

  (* Number of fields stored so far for the current row. *)
  let row_length t = Row_buffer.length t.row_buffer

  (* To reduce the number of allocations, we keep an array [fields_used] of the field
     indexes we care about. [current_field] is the position of the parser within the
     input row, and [next_field_index] is an index into the [fields_used] array
     indicating the next field that we need to store.
     If [fields_used] is None, we need to store every field.
  *)
  let should_enqueue fields_used state =
    match fields_used with
    | None -> true
    | Some array ->
      let next_field_index = row_length state in
      next_field_index < Array.length array
      && array.(next_field_index) = state.current_field
  ;;

  (* Thaws an immutable snapshot into a mutable working state, copying the partial
     field and the fields already collected for the current row. *)
  let create ~(config : 'a Config.t) ~(state : 'a State.t) =
    let field_buffer = Buffer.create (String.length state.field_buffer) in
    Buffer.add_string field_buffer state.field_buffer;
    let row_buffer = Row_buffer.of_list state.row_buffer in
    let state =
      { field_buffer
      ; row_buffer
      ; current_field = state.current_field
      ; enqueue = false
      ; config
      ; row_line_number = state.row_line_number
      ; current_line_number = state.current_line_number
      ; acc = state.acc
      }
    in
    (* [enqueue] can only be computed once the record exists, because
       [should_enqueue] reads the record's row buffer and current field. *)
    if should_enqueue config.fields_used state then state.enqueue <- true;
    state
  ;;

  (* Appends [c] to the current field, unless the current field is being skipped. *)
  let emit_char t c = if t.enqueue then Buffer.add_char t.field_buffer c

  (* Ends the current field: stores it in the row (stripped when configured to) if it
     is a kept field, then advances to the next field index. *)
  let emit_field state =
    if state.enqueue
    then (
      Row_buffer.append
        state.row_buffer
        (if state.config.strip
         then Shared.strip_buffer state.field_buffer
         else Buffer.contents state.field_buffer);
      Buffer.clear state.field_buffer);
    state.current_field <- state.current_field + 1;
    state.enqueue <- should_enqueue state.config.fields_used state
  ;;

  (* Ends the current row: folds it into the accumulator through [config.f], clears
     the row buffer, and moves to the first field of the next line. *)
  let emit_row state =
    let acc = state.config.f ~line_number:state.row_line_number state.acc state.row_buffer in
    state.acc <- acc;
    Row_buffer.lax_clear state.row_buffer;
    state.current_field <- 0;
    state.enqueue <- should_enqueue state.config.fields_used state;
    state.current_line_number <- state.current_line_number + 1;
    state.row_line_number <- state.current_line_number
  ;;

  (* Converts the working state back into an immutable snapshot at [step].  The
     snapshot's [finish] flag is always [false]; [finish] sets it itself. *)
  let freeze ~step t : 'a State.t =
    { acc = t.acc
    ; step
    ; field_buffer = Buffer.contents t.field_buffer
    ; row_buffer = Row_buffer.to_list t.row_buffer
    ; current_field = t.current_field
    ; current_line_number = t.current_line_number
    ; row_line_number = t.row_line_number
    ; finish = false
    }
  ;;

  (* Used for newlines inside quoted fields, which advance the line counter without
     ending the row. *)
  let incr_line_number t = t.current_line_number <- t.current_line_number + 1
end

(* Shared implementation of [input] and [input_string].  [get input i] must return the
   character at index [i]; the characters at indexes [pos .. pos + len - 1] are
   consumed.  Bounds are NOT checked here: callers validate [pos] / [len] first.

   Raises if [t] has already been finalized by [finish], and raises [Failure] when a
   closing quote is followed by an ordinary character. *)
let input_aux ~get t ~pos ~len input =
  if t.state.finish
  then
    raise_s
      [%message
        "Delimited.Expert.Parse_state.input: Cannot feed more input to a state that has \
         already been finalized"];
  let state = Mutable_state.create ~config:t.config ~state:t.state in
  (* One transition of the state machine: consumes [c] while in [step] and returns
     the next step. *)
  let feed_one c step =
    match step, Char_kind.of_char t.config c with
    (* Carriage returns are dropped in every state. *)
    | _, Backslash_r -> step
    | (Row_start | Field_start), Quote -> In_quoted_field
    | (Row_start | Field_start), Sep ->
      Mutable_state.emit_field state;
      Field_start
    | (Row_start | Field_start), Newline ->
      Mutable_state.emit_field state;
      Mutable_state.emit_row state;
      Row_start
    | (Row_start | Field_start), (Normal | Whitespace) ->
      Mutable_state.emit_char state c;
      In_unquoted_field
    | In_unquoted_field, Sep ->
      Mutable_state.emit_field state;
      Field_start
    | In_unquoted_field, Newline ->
      Mutable_state.emit_field state;
      Mutable_state.emit_row state;
      Row_start
    | In_unquoted_field, (Whitespace | Normal) ->
      Mutable_state.emit_char state c;
      step
    (* A quote in the middle of an unquoted field is kept literally. *)
    | In_unquoted_field, Quote ->
      Mutable_state.emit_char state c;
      step
    | In_quoted_field, Quote -> In_quoted_field_after_quote
    | In_quoted_field, Newline ->
      Mutable_state.emit_char state c;
      Mutable_state.incr_line_number state;
      step
    | In_quoted_field, (Normal | Sep | Whitespace) ->
      Mutable_state.emit_char state c;
      step
    | In_quoted_field_after_quote, Quote ->
      (* doubled quote *)
      Mutable_state.emit_char state c;
      In_quoted_field
    (* A quote followed by the character '0' emits a NUL byte and stays inside the
       quoted field. *)
    | In_quoted_field_after_quote, _ when Char.equal c '0' ->
      Mutable_state.emit_char state '\000';
      In_quoted_field
    | In_quoted_field_after_quote, Sep ->
      Mutable_state.emit_field state;
      Field_start
    | In_quoted_field_after_quote, Newline ->
      Mutable_state.emit_field state;
      Mutable_state.emit_row state;
      Row_start
    (* Whitespace after the closing quote is discarded. *)
    | In_quoted_field_after_quote, Whitespace -> step
    | In_quoted_field_after_quote, Normal ->
      failwithf
        "In_quoted_field_after_quote looking at '%c' (line_number=%d)"
        c
        state.current_line_number
        ()
  in
  let loop_bound = len + pos in
  let rec loop i step =
    if i >= loop_bound
    then step
    else (
      let c = get input i in
      let step = feed_one c step in
      loop (i + 1) step)
  in
  let step = loop pos t.state.step in
  let state = Mutable_state.freeze ~step state in
  { t with state }
;;

(** [input t ?pos ?len bytes] feeds [len] bytes of [bytes], starting at [pos], to the
    parser and returns the updated parser.  [pos] defaults to [0] and [len] to the
    remainder of [bytes] after [pos].

    Raises [Invalid_argument] if the range is out of bounds, and raises as described
    for [finish]ed parsers and malformed quoted fields in the shared implementation. *)
let input t ?(pos = 0) ?len input =
  let len =
    match len with
    | None -> Bytes.length input - pos
    | Some len -> len
  in
  if len < 0 || pos < 0 || pos + len > Bytes.length input
  then
    (* Single-line message, matching the wording used by [input_string]; a line break
       inside the literal would end up in the raised message. *)
    invalid_arg "Delimited_kernel.Parse_state.input: index out of bound";
  (* The unchecked accessor is safe because the range was validated above. *)
  input_aux ~get:Bytes.unsafe_get t ~pos ~len input
;;

(** Same as [input], reading from a string instead of bytes. *)
let input_string t ?(pos = 0) ?len input =
  let len =
    match len with
    | None -> String.length input - pos
    | Some len -> len
  in
  if len < 0 || pos < 0 || pos + len > String.length input
  then invalid_arg "Delimited_kernel.Parse_state.input_string: index out of bound";
  (* The unchecked accessor is safe because the range was validated above. *)
  input_aux ~get:String.unsafe_get t ~pos ~len input
;;

(** Line number the parser is currently on. *)
let current_line_number t = t.state.current_line_number

(** [true] exactly when the parser is at [Row_start], i.e. no character of the
    current row has been consumed yet. *)
let at_beginning_of_row t =
  match t.state.step with
  | Row_start -> true
  | Field_start | In_quoted_field | In_quoted_field_after_quote | In_unquoted_field ->
    false
;;

(** Signals end of input.  Any row in progress is completed and passed to the row
    callback, and the returned parser is marked as finalized so that further input is
    rejected.  Calling [finish] on an already finalized parser returns it unchanged.

    Raises [Bad_csv_formatting] if the input ended inside a quoted field. *)
let finish ({ config; state } as t) =
  if t.state.finish
  then t
  else (
    let state = Mutable_state.create ~config ~state in
    (match t.state.step with
     | Row_start -> ()
     | Field_start ->
       Mutable_state.emit_field state;
       Mutable_state.emit_row state
     | In_unquoted_field | In_quoted_field_after_quote ->
       Mutable_state.emit_field state;
       Mutable_state.emit_row state
     | In_quoted_field ->
       raise (Bad_csv_formatting (t.state.row_buffer, t.state.field_buffer)));
    let state = { (Mutable_state.freeze ~step:t.state.step state) with finish = true } in
    { t with state })
;;