1؎м|EF tN@VVR1҈oVst BsX,:urHt 0FxVN#0~u09rFN@ < t,1<usĘFsF<t SF@uP^uV0|>rUxV^V 1t$R1ZÊtLVtFtfjftSjjH@^ F6 PXE Boot:  WiLinuFreeBSĐDrive ??AϿAAХ#`'Aڥ`JVU<fjQPS1Pjf1ɎَѼ|}r,`|uuƀrIᾢ}K1҉ .w ޿)Itduddu`PDL Zs*}}0r4u.t"rURAZrUu tBRZrˀ?tfFRff1fC0fZf=w@(f8r^P[s Ot0FsF ^(FwReadBoot error PUWEVamnesiac?AWEV 1ABTX 1мfjf^(GG^╬s uU}EEEfh (֕Е @"ꌐ1ɱѱ8ٺ6 -)б3QPhj+5 QQQQRja %"1".Еf юَH" 1Ўػpܕtr4!PP 栈!桰!桰!XX!j4j0j,j(j$j jj j j j j j j4$D$`f|$Dut$Pt$PjWB^d$a<$t<$u D$ud$π j>j :j 6j 2j .j *j&j"jpjqjrjsjtju jvjwj`jt$D%]Fu,^+]B^u%1҉] j"uT$0taf] ]~Vq^^]6]]f "1؎ܕfffffa]ϼ]f`fffff1؎֕Е @"e1ɱюَ%]t$D%͕8ك=]tH~V]3 ]y5]ʹ^t ]fFaσuD$ `X6 tŰ=VtfvtatJCPu fu6 ЖFt -t t  u^ u pPXPX$< i/ u`1ɴPf 8cuf1< tPЀfBPr0ƀrf Pffaj@t=Gth~31Y1Ɖt <t1Z[^_]UWVSTljT$L$GD$D$,=G71ȉՙáG I G `7<=Tt=Tu+e49;u9|;u7==_wF e4jh=d=D$HT$L 8A 9|97 ȉ֙D$ =<TuL> ==D$HT$L \$( 7É t1ۋD$(1t$ 1957u97t$ڋD$,}757D$ H#D$(=<Tu(L$,D$PD$PD$Ht$LL$,D$Ht$@#t$ t$< 1 7D$HT$L t1փ3| w, 7 t19T$|9D$r7T$8#<<#7D$<%)ǁv957u97t ڋD$uS757D$<%);\$4v\$4T$‰ًD$0\$0G)\$4|$4wD$DT[^_]S HHf1)ȣGGG )?Ѓ<?? )Ȋ@?0G`G3u3t(?ƀ(?=(?tG(?(A(.G uh(Ah3h3) (?=(Gu1(G3t06u u (G31 t .G u95(G?aP5??4R3?Ph3d4t8 t6t](?1r ƒ t+ t tu(?vKh4?X--(?=wC.G u tUWVS4(G1Ņu=G5(Gh44D4f|$+5HG\$ى 9.<$ELF!D$G11'> FGD$,9}~1>+5H>G>ى}9>%HpGL$2T$0A9uXAk(D$ GPt> PuK1>{>G>ى9 |$HtGh 4h4$XZH(GУ4G?eG0GRjjj?? ? ?^3 R,G%c PWzf<$ 0\$T$ى96G>73 @s3>(P4[^_]UWVS(?CC 6 -ހ u!Ft< t< t < u4$ F$-C<> t*̫Q10@@̫Q<@T 08̫Q10 <  @e̫Qr@e@e `@ep? p?̫Q10@@<@TU ̫Q`@ePW<hX,z d ̫Q  8//@?/@A̫Q̫Q̫QTji+A̫Q̫Q̫QC̫Q̫Q̫Qvar/tmpDA̫Qc̫Q̫Q~r$:c̫Q=O̫QD|$d0c̫Q=O̫QC`r$c̫Q=O̫Q Bv$>@c̫Q=O̫Q !!JA̫Qc̫Q̫Q3W'$+̫Q=O̫Q(0%v$c̫Q=O̫Q8@$c̫Q=O̫QH >5O$Hc̫Q=O̫QPX`h%97V$$ c̫Q=O̫Qpx$c̫Q=O̫QZc̫Q=O̫Q0n$nc̫Q=O̫Q2mV$Nc̫Q=O̫Q5MX$c̫Q=O̫Q8q$c̫Q=O̫Q:8$ic̫Q=O̫Q7g3$c̫Q=O̫Q?ø6$}c̫Q=O̫Q@H A̫Qc̫Q̫QI^j$Nc̫Q=O̫QPX`hp(|$G c̫Q=O̫QxC$Qc̫Q=O̫Q)ST$"c̫Q=O̫QJmJA̫Qc̫Q̫QLb$)c̫Q=O̫Q^ׇ=A̫Qc̫Q̫QM$@c̫Q=O̫QN! 
$c̫Q=O̫Q/=$c̫Q=O̫QCk2$c̫Q=O̫Q (08@HPX`OY(A̫Qc̫Q̫Qqw $o c̫Q=O̫Qhšju$[c̫Q=O̫Q2|<$qc̫Q=O̫QpÉ$[c̫Q=O̫Q)$c̫Q=O̫Qx:g$kc̫Q=O̫Q6z{$ c̫Q=O̫QԊ$Cc̫Q=O̫Q"$c̫Q=O̫QX`hpxHq$Xc̫Q=O̫Q-eG$8 c̫Q=O̫Q:G$c̫Q=O̫QDC$'c̫Q=O̫Q tA۲$c̫Q=O̫Q( M$ c̫Q=O̫Q02 $ c̫Q=O̫Q8 A̫Qc̫Q̫QQ4&$[c̫Q=O̫Q@HPX`h.rTA̫Qc̫Q̫Q?@%$; c̫Q=O̫Qp&A̫Qc̫Q̫QRM$Mc̫Q=O̫Qx'#HA̫Qc̫Q̫Q~dA̫Qc̫Q̫Qe$c̫Q=O̫QC A̫Qc̫Q̫Q4$4c̫Q=O̫Q8A̫Qc̫Q̫Q6>A̫Qc̫Q̫Q7֭$ c̫Q=O̫Qx A̫Qc̫Q̫Qn5K$ c̫Q=O̫Q4J@A̫Qc̫Q̫Qo$c̫Q=O̫QynA̫Qc̫Q̫Q[ $3-c̫Q=O̫Q}$ c̫Q=O̫QP1$c̫Q=O̫Q~LA̫Qc̫Q̫Q $c̫Q=O̫Q Lk$c̫Q=O̫Q $zc̫Q=O̫Qm;sA̫Qc̫Q̫Qoo?p$l c̫Q=O̫Qz[$c̫Q=O̫QP($gc̫Q=O̫Q (08@H4ַ.A̫Qc̫Q̫Q衆$E<c̫Q=O̫QPX`hSX%$ c̫Q=O̫Qpc l$;c̫Q=O̫Qxpfk<A̫Qc̫Q̫Qo"$:$c̫Q=O̫Q}qd$<c̫Q=O̫Q'$ c̫Q=O̫Q%`A̫Qc̫Q̫Q?$c̫Q=O̫QL` $ $c̫Q=O̫Q (]$c̫Q=O̫Q0==$c̫Q=O̫Q8@@9$c̫Q=O̫QH 5 $/c̫Q=O̫QPX paA̫Qc̫Q̫QU> $c̫Q=O̫Qu<?$c̫Q=O̫Qv}Uv$!c̫Q=O̫Q`#r$c̫Q=O̫QW|$c̫Q=O̫Qh4 3 $c̫Q=O̫Q$c̫Q=O̫Q,$c̫Q=O̫Q]"e$c̫Q=O̫Qp i$Hc̫Q=O̫QxjWA̫Qc̫Q̫Q5c$c̫Q=O̫Q $^c̫Q=O̫Qe ]Q$a%c̫Q=O̫Qj$~c̫Q=O̫Q MA̫Qc̫Q̫Q_RM $c̫Q=O̫Qg #$ c̫Q=O̫Qy81$ c̫Q=O̫Q!x$#c̫Q=O̫Q/$I c̫Q=O̫Q_$c̫Q=O̫Q A̫Qc̫Q̫QgR$c̫Q=O̫Q;䃎$c̫Q=O̫Q Y74A̫Qc̫Q̫Q $pc̫Q=O̫Q (09_8$Sc̫Q=O̫Q8@HPX*3$c̫Q=O̫Q` D$+c̫Q=O̫QhpxX$Oc̫Q=O̫Q(A̫Qc̫Q̫Q$ c̫Q=O̫QA̫Qc̫Q̫Q $)c̫Q=O̫Q (08@HPX`hpx}^OA̫Qc̫Q̫Q=~;$c̫Q=O̫Q($<c̫Q=O̫Q\A̫Qc̫Q̫QF$c̫Q=O̫Q~}$?<c̫Q=O̫QM$^c̫Q=O̫Q (\:l$>c̫Q=O̫Q08 >I $Ic̫Q=O̫Q@HPX`%k`.A̫Qc̫Q̫Q $c̫Q=O̫Qh$nc̫Q=O̫Q (08-F$c̫Q=O̫Qh\$ c̫Q=O̫Qp $c̫Q=O̫Qx4A̫Qc̫Q̫QB$.c̫Q=O̫Qq{$c̫Q=O̫Q |A̫Qc̫Q̫Q $ c̫Q=O̫Q M`$2.c̫Q=O̫Q08@HPX`hpx 0V$jc̫Q=O̫Q( 6h$̫Q=O̫Q$c̫Q=O̫Q zf$]c̫Q=O̫Q hO)$:c̫Q=O̫Q ;$;c̫Q=O̫Q=X`%$c̫Q=O̫Qe ?RA̫Qc̫Q̫Q 0$/̫Q=O̫Q糩$ c̫Q=O̫Q$. 
c̫Q=O̫Q @f$c̫Q=O̫Qu M A̫Qc̫Q̫Qw4A̫Qc̫Q̫Q" A̫Qc̫Q̫QG $c̫Q=O̫QIҤ$xc̫Q=O̫Q( 0 8 @ H P X ` h EL$c̫Q=O̫Qp x @FtK$`c̫Q=O̫Q S=$Oc̫Q=O̫Qp!x!!!!!!!!!!!`!sI$c̫Q=O̫Q"""""""""###"`-A̫Qc̫Q̫Q%m_;$c̫Q=O̫QLb$ c̫Q=O̫QT$c̫Q=O̫Q"Y$c̫Q=O̫Q%Fr$c̫Q=O̫Ql$c̫Q=O̫Q@#'.$c̫Q=O̫QH#P# $ c̫Q=O̫QX#u 6$*c̫Q=O̫QT#Gaa$Nc̫Q=O̫Qp$Fc̫Q=O̫Q^#Cd$c̫Q=O̫Q`#m J$c̫Q=O̫Qh#E# =$Jc̫Q=O̫Qn H7$c̫Q=O̫Qp#x# @W$c̫Q=O̫Q|#l$c̫Q=O̫Q}#'?$rc̫Q=O̫Q#2L$mc̫Q=O̫Q#_$Ic̫Q=O̫Q#####%Erq$c̫Q=O̫Q#3$4c̫Q=O̫Q## {$c̫Q=O̫Q## W$rc̫Q=O̫Q8$@$H$P$X$`$h$p$x$$$$($H_Z$c̫Q=O̫Q0$5$c̫Q=O̫Qx%% 7$jc̫Q=O̫Q#$Nc̫Q=O̫Q%P$/ c̫Q=O̫Q%@$c̫Q=O̫Q%w -$9c̫Q=O̫Q%% )~5{$9c̫Q=O̫Q%% <#$& c̫Q=O̫Q%1$c̫Q=O̫Q% $ c̫Q=O̫Q%]wY$c̫Q=O̫Q%% X$ c̫Q=O̫Q%_W$,c̫Q=O̫Q%%%yL$d c̫Q=O̫Q%l\D$c̫Q=O̫Q%N*J$c̫Q=O̫Q% '$c̫Q=O̫Q%&E=$Ic̫Q=O̫Q&潑D$& c̫Q=O̫Q&c$c̫Q=O̫Q&9$ c̫Q=O̫Q &y$c̫Q=O̫Q#*ܖ$Uc̫Q=O̫Q(&0&8&@&H&%+╙$c̫Q=O̫QP&^$8c̫Q=O̫QT&wt$?c̫Q=O̫QX&`&h&p& 5$$@c̫Q=O̫Qx&&&&%!6H$Ac̫Q=O̫Q&&&&%!$Rc̫Q=O̫Q&&&&&&*+$*@c̫Q=O̫Q&&&&%!v$uCc̫Q=O̫Qp'x'''''''''''`' $fc̫Q=O̫Q 4(40484@4H4P4X4`4h4p4x44p$c̫Q=O̫Q44444444444%YQ.A̫Qd̫Q̫QM:D$c̫Q=O̫Q4p$c̫Q=O̫Q &1[$Pc̫Q=O̫Q44555'&)m$ c̫Q=O̫Q5$p;c̫Q=O̫Q5555555555555(e$c̫Q=O̫Q6666666667O&DU$ c̫Q=O̫QF+ $2c̫Q=O̫QFFFEo K$c̫Q=O̫QFF <$c̫Q=O̫QF: mg93$\c̫Q=O̫Q@GHGPGXG`GhGpGxGGGGG0Gh`$rc̫Q=O̫Q8GH k;$T+c̫Q=O̫QHHHh;$7c̫Q=O̫QHHHFK$c̫Q=O̫QHH Sc$ c̫Q=O̫QHj=I$ c̫Q=O̫QHH8$c̫Q=O̫QHI $gGc̫Q=O̫QIII II$z$I*c̫Q=O̫Q(I0I8II*N$c̫Q=O̫QIIIIIIIIIIJJIYa$7<c̫Q=O̫QLL L(L0L8L@LHLPLXL`LhLL$zc̫Q=O̫QLLLLLLLL>$c̫Q=O̫QLMMMM M(M0M?9A7*$pc̫Q=O̫Q8M@MHMPMXM`MhME9ɲ$c̫Q=O̫QpMxMMMMMMMMML\Ɣ$&c̫Q=O̫QMMMc$Rc̫Q=O̫QHy$n c̫Q=O̫QHNPNXN`NhNpNxNNNNNN8N|$.c̫Q=O̫QNNNr$c̫Q=O̫QH[$c̫Q=O̫QM$[=c̫Q=O̫QpOxOOOOOOOOOOO`O(%N$c̫Q=O̫QhOM x$Vc̫Q=O̫QPPPPPP,1$LJc̫Q=O̫QPPPPPPPPPD $c̫Q=O̫QQ*$c̫Q=O̫Q>I8%$c̫Q=O̫QQL Th!A̫Q̫Q̫Q(̫Q(̫Q̫QCJms(̫Q(̫Q̫QCiA̫Q'̫Q̫Q&$A̫Q̫Q̫QgS(A̫Q̫Q̫QSBS(̫Q(̫Q̫Q../man1dJ(̫Q(̫Q̫Q../man3$]A (̫Q(̫Q̫Qen.ISO8859-1XA̫Q;̫Q̫QT ;̫Q;̫Q̫QE.$7;̫Q;̫Q̫QGU"x;̫Q;̫Q̫QOUKA[M;̫Q;̫Q̫QwU;3;̫Q;̫Q̫QxQ;̫Q;̫Q̫Q6Tbx;̫Q;̫Q̫QUXr;̫Q;̫Q̫QU"A̫Q;̫Q̫QUyj$];̫Q;̫Q̫QU*|]$;̫Q;̫Q̫QV=S* $i;̫Q;̫Q̫QoV9$K;̫Q;̫Q̫QFT $;̫Q;̫Q̫QV 
5$X;̫Q;̫Q̫QV{Y$;̫Q;̫Q̫QGWqB$i;̫Q;̫Q̫QWW`P$];̫Q;̫Q̫QWj9$;̫Q;̫Q̫QW8y$_;̫Q;̫Q̫QW>,\$`;̫Q;̫Q̫QW#Uz$9;̫Q;̫Q̫QWZA$c;̫Q;̫Q̫QWP$;̫Q;̫Q̫QW$;̫Q;̫Q̫QXcl$;̫Q;̫Q̫QX$]$;̫Q;̫Q̫QX1T`$;̫Q;̫Q̫QX/U$b;̫Q;̫Q̫Q'X42A̫Q'̫Q̫Q/XA̫QI̫Q̫QQ 4$!-I̫QI̫Q̫QdS$&I̫QI̫Q̫Q h5$e2I̫QI̫Q̫QH P X Tq$5/I̫QI̫Q̫Q8@H $~*I̫QI̫Q̫QPX`A- $>I̫QI̫Q̫Qhph$1I̫QI̫Q̫Qx?XW$E)I̫QI̫Q̫Q$7)I̫QI̫Q̫QגxI̫QI̫Q̫Qdutch1I̫QI̫Q̫QdutchnCqI̫QI̫Q̫QenglishC iI̫QI̫Q̫Qenglish I̫QI̫Q̫Qenglish =5I̫QI̫Q̫Qenglishm}I̫QI̫Q̫Qenglish4I̫QI̫Q̫QenglishVI̫QI̫Q̫QenglishP=I̫QI̫Q̫Qenglish"iEvI̫QI̫Q̫Qenglish~I̫QI̫Q̫Qenglishf9fmI̫QI̫Q̫QenglishI̫QI̫Q̫QenglishmI̫QI̫Q̫Qenglish9I̫QI̫Q̫Qenglish!I̫QI̫Q̫QenglishhI̫QI̫Q̫QenglishxuI̫QI̫Q̫QenglishI̫QI̫Q̫QfrenchObI̫QI̫Q̫Qfrench0I̫QI̫Q̫Qfrench_0I̫QI̫Q̫Qfrench'tI̫QI̫Q̫Qfrench撲I̫QI̫Q̫Qfrench뵛I̫QI̫Q̫Qfrench[z9I̫QI̫Q̫Qfrench$CI̫QI̫Q̫QgermanO>I̫QI̫Q̫Qgerman\I̫QI̫Q̫Qgerman"I̫QI̫Q̫Qgerman;wI̫QI̫Q̫QgermandܑZI̫QI̫Q̫Qgerman-f-I̫QI̫Q̫Qspanish I̫QI̫Q̫QspanishYSI̫QI̫Q̫Qswedish喫I̫QI̫Q̫Qswedish+ I̫QI̫Q̫Qpolish" (̫Q(̫Q̫Qusr/src/sys=F[X̫QX̫Q̫QfwU$2X̫QX̫Q̫Q  BCY̫QY̫Q̫QA̫Q$̫Q̫QGXr$A̫Q$̫Q̫QOXA̫Q$̫Q̫QWXzNC̫Q$̫Q̫Q_XvhA̫Q$̫Q̫QgX2A̫Qx̫Q̫QoXᯓJJ*$!q(U!r>o7@@ f;?cY>!Cv޹6y1DeG%4֎Hx\9/HtjiV Wa&ŀZzgՈ4kFBz?:hu[lh Ǧ^nqA*䭳R"p'wm~`RcK4郀/^ ɪ?*InkKO?P/=m`IV\"| y'`K]d-'oVKm )d W5jԞecPoN/'*.k"\&GxJ%aVTT`˭*nqjzWW6Ѓ2C U8s+QbhCOOũjAu=d6[rVDw,dH*eLeKUH/Q@/%5J3ÈR',|L&T7'$n0Hݷb.e9EF<rx&wIqoi!$ ͜>=}k-".Geik#wt$NsO>d&L~4.{Vz1vP1=N!Z*odecC6w2Et%?9PȆ9DeF*M l;-t4_:g=<ڿDW3G9pJ}O6?d7 /_őFi4<4[=;,RW(gHBGip7OPڜGNK!,D_?,uSԟ/5A?u :Ix6lcdOUR.^׺ksDcK9$3.BIxka*vo\.FVi3-C  @U>kz8 8 _n8jU 7)*Hq@EVyd-k)z42f pԷ\-1![V|}15F+!D[{Y `A8#[UWZF@pbSH߁D7~# m B=J8H/^7o$ҌOѧe&6YJjx rV3u0Ou Eo%U;9@caܭfb1!.)C]w\rTq=/VhUlo9Pxq'l8LFJTPNH-jiiJu(l`V,Y{oLw.3d7}%&S9M=ʃn*x2\SYcCJzW~%kK+o '2j3%lz@@qHyjl \XEIJ;(MSKnU5 BngXc% lXpHX!ٹw{fY bdPjG8 tzIuNFAf%H9 `e FXucUĻb)K5p4 [yiF41* u^Qu;%Gq%ͅ2NSMŻ/▗/\Bs^h)f>wzMtQ#!D>51<@'jQHur @?!ZX3"`מBH};nRMl":Bc%E tv p&Y܇h`AfaWV~ ܉zA0qءk3aKz>gvsH\NWHu8>9Ȓ@:)5C33I ݢn^CsbP8gT8v}#H "!V/iZF3j "㭩s+jK%3PtAIR?O7*9*r^!*+a;XokX5#zl- >kBx(HԴ)&V.f)DĂoERw 
*;Љ.S9-髶'?0oaM/Y,(UA4v#\#E m:{ QMd&[BNpu2+*/v/⟊>l<Ւ+8IYCjI%j!)~hLy]2Ĭ&~Wh'}Es-@O-Te<Zca+d6)@ L41a䓈-ӄI(<&67&)w<3B=hGiWL,NB6xFì^WrZ*22S9 q -L=D5XD;==LxAmAI7= lx+ ֭%K,v2XRw-w[M+0)4_PV7TV] ?ܿwM%3(e}~h3O&B0HT>$W|0x<984FvRVb4R\I'O xEqJ;KORoޅXwGLo}N^-s-fЏ=: !6S`qoq`rQ1ߊOA7"(vTjPZLm~m>.!p<ڗ&ap. Jy#gg "FA]_y!κ`aV9ŝGWP98$v݈gByhn=ୄiDorHB/t'ga> ;lYEأ #o,CqBCOފX};UK=֏zIx<{R^%p)LD#L.0(Y1.9|3m%?o p1)G[нM-Dnv_2ekYc܂3f;0ZQYNgkYXzAUcΖPV{6 H/W\IH8f:sxYwHShMʰ MU\n78[Euy%%"طmE 7i0-SV f6b'S~۳UpymFC9"|iM PW<l0.V?tk<>*.GH> d .I4 .. Config.pmFAQ.pod Tutorial.pod YAML.pmpackage ExtUtils::MakeMaker::Config; use strict; our $VERSION = '6.57_05'; use Config (); # Give us an overridable config. our %Config = %Config::Config; sub import { my $caller = caller; no strict 'refs'; ## no critic *{$caller.'::Config'} = \%Config; } 1; =head1 NAME ExtUtils::MakeMaker::Config - Wrapper around Config.pm =head1 SYNOPSIS use ExtUtils::MakeMaker::Config; print $Config{installbin}; # or whatever =head1 DESCRIPTION B A very thin wrapper around Config.pm so MakeMaker is easier to test. =cut =head1 AUTHOR The denizens of makemaker@perl.org. =head1 SEE ALSO L =cut =head1 SEE ALSO L gives stylistic help writing a module. L gives more information about how to write a module. There are modules to help you through the process of writing a module: L, L, L =cut 1; nd/or modify it under the same terms as Perl itself. =cut ./3 ..  Basename.pm  CheckTree.pm  Compare.pmCopy.pm DosGlob.pmFetch.pmFind.pm GlobMapper.pmPath.pmTemp.pm(stat.pm . ...snap` binboot%W dev, etc  liblibexec@&mediaW3 mntA&proc%Wʆrescue3root%W sbin%W tmp usrO var sys3.profile.cshrc COPYRIGHTdata%Wʊ cfg boot.configconf%Wturn 'foo/' dirname("foo/"); Under VMS, if there is no directory information in the $path, then the current default device and directory is used. 
=cut sub dirname { my $path = shift; my($type) = $Fileparse_fstype; if( $type eq 'VMS' and $path =~ m{/} ) { # Parse as Unix local($File::Basename::Fileparse_fstype) = ''; return dirname($path); } my($basename, $dirname) = fileparse($path); if ($type eq 'VMS') { $dirname ||= $ENV{DEFAULT}; } elsif ($type eq 'MacOS') { if( !length($basename) && $dirname !~ /^[^:]+:\z/) { _strip_trailing_sep($dirname); ($basename,$dirname) = fileparse $dirname; } $dirname .= ":" unless $dirname =~ /:\z/; } elsif (grep { $type eq $_ } qw(MSDOS DOS MSWin32 OS2)) { _strip_trailing_sep($dirname); unless( length($basename) ) { ($basename,$dirname) = fileparse $dirname; _strip_trailing_sep($dirname); } } elsif ($type eq 'AmigaOS') { if ( $dirname =~ /:\z/) { return $dirname } chop $dirname; $dirname =~ s{[^:/]+\z}{} unless length($basename); } else { _strip_trailing_sep($dirname); unless( length($basename) ) { ($basename,$dirname) = fileparse $dirname; _strip_trailing_sep($dirname); } } $dirname; } # Strip the trailing path separator. sub _strip_trailing_sep { my $type = $Fileparse_fstype; if ($type eq 'MacOS') { $_[0] =~ s/([^:]):\z/$1/s; } elsif (grep { $type eq $_ } qw(MSDOS DOS MSWin32 OS2)) { $_[0] =~ s/([^:])[\\\/]*\z/$1/; } else { $_[0] =~ s{(.)/*\z}{$1}s; } } =item C X my $type = fileparse_set_fstype(); my $previous_type = fileparse_set_fstype($type); Normally File::Basename will assume a file path type native to your current operating system (ie. /foo/bar style on Unix, \foo\bar on Windows, etc...). With this function you can override that assumption. Valid $types are "MacOS", "VMS", "AmigaOS", "OS2", "RISCOS", "MSWin32", "DOS" (also "MSDOS" for backwards bug compatibility), "Epoc" and "Unix" (all case-insensitive). If an unrecognized $type is given "Unix" will be assumed. 
If you've selected VMS syntax, and the file specification you pass to one of these routines contains a "/", they assume you are using Unix emulation and apply the Unix syntax rules instead, for that function call only. =back =cut BEGIN { my @Ignore_Case = qw(MacOS VMS AmigaOS OS2 RISCOS MSWin32 MSDOS DOS Epoc); my @Types = (@Ignore_Case, qw(Unix)); sub fileparse_set_fstype { my $old = $Fileparse_fstype; if (@_) { my $new_type = shift; $Fileparse_fstype = 'Unix'; # default foreach my $type (@Types) { $Fileparse_fstype = $type if $new_type =~ /^$type/i; } $Fileparse_igncase = (grep $Fileparse_fstype eq $_, @Ignore_Case) ? 1 : 0; } return $old; } } 1; =head1 SEE ALSO L, L, L functions return 0 if the files are equal, 1 if the files are unequal, or -1 if an error was encountered. =head1 AUTHOR File::Compare was written by Nick Ing-Simmons. Its original documentation was written by Chip Salzenberg. =cut  ...eserve file attributes or record structure. The system copy routine may also be called directly under VMS and OS/2 as C (or under VMS as C, which is the routine that does the actual work for syscopy). =item rmscopy($from,$to[,$date_flag]) X The first and second arguments may be strings, typeglobs, typeglob references, or objects inheriting from IO::Handle; they are used in all cases to obtain the I of the input and output files, respectively. The name and type of the input file are used as defaults for the output file, if necessary. A new version of the output file is always created, which inherits the structure and RMS attributes of the input file, except for owner and protections (and possibly timestamps; see below). All data from the input file is copied to the output file; if either of the first two parameters to C is a file handle, its position is unchanged. (Note that this means a file handle pointing to the output file will be associated with an old version of that file after C returns, not the newly created version.) 
The third parameter is an integer flag, which tells C how to handle timestamps. If it is E 0, none of the input file's timestamps are propagated to the output file. If it is E 0, then it is interpreted as a bitmask: if bit 0 (the LSB) is set, then timestamps other than the revision date are propagated; if bit 1 is set, the revision date is propagated. If the third parameter to C is 0, then it behaves much like the DCL COPY command: if the name or type of the output file was explicitly specified, then no timestamps are propagated, but if they were taken implicitly from the input filespec, then all timestamps other than the revision date are propagated. If this parameter is not supplied, it defaults to 0. Like C, C returns 1 on success. If an error occurs, it sets C<$!>, deletes the output file, and returns 0. =back =head1 RETURN All functions return 1 on success, 0 on failure. $! will be set if an error was encountered. =head1 AUTHOR File::Copy was written by Aaron Sherman Iajs@ajs.comE> in 1995, and updated by Charles Bailey Ibailey@newman.upenn.eduE> in 1996. =cut rlglob.bat Text::ParseWords =cut il }; *is_tainted = $@ ? \&is_tainted_pp : \&Scalar::Util::tainted; } 1; package ExtUtils::MakeMaker::FAQ; our $VERSION = '6.57_01'; 1; __END__ =head1 NAME ExtUtils::MakeMaker::FAQ - Frequently Asked Questions About MakeMaker =head1 DESCRIPTION FAQs, tricks and tips for C. =head2 Module Installation =over 4 =item How do I install a module into my home directory? If you're not the Perl administrator you probably don't have permission to install a module to its default location. Then you should install it for your own use into your home directory like so: # Non-unix folks, replace ~ with /path/to/your/home/dir perl Makefile.PL INSTALL_BASE=~ This will put modules into F<~/lib/perl5>, man pages into F<~/man> and programs into F<~/bin>. 
To ensure your Perl programs can see these newly installed modules, set your C environment variable to F<~/lib/perl5> or tell each of your programs to look in that directory with the following: use lib "$ENV{HOME}/lib/perl5"; or if $ENV{HOME} isn't set and you don't want to set it for some reason, do it the long way. use lib "/path/to/your/home/dir/lib/perl5"; =item How do I get MakeMaker and Module::Build to install to the same place? Module::Build, as of 0.28, supports two ways to install to the same location as MakeMaker. 1) Use INSTALL_BASE / C<--install_base> MakeMaker (as of 6.31) and Module::Build (as of 0.28) both can install to the same locations using the "install_base" concept. See L for details. To get MM and MB to install to the same location simply set INSTALL_BASE in MM and C<--install_base> in MB to the same location. perl Makefile.PL INSTALL_BASE=/whatever perl Build.PL --install_base /whatever 2) Use PREFIX / C<--prefix> Module::Build 0.28 added support for C<--prefix> which works like MakeMaker's PREFIX. perl Makefile.PL PREFIX=/whatever perl Build.PL --prefix /whatever =item How do I keep from installing man pages? Recent versions of MakeMaker will only install man pages on Unix like operating systems. For an individual module: perl Makefile.PL INSTALLMAN1DIR=none INSTALLMAN3DIR=none If you want to suppress man page installation for all modules you have to reconfigure Perl and tell it 'none' when it asks where to install man pages. =item How do I use a module without installing it? Two ways. One is to build the module normally... perl Makefile.PL make make test ...and then set the PERL5LIB environment variable to point at the blib/lib and blib/arch directories. The other is to install the module in a temporary location. perl Makefile.PL INSTALL_BASE=~/tmp make make test make install And then set PERL5LIB to F<~/tmp/lib/perl5>. This works well when you have multiple modules to work with. 
It also ensures that the module goes through its full installation process which may modify it. =item PREFIX vs INSTALL_BASE from Module::Build::Cookbook The behavior of PREFIX is complicated and depends closely on how your Perl is configured. The resulting installation locations will vary from machine to machine and even different installations of Perl on the same machine. Because of this, its difficult to document where prefix will place your modules. In contrast, INSTALL_BASE has predictable, easy to explain installation locations. Now that Module::Build and MakeMaker both have INSTALL_BASE there is little reason to use PREFIX other than to preserve your existing installation locations. If you are starting a fresh Perl installation we encourage you to use INSTALL_BASE. If you have an existing installation installed via PREFIX, consider moving it to an installation structure matching INSTALL_BASE and using that instead. =back =head2 Philosophy and History =over 4 =item Why not just use ? Why did MakeMaker reinvent the build configuration wheel? Why not just use autoconf or automake or ppm or Ant or ... There are many reasons, but the major one is cross-platform compatibility. Perl is one of the most ported pieces of software ever. It works on operating systems I've never even heard of (see perlport for details). It needs a build tool that can work on all those platforms and with any wacky C compilers and linkers they might have. No such build tool exists. Even make itself has wildly different dialects. So we have to build our own. =item What is Module::Build and how does it relate to MakeMaker? Module::Build is a project by Ken Williams to supplant MakeMaker. Its primary advantages are: =over 8 =item * pure perl. 
no make, no shell commands =item * easier to customize =item * cleaner internals =item * less cruft =back Module::Build is the official heir apparent to MakeMaker and we encourage people to work on M::B rather than spending time adding features to MakeMaker. =back =head2 Module Writing =over 4 =item How do I keep my $VERSION up to date without resetting it manually? Often you want to manually set the $VERSION in the main module distribution because this is the version that everybody sees on CPAN and maybe you want to customize it a bit. But for all the other modules in your dist, $VERSION is really just bookkeeping and all that's important is it goes up every time the module is changed. Doing this by hand is a pain and you often forget. Simplest way to do it automatically is to use your version control system's revision number (you are using version control, right?). In CVS, RCS and SVN you use $Revision$ (see the documentation of your version control system for details). Every time the file is checked in the $Revision$ will be updated, updating your $VERSION. SVN uses a simple integer for $Revision$ so you can adapt it for your $VERSION like so: ($VERSION) = q$Revision$ =~ /(\d+)/; In CVS and RCS version 1.9 is followed by 1.10. Since CPAN compares version numbers numerically we use a sprintf() to convert 1.9 to 1.009 and 1.10 to 1.010 which compare properly. $VERSION = sprintf "%d.%03d", q$Revision$ =~ /(\d+)\.(\d+)/g; If branches are involved (ie. $Revision: 1.5.3.4$) its a little more complicated. # must be all on one line or MakeMaker will get confused. $VERSION = do { my @r = (q$Revision$ =~ /\d+/g); sprintf "%d."."%03d" x $#r, @r }; In SVN, $Revision$ should be the same for every file in the project so they would all have the same $VERSION. CVS and RCS have a different $Revision$ per file so each file will have a differnt $VERSION. 
Distributed version control systems, such as SVK, may have a different $Revision$ based on who checks out the file leading to a different $VERSION on each machine! Finally, some distributed version control systems, such as darcs, have no concept of revision number at all. =item What's this F thing and how did it get in my F?! F is a module meta-data file pioneered by Module::Build and automatically generated as part of the 'distdir' target (and thus 'dist'). See L. To shut off its generation, pass the C flag to C. =item How do I delete everything not in my F? Some folks are surpried that C does not delete everything not listed in their MANIFEST (thus making a clean distribution) but only tells them what they need to delete. This is done because it is considered too dangerous. While developing your module you might write a new file, not add it to the MANIFEST, then run a C and be sad because your new work was deleted. If you really want to do this, you can use C to read the MANIFEST and File::Find to delete the files. But you have to be careful. Here's a script to do that. Use at your own risk. Have fun blowing holes in your foot. #!/usr/bin/perl -w use strict; use File::Spec; use File::Find; use ExtUtils::Manifest qw(maniread); my %manifest = map {( $_ => 1 )} grep { File::Spec->canonpath($_) } keys %{ maniread() }; if( !keys %manifest ) { print "No files found in MANIFEST. Stopping.\n"; exit; } find({ wanted => sub { my $path = File::Spec->canonpath($_); return unless -f $path; return if exists $manifest{ $path }; print "unlink $path\n"; unlink $path; }, no_chdir => 1 }, "." ); =item Which tar should I use on Windows? We recommend ptar from Archive::Tar not older that 1.66 with '-C' option. =item Which zip should I use on Windows for '[nd]make zipdist'? We recommend InfoZIP: L =back =head2 XS =over 4 =item How to I prevent "object version X.XX does not match bootstrap parameter Y.YY" errors? 
XS code is very sensitive to the module version number and will complain if the version number in your Perl module doesn't match. If you change your module's version # without rerunning Makefile.PL the old version number will remain in the Makefile causing the XS code to be built with the wrong number. To avoid this, you can force the Makefile to be rebuilt whenever you change the module containing the version number by adding this to your WriteMakefile() arguments. depend => { '$(FIRST_MAKEFILE)' => '$(VERSION_FROM)' } =item How do I make two or more XS files coexist in the same directory? Sometimes you need to have two and more XS files in the same package. One way to go is to put them into separate directories, but sometimes this is not the most suitable solution. The following technique allows you to put two (and more) XS files in the same directory. Let's assume that we have a package C, which includes C and C modules each having a separate XS file. First we use the following I: use ExtUtils::MakeMaker; WriteMakefile( NAME => 'Cool::Foo', VERSION_FROM => 'Foo.pm', OBJECT => q/$(O_FILES)/, # ... other attrs ... ); Notice the C attribute. MakeMaker generates the following variables in I: # Handy lists of source code files: XS_FILES= Bar.xs \ Foo.xs C_FILES = Bar.c \ Foo.c O_FILES = Bar.o \ Foo.o Therefore we can use the C variable to tell MakeMaker to use these objects into the shared library. That's pretty much it. Now write I and I, I and I, where I bootstraps the shared library and I simply loading I. The only issue left is to how to bootstrap I. This is done from I: MODULE = Cool::Foo PACKAGE = Cool::Foo BOOT: # boot the second XS file boot_Cool__Bar(aTHX_ cv); If you have more than two files, this is the place where you should boot extra XS files from. The following four files sum up all the details discussed so far. 
Foo.pm: ------- package Cool::Foo; require DynaLoader; our @ISA = qw(DynaLoader); our $VERSION = '0.01'; bootstrap Cool::Foo $VERSION; 1; Bar.pm: ------- package Cool::Bar; use Cool::Foo; # bootstraps Bar.xs 1; Foo.xs: ------- #include "EXTERN.h" #include "perl.h" #include "XSUB.h" MODULE = Cool::Foo PACKAGE = Cool::Foo BOOT: # boot the second XS file boot_Cool__Bar(aTHX_ cv); MODULE = Cool::Foo PACKAGE = Cool::Foo PREFIX = cool_foo_ void cool_foo_perl_rules() CODE: fprintf(stderr, "Cool::Foo says: Perl Rules\n"); Bar.xs: ------- #include "EXTERN.h" #include "perl.h" #include "XSUB.h" MODULE = Cool::Bar PACKAGE = Cool::Bar PREFIX = cool_bar_ void cool_bar_perl_rules() CODE: fprintf(stderr, "Cool::Bar says: Perl Rules\n"); And of course a very basic test: t/cool.t: -------- use Test; BEGIN { plan tests => 1 }; use Cool::Foo; use Cool::Bar; Cool::Foo::perl_rules(); Cool::Bar::perl_rules(); ok 1; This tip has been brought to you by Nick Ing-Simmons and Stas Bekman. =back =head1 PATCHING If you have a question you'd like to see added to the FAQ (whether or not you have the answer) please send it to makemaker@perl.org.package ExtUtils::MakeMaker::Tutorial; our $VERSION = 6.57_01; =head1 NAME ExtUtils::MakeMaker::Tutorial - Writing a module with MakeMaker =head1 SYNOPSIS use ExtUtils::MakeMaker; WriteMakefile( NAME => 'Your::Module', VERSION_FROM => 'lib/Your/Module.pm' ); =head1 DESCRIPTION This is a short tutorial on writing a simple module with MakeMaker. Its really not that hard. =head2 The Mantra MakeMaker modules are installed using this simple mantra perl Makefile.PL make make test make install There are lots more commands and options, but the above will do it. =head2 The Layout The basic files in a module look something like this. Makefile.PL MANIFEST lib/Your/Module.pm That's all that's strictly necessary. 
There's additional files you might want: lib/Your/Other/Module.pm t/some_test.t t/some_other_test.t Changes README INSTALL MANIFEST.SKIP bin/some_program =over 4 =item Makefile.PL When you run Makefile.PL, it makes a Makefile. That's the whole point of MakeMaker. The Makefile.PL is a simple program which loads ExtUtils::MakeMaker and runs the WriteMakefile() function to generate a Makefile. Here's an example of what you need for a simple module: use ExtUtils::MakeMaker; WriteMakefile( NAME => 'Your::Module', VERSION_FROM => 'lib/Your/Module.pm' ); NAME is the top-level namespace of your module. VERSION_FROM is the file which contains the $VERSION variable for the entire distribution. Typically this is the same as your top-level module. =item MANIFEST A simple listing of all the files in your distribution. Makefile.PL MANIFEST lib/Your/Module.pm File paths in a MANIFEST always use Unix conventions (ie. /) even if you're not on Unix. You can write this by hand or generate it with 'make manifest'. See L for more details. =item lib/ This is the directory where your .pm and .pod files you wish to have installed go. They are layed out according to namespace. So Foo::Bar is F. =item t/ Tests for your modules go here. Each test filename ends with a .t. So F/ 'make test' will run these tests. The directory is flat, you cannot, for example, have t/foo/bar.t run by 'make test'. Tests are run from the top level of your distribution. So inside a test you would refer to ./lib to enter the lib directory, for example. =item Changes A log of changes you've made to this module. The layout is free-form. Here's an example: 1.01 Fri Apr 11 00:21:25 PDT 2003 - thing() does some stuff now - fixed the wiggy bug in withit() 1.00 Mon Apr 7 00:57:15 PDT 2003 - "Rain of Frogs" now supported =item README A short description of your module, what it does, why someone would use it and its limitations. 
CPAN automatically pulls your README file out of the archive and makes it available to CPAN users, it is the first thing they will read to decide if your module is right for them. =item INSTALL Instructions on how to install your module along with any dependencies. Suggested information to include here: any extra modules required for use the minimum version of Perl required if only works on certain operating systems =item MANIFEST.SKIP A file full of regular expressions to exclude when using 'make manifest' to generate the MANIFEST. These regular expressions are checked against each file path found in the distribution (so you're matching against "t/foo.t" not "foo.t"). Here's a sample: ~$ # ignore emacs and vim backup files .bak$ # ignore manual backups \# # ignore CVS old revision files and emacs temp files Since # can be used for comments, # must be escaped. MakeMaker comes with a default MANIFEST.SKIP to avoid things like version control directories and backup files. Specifying your own will override this default. =item bin/ =back package ExtUtils::MakeMaker::YAML; use strict; # UTF Support? sub HAVE_UTF8 () { $] >= 5.007003 } BEGIN { if ( HAVE_UTF8 ) { # The string eval helps hide this from Test::MinimumVersion eval "require utf8;"; die "Failed to load UTF-8 support" if $@; } # Class structure require 5.004; require Exporter; require Carp; $ExtUtils::MakeMaker::YAML::VERSION = '1.44'; @ExtUtils::MakeMaker::YAML::ISA = qw{ Exporter }; @ExtUtils::MakeMaker::YAML::EXPORT = qw{ Load Dump }; @ExtUtils::MakeMaker::YAML::EXPORT_OK = qw{ LoadFile DumpFile freeze thaw }; # Error storage $ExtUtils::MakeMaker::YAML::errstr = ''; } # The character class of all characters we need to escape # NOTE: Inlined, since it's only used once # my $RE_ESCAPE = '[\\x00-\\x08\\x0b-\\x0d\\x0e-\\x1f\"\n]'; # Printed form of the unprintable characters in the lowest range # of ASCII characters, listed by ASCII ordinal position. 
my @UNPRINTABLE = qw( z x01 x02 x03 x04 x05 x06 a x08 t n v f r x0e x0f x10 x11 x12 x13 x14 x15 x16 x17 x18 x19 x1a e x1c x1d x1e x1f ); # Printable characters for escapes my %UNESCAPES = ( z => "\x00", a => "\x07", t => "\x09", n => "\x0a", v => "\x0b", f => "\x0c", r => "\x0d", e => "\x1b", '\\' => '\\', ); # Special magic boolean words my %QUOTE = map { $_ => 1 } qw{ null Null NULL y Y yes Yes YES n N no No NO true True TRUE false False FALSE on On ON off Off OFF }; ##################################################################### # Implementation # Create an empty ExtUtils::MakeMaker::YAML object sub new { my $class = shift; bless [ @_ ], $class; } # Create an object from a file sub read { my $class = ref $_[0] ? ref shift : shift; # Check the file my $file = shift or return $class->_error( 'You did not specify a file name' ); return $class->_error( "File '$file' does not exist" ) unless -e $file; return $class->_error( "'$file' is a directory, not a file" ) unless -f _; return $class->_error( "Insufficient permissions to read '$file'" ) unless -r _; # Slurp in the file local $/ = undef; local *CFG; unless ( open(CFG, $file) ) { return $class->_error("Failed to open file '$file': $!"); } my $contents = ; unless ( close(CFG) ) { return $class->_error("Failed to close file '$file': $!"); } $class->read_string( $contents ); } # Create an object from a string sub read_string { my $class = ref $_[0] ? 
ref shift : shift; my $self = bless [], $class; my $string = $_[0]; unless ( defined $string ) { return $self->_error("Did not provide a string to load"); } # Byte order marks # NOTE: Keeping this here to educate maintainers # my %BOM = ( # "\357\273\277" => 'UTF-8', # "\376\377" => 'UTF-16BE', # "\377\376" => 'UTF-16LE', # "\377\376\0\0" => 'UTF-32LE' # "\0\0\376\377" => 'UTF-32BE', # ); if ( $string =~ /^(?:\376\377|\377\376|\377\376\0\0|\0\0\376\377)/ ) { return $self->_error("Stream has a non UTF-8 BOM"); } else { # Strip UTF-8 bom if found, we'll just ignore it $string =~ s/^\357\273\277//; } # Try to decode as utf8 utf8::decode($string) if HAVE_UTF8; # Check for some special cases return $self unless length $string; unless ( $string =~ /[\012\015]+\z/ ) { return $self->_error("Stream does not end with newline character"); } # Split the file into lines my @lines = grep { ! /^\s*(?:\#.*)?\z/ } split /(?:\015{1,2}\012|\015|\012)/, $string; # Strip the initial YAML header @lines and $lines[0] =~ /^\%YAML[: ][\d\.]+.*\z/ and shift @lines; # A nibbling parser while ( @lines ) { # Do we have a document header? if ( $lines[0] =~ /^---\s*(?:(.+)\s*)?\z/ ) { # Handle scalar documents shift @lines; if ( defined $1 and $1 !~ /^(?:\#.+|\%YAML[: ][\d\.]+)\z/ ) { push @$self, $self->_read_scalar( "$1", [ undef ], \@lines ); next; } } if ( ! 
@lines or $lines[0] =~ /^(?:---|\.\.\.)/ ) { # A naked document push @$self, undef; while ( @lines and $lines[0] !~ /^---/ ) { shift @lines; } } elsif ( $lines[0] =~ /^\s*\-/ ) { # An array at the root my $document = [ ]; push @$self, $document; $self->_read_array( $document, [ 0 ], \@lines ); } elsif ( $lines[0] =~ /^(\s*)\S/ ) { # A hash at the root my $document = { }; push @$self, $document; $self->_read_hash( $document, [ length($1) ], \@lines ); } else { Carp::croak("ExtUtils::MakeMaker::YAML failed to classify the line '$lines[0]'"); } } $self; } # Deparse a scalar string to the actual scalar sub _read_scalar { my ($self, $string, $indent, $lines) = @_; # Trim trailing whitespace $string =~ s/\s*\z//; # Explitic null/undef return undef if $string eq '~'; # Single quote if ( $string =~ /^\'(.*?)\'\z/ ) { return '' unless defined $1; $string = $1; $string =~ s/\'\'/\'/g; return $string; } # Double quote. # The commented out form is simpler, but overloaded the Perl regex # engine due to recursion and backtracking problems on strings # larger than 32,000ish characters. Keep it for reference purposes. # if ( $string =~ /^\"((?:\\.|[^\"])*)\"\z/ ) { if ( $string =~ /^\"([^\\"]*(?:\\.[^\\"]*)*)\"\z/ ) { # Reusing the variable is a little ugly, # but avoids a new variable and a string copy. 
$string = $1; $string =~ s/\\"/"/g; $string =~ s/\\([never\\fartz]|x([0-9a-fA-F]{2}))/(length($1)>1)?pack("H2",$2):$UNESCAPES{$1}/gex; return $string; } # Special cases if ( $string =~ /^[\'\"!&]/ ) { Carp::croak("ExtUtils::MakeMaker::YAML does not support a feature in line '$lines->[0]'"); } return {} if $string eq '{}'; return [] if $string eq '[]'; # Regular unquoted string return $string unless $string =~ /^[>|]/; # Error Carp::croak("ExtUtils::MakeMaker::YAML failed to find multi-line scalar content") unless @$lines; # Check the indent depth $lines->[0] =~ /^(\s*)/; $indent->[-1] = length("$1"); if ( defined $indent->[-2] and $indent->[-1] <= $indent->[-2] ) { Carp::croak("ExtUtils::MakeMaker::YAML found bad indenting in line '$lines->[0]'"); } # Pull the lines my @multiline = (); while ( @$lines ) { $lines->[0] =~ /^(\s*)/; last unless length($1) >= $indent->[-1]; push @multiline, substr(shift(@$lines), length($1)); } my $j = (substr($string, 0, 1) eq '>') ? ' ' : "\n"; my $t = (substr($string, 1, 1) eq '-') ? '' : "\n"; return join( $j, @multiline ) . 
$t; } # Parse an array sub _read_array { my ($self, $array, $indent, $lines) = @_; while ( @$lines ) { # Check for a new document if ( $lines->[0] =~ /^(?:---|\.\.\.)/ ) { while ( @$lines and $lines->[0] !~ /^---/ ) { shift @$lines; } return 1; } # Check the indent level $lines->[0] =~ /^(\s*)/; if ( length($1) < $indent->[-1] ) { return 1; } elsif ( length($1) > $indent->[-1] ) { Carp::croak("ExtUtils::MakeMaker::YAML found bad indenting in line '$lines->[0]'"); } if ( $lines->[0] =~ /^(\s*\-\s+)[^\'\"]\S*\s*:(?:\s+|$)/ ) { # Inline nested hash my $indent2 = length("$1"); $lines->[0] =~ s/-/ /; push @$array, { }; $self->_read_hash( $array->[-1], [ @$indent, $indent2 ], $lines ); } elsif ( $lines->[0] =~ /^\s*\-(\s*)(.+?)\s*\z/ ) { # Array entry with a value shift @$lines; push @$array, $self->_read_scalar( "$2", [ @$indent, undef ], $lines ); } elsif ( $lines->[0] =~ /^\s*\-\s*\z/ ) { shift @$lines; unless ( @$lines ) { push @$array, undef; return 1; } if ( $lines->[0] =~ /^(\s*)\-/ ) { my $indent2 = length("$1"); if ( $indent->[-1] == $indent2 ) { # Null array entry push @$array, undef; } else { # Naked indenter push @$array, [ ]; $self->_read_array( $array->[-1], [ @$indent, $indent2 ], $lines ); } } elsif ( $lines->[0] =~ /^(\s*)\S/ ) { push @$array, { }; $self->_read_hash( $array->[-1], [ @$indent, length("$1") ], $lines ); } else { Carp::croak("ExtUtils::MakeMaker::YAML failed to classify line '$lines->[0]'"); } } elsif ( defined $indent->[-2] and $indent->[-1] == $indent->[-2] ) { # This is probably a structure like the following... # --- # foo: # - list # bar: value # # ... 
so lets return and let the hash parser handle it return 1; } else { Carp::croak("ExtUtils::MakeMaker::YAML failed to classify line '$lines->[0]'"); } } return 1; } # Parse an array sub _read_hash { my ($self, $hash, $indent, $lines) = @_; while ( @$lines ) { # Check for a new document if ( $lines->[0] =~ /^(?:---|\.\.\.)/ ) { while ( @$lines and $lines->[0] !~ /^---/ ) { shift @$lines; } return 1; } # Check the indent level $lines->[0] =~ /^(\s*)/; if ( length($1) < $indent->[-1] ) { return 1; } elsif ( length($1) > $indent->[-1] ) { Carp::croak("ExtUtils::MakeMaker::YAML found bad indenting in line '$lines->[0]'"); } # Get the key unless ( $lines->[0] =~ s/^\s*([^\'\" ][^\n]*?)\s*:(\s+|$)// ) { if ( $lines->[0] =~ /^\s*[?\'\"]/ ) { Carp::croak("ExtUtils::MakeMaker::YAML does not support a feature in line '$lines->[0]'"); } Carp::croak("ExtUtils::MakeMaker::YAML failed to classify line '$lines->[0]'"); } my $key = $1; # Do we have a value? if ( length $lines->[0] ) { # Yes $hash->{$key} = $self->_read_scalar( shift(@$lines), [ @$indent, undef ], $lines ); } else { # An indent shift @$lines; unless ( @$lines ) { $hash->{$key} = undef; return 1; } if ( $lines->[0] =~ /^(\s*)-/ ) { $hash->{$key} = []; $self->_read_array( $hash->{$key}, [ @$indent, length($1) ], $lines ); } elsif ( $lines->[0] =~ /^(\s*)./ ) { my $indent2 = length("$1"); if ( $indent->[-1] >= $indent2 ) { # Null hash entry $hash->{$key} = undef; } else { $hash->{$key} = {}; $self->_read_hash( $hash->{$key}, [ @$indent, length($1) ], $lines ); } } } } return 1; } # Save an object to a file sub write { my $self = shift; my $file = shift or return $self->_error('No file name provided'); # Write it to the file open( CFG, '>' . $file ) or return $self->_error( "Failed to open file '$file' for writing: $!" 
); print CFG $self->write_string; close CFG; return 1; } # Save an object to a string sub write_string { my $self = shift; return '' unless @$self; # Iterate over the documents my $indent = 0; my @lines = (); foreach my $cursor ( @$self ) { push @lines, '---'; # An empty document if ( ! defined $cursor ) { # Do nothing # A scalar document } elsif ( ! ref $cursor ) { $lines[-1] .= ' ' . $self->_write_scalar( $cursor, $indent ); # version object } elsif ( ref $cursor eq 'version' ) { $lines[-1] .= ' ' . $self->_write_scalar( $cursor->stringify, $indent ); # A list at the root } elsif ( ref $cursor eq 'ARRAY' ) { unless ( @$cursor ) { $lines[-1] .= ' []'; next; } push @lines, $self->_write_array( $cursor, $indent, {} ); # A hash at the root } elsif ( ref $cursor eq 'HASH' ) { unless ( %$cursor ) { $lines[-1] .= ' {}'; next; } push @lines, $self->_write_hash( $cursor, $indent, {} ); } else { Carp::croak("Cannot serialize " . ref($cursor)); } } join '', map { "$_\n" } @lines; } sub _write_scalar { my $string = $_[1]; return '~' unless defined $string; return "''" unless length $string; if ( $string =~ /[\x00-\x08\x0b-\x0d\x0e-\x1f\"\'\n]/ ) { $string =~ s/\\/\\\\/g; $string =~ s/"/\\"/g; $string =~ s/\n/\\n/g; $string =~ s/([\x00-\x1f])/\\$UNPRINTABLE[ord($1)]/g; return qq|"$string"|; } if ( $string =~ /(?:^\W|\s)/ or $QUOTE{$string} ) { return "'$string'"; } return $string; } sub _write_array { my ($self, $array, $indent, $seen) = @_; if ( $seen->{refaddr($array)}++ ) { die "ExtUtils::MakeMaker::YAML does not support circular references"; } my @lines = (); foreach my $el ( @$array ) { my $line = (' ' x $indent) . '-'; my $type = ref $el; if ( ! $type ) { $line .= ' ' . $self->_write_scalar( $el, $indent + 1 ); push @lines, $line; # version object } elsif ( $type eq 'version' ) { $line .= ' ' . 
$self->_write_scalar( $el->stringify, $indent + 1 ); push @lines, $line; } elsif ( $type eq 'ARRAY' ) { if ( @$el ) { push @lines, $line; push @lines, $self->_write_array( $el, $indent + 1, $seen ); } else { $line .= ' []'; push @lines, $line; } } elsif ( $type eq 'HASH' ) { if ( keys %$el ) { push @lines, $line; push @lines, $self->_write_hash( $el, $indent + 1, $seen ); } else { $line .= ' {}'; push @lines, $line; } } else { die "ExtUtils::MakeMaker::YAML does not support $type references"; } } @lines; } sub _write_hash { my ($self, $hash, $indent, $seen) = @_; if ( $seen->{refaddr($hash)}++ ) { die "ExtUtils::MakeMaker::YAML does not support circular references"; } my @lines = (); foreach my $name ( sort keys %$hash ) { my $el = $hash->{$name}; my $line = (' ' x $indent) . "$name:"; my $type = ref $el; if ( ! $type ) { $line .= ' ' . $self->_write_scalar( $el, $indent + 1 ); push @lines, $line; # version object } elsif ( $type eq 'version' ) { $line .= ' ' . $self->_write_scalar( $el->stringify, $indent + 1 ); push @lines, $line; } elsif ( $type eq 'ARRAY' ) { if ( @$el ) { push @lines, $line; push @lines, $self->_write_array( $el, $indent + 1, $seen ); } else { $line .= ' []'; push @lines, $line; } } elsif ( $type eq 'HASH' ) { if ( keys %$el ) { push @lines, $line; push @lines, $self->_write_hash( $el, $indent + 1, $seen ); } else { $line .= ' {}'; push @lines, $line; } } else { die "ExtUtils::MakeMaker::YAML does not support $type references"; } } @lines; } # Set error sub _error { $ExtUtils::MakeMaker::YAML::errstr = $_[1]; undef; } # Retrieve error sub errstr { $ExtUtils::MakeMaker::YAML::errstr; } ##################################################################### # YAML Compatibility sub Dump { ExtUtils::MakeMaker::YAML->new(@_)->write_string; } sub Load { my $self = ExtUtils::MakeMaker::YAML->read_string(@_); unless ( $self ) { Carp::croak("Failed to load YAML document from string"); } if ( wantarray ) { return @$self; } else { # To match YAML.pm, 
return the last document return $self->[-1]; } } BEGIN { *freeze = *Dump; *thaw = *Load; } sub DumpFile { my $file = shift; ExtUtils::MakeMaker::YAML->new(@_)->write($file); } sub LoadFile { my $self = ExtUtils::MakeMaker::YAML->read($_[0]); unless ( $self ) { Carp::croak("Failed to load YAML document from '" . ($_[0] || '') . "'"); } if ( wantarray ) { return @$self; } else { # Return only the last document to match YAML.pm, return $self->[-1]; } } ##################################################################### # Use Scalar::Util if possible, otherwise emulate it BEGIN { eval { require Scalar::Util; *refaddr = *Scalar::Util::refaddr; }; eval <<'END_PERL' if $@; # Failed to load Scalar::Util sub refaddr { my $pkg = ref($_[0]) or return undef; if (!!UNIVERSAL::can($_[0], 'can')) { bless $_[0], 'Scalar::Util::Fake'; } else { $pkg = undef; } "$_[0]" =~ /0x(\w+)/; my $i = do { local $^W; hex $1 }; bless $_[0], $pkg if defined $pkg; $i; } END_PERL } 1; __END__ =pod =head1 NAME ExtUtils::MakeMaker::YAML - clone of YAML::Tiny =head1 SYNOPSIS See L =head1 AUTHOR Adam Kennedy Eadamk@cpan.orgE =head1 SEE ALSO L, L =head1 COPYRIGHT Copyright 2006 - 2010 Adam Kennedy. This program is free software; you can redistribute it a=head1 NAME File::Basename - Parse file paths into directory, filename and suffix. =head1 SYNOPSIS use File::Basename; ($name,$path,$suffix) = fileparse($fullname,@suffixlist); $name = fileparse($fullname,@suffixlist); $basename = basename($fullname,@suffixlist); $dirname = dirname($fullname); =head1 DESCRIPTION These routines allow you to parse file paths into their directory, filename and suffix. B: C and C emulate the behaviours, and quirks, of the shell and C functions of the same name. See each function's documentation for details. If your concern is just parsing paths it is safer to use L's C and C methods. It is guaranteed that # Where $path_separator is / for Unix, \ for Windows, etc... dirname($path) . $path_separator . 
basename($path); is equivalent to the original path for all systems but VMS. =cut package File::Basename; # File::Basename is used during the Perl build, when the re extension may # not be available, but we only actually need it if running under tainting. BEGIN { if (${^TAINT}) { require re; re->import('taint'); } } use strict; use 5.006; use warnings; our(@ISA, @EXPORT, $VERSION, $Fileparse_fstype, $Fileparse_igncase); require Exporter; @ISA = qw(Exporter); @EXPORT = qw(fileparse fileparse_set_fstype basename dirname); $VERSION = "2.82"; fileparse_set_fstype($^O); =over 4 =item C X my($filename, $directories, $suffix) = fileparse($path); my($filename, $directories, $suffix) = fileparse($path, @suffixes); my $filename = fileparse($path, @suffixes); The C routine divides a file path into its $directories, $filename and (optionally) the filename $suffix. $directories contains everything up to and including the last directory separator in the $path including the volume (if applicable). The remainder of the $path is the $filename. # On Unix returns ("baz", "/foo/bar/", "") fileparse("/foo/bar/baz"); # On Windows returns ("baz", 'C:\foo\bar\', "") fileparse('C:\foo\bar\baz'); # On Unix returns ("", "/foo/bar/baz/", "") fileparse("/foo/bar/baz/"); If @suffixes are given each element is a pattern (either a string or a C) matched against the end of the $filename. The matching portion is removed and becomes the $suffix. # On Unix returns ("baz", "/foo/bar/", ".txt") fileparse("/foo/bar/baz.txt", qr/\.[^.]*/); If type is non-Unix (see C) then the pattern matching for suffix removal is performed case-insensitively, since those systems are not case-sensitive when opening existing files. You are guaranteed that C<$directories . $filename . $suffix> will denote the same location as the original $path. 
=cut sub fileparse { my($fullname,@suffices) = @_; unless (defined $fullname) { require Carp; Carp::croak("fileparse(): need a valid pathname"); } my $orig_type = ''; my($type,$igncase) = ($Fileparse_fstype, $Fileparse_igncase); my($taint) = substr($fullname,0,0); # Is $fullname tainted? if ($type eq "VMS" and $fullname =~ m{/} ) { # We're doing Unix emulation $orig_type = $type; $type = 'Unix'; } my($dirpath, $basename); if (grep { $type eq $_ } qw(MSDOS DOS MSWin32 Epoc)) { ($dirpath,$basename) = ($fullname =~ /^((?:.*[:\\\/])?)(.*)/s); $dirpath .= '.\\' unless $dirpath =~ /[\\\/]\z/; } elsif ($type eq "OS2") { ($dirpath,$basename) = ($fullname =~ m#^((?:.*[:\\/])?)(.*)#s); $dirpath = './' unless $dirpath; # Can't be 0 $dirpath .= '/' unless $dirpath =~ m#[\\/]\z#; } elsif ($type eq "MacOS") { ($dirpath,$basename) = ($fullname =~ /^(.*:)?(.*)/s); $dirpath = ':' unless $dirpath; } elsif ($type eq "AmigaOS") { ($dirpath,$basename) = ($fullname =~ /(.*[:\/])?(.*)/s); $dirpath = './' unless $dirpath; } elsif ($type eq 'VMS' ) { ($dirpath,$basename) = ($fullname =~ /^(.*[:>\]])?(.*)/s); $dirpath ||= ''; # should always be defined } else { # Default to Unix semantics. ($dirpath,$basename) = ($fullname =~ m{^(.*/)?(.*)}s); if ($orig_type eq 'VMS' and $fullname =~ m{^(/[^/]+/000000(/|$))(.*)}) { # dev:[000000] is top of VMS tree, similar to Unix '/' # so strip it off and treat the rest as "normal" my $devspec = $1; my $remainder = $3; ($dirpath,$basename) = ($remainder =~ m{^(.*/)?(.*)}s); $dirpath ||= ''; # should always be defined $dirpath = $devspec.$dirpath; } $dirpath = './' unless $dirpath; } my $tail = ''; my $suffix = ''; if (@suffices) { foreach $suffix (@suffices) { my $pat = ($igncase ? '(?i)' : '') . "($suffix)\$"; if ($basename =~ s/$pat//s) { $taint .= substr($suffix,0,0); $tail = $1 . $tail; } } } # Ensure taint is propagated from the path to its pieces. $tail .= $taint; wantarray ? 
($basename .= $taint, $dirpath .= $taint, $tail) : ($basename .= $taint); } =item C X X my $filename = basename($path); my $filename = basename($path, @suffixes); This function is provided for compatibility with the Unix shell command C. It does B always return the file name portion of a path as you might expect. To be safe, if you want the file name portion of a path use C. C returns the last level of a filepath even if the last level is clearly directory. In effect, it is acting like C for paths. This differs from C's behaviour. # Both return "bar" basename("/foo/bar"); basename("/foo/bar/"); @suffixes work as in C except all regex metacharacters are quoted. # These two function calls are equivalent. my $filename = basename("/foo/bar/baz.txt", ".txt"); my $filename = fileparse("/foo/bar/baz.txt", qr/\Q.txt\E/); Also note that in order to be compatible with the shell command, C does not strip off a suffix if it is identical to the remaining characters in the filename. =cut sub basename { my($path) = shift; # From BSD basename(1) # The basename utility deletes any prefix ending with the last slash `/' # character present in string (after first stripping trailing slashes) _strip_trailing_sep($path); my($basename, $dirname, $suffix) = fileparse( $path, map("\Q$_\E",@_) ); # From BSD basename(1) # The suffix is not stripped if it is identical to the remaining # characters in string. if( length $suffix and !length $basename ) { $basename = $suffix; } # Ensure that basename '/' == '/' if( !length $basename ) { $basename = $dirname; } return $basename; } =item C X This function is provided for compatibility with the Unix shell command C and has inherited some of its quirks. In spite of its name it does B always return the directory name as you might expect. To be safe, if you want the directory name of a path use C. 
Only on VMS (where there is no ambiguity between the file and directory portions of a path) and AmigaOS (possibly due to an implementation quirk in this module) does C work like C, returning just the $directories. # On VMS and AmigaOS my $directories = dirname($path); When using Unix or MSDOS syntax this emulates the C shell function which is subtly different from how C works. It returns all but the last level of a file path even if the last level is clearly a directory. In effect, it is not returning the directory portion but simply the path one level up acting like C for file paths. Also unlike C, C does not include a trailing slash on its returned path. # returns /foo/bar. fileparse() would return /foo/bar/ dirname("/foo/bar/baz"); # also returns /foo/bar despite the fact that baz is clearly a # directory. fileparse() would return /foo/bar/baz/ dirname("/foo/bar/baz/"); # returns '.'. fileparse() would repackage File::CheckTree; use 5.006; use Cwd; use Exporter; use File::Spec; use warnings; use strict; our $VERSION = '4.41'; our @ISA = qw(Exporter); our @EXPORT = qw(validate); =head1 NAME File::CheckTree - run many filetest checks on a tree =head1 SYNOPSIS use File::CheckTree; $num_warnings = validate( q{ /vmunix -e || die /boot -e || die /bin cd csh -ex csh !-ug sh -ex sh !-ug /usr -d || warn "What happened to $file?\n" }); =head1 DESCRIPTION The validate() routine takes a single multiline string consisting of directives, each containing a filename plus a file test to try on it. (The file test may also be a "cd", causing subsequent relative filenames to be interpreted relative to that directory.) After the file test you may put C<|| die> to make it a fatal error if the file test fails. The default is C<|| warn>. The file test may optionally have a "!' prepended to test for the opposite condition. If you do a cd and then list some relative filenames, you may want to indent them slightly for readability. 
If you supply your own die() or warn() message, you can use $file to interpolate the filename. Filetests may be bunched: "-rwx" tests for all of C<-r>, C<-w>, and C<-x>. Only the first failed test of the bunch will produce a warning. The routine returns the number of warnings issued. =head1 AUTHOR File::CheckTree was derived from lib/validate.pl which was written by Larry Wall. Revised by Paul Grassie > in 2002. =head1 HISTORY File::CheckTree used to not display fatal error messages. It used to count only those warnings produced by a generic C<|| warn> (and not those in which the user supplied the message). In addition, the validate() routine would leave the user program in whatever directory was last entered through the use of "cd" directives. These bugs were fixed during the development of perl 5.8. The first fixed version of File::CheckTree was 4.2. =cut my $Warnings; sub validate { my ($starting_dir, $file, $test, $cwd, $oldwarnings); $starting_dir = cwd; $cwd = ""; $Warnings = 0; foreach my $check (split /\n/, $_[0]) { my ($testlist, @testlist); # skip blanks/comments next if $check =~ /^\s*#/ || $check =~ /^\s*$/; # Todo: # should probably check for invalid directives and die # but earlier versions of File::CheckTree did not do this either # split a line like "/foo -r || die" # so that $file is "/foo", $test is "-r || die" # (making special allowance for quoted filenames). if ($check =~ m/^\s*"([^"]+)"\s+(.*?)\s*$/ or $check =~ m/^\s*'([^']+)'\s+(.*?)\s*$/ or $check =~ m/^\s*(\S+?)\s+(\S.*?)\s*$/) { ($file, $test) = ($1,$2); } else { die "Malformed line: '$check'"; }; # change a $test like "!-ug || die" to "!-Z || die", # capturing the bundled tests (e.g. "ug") in $2 if ($test =~ s/ ^ (!?-) (\w{2,}) \b /$1Z/x) { $testlist = $2; # split bundled tests, e.g. 
"ug" to 'u', 'g' @testlist = split(//, $testlist); } else { # put in placeholder Z for stand-alone test @testlist = ('Z'); } # will compare these two later to stop on 1st warning w/in a bundle $oldwarnings = $Warnings; foreach my $one (@testlist) { # examples of $test: "!-Z || die" or "-w || warn" my $this = $test; # expand relative $file to full pathname if preceded by cd directive $file = File::Spec->catfile($cwd, $file) if $cwd && !File::Spec->file_name_is_absolute($file); # put filename in after the test operator $this =~ s/(-\w\b)/$1 "\$file"/g; # change the "-Z" representing a bundle with the $one test $this =~ s/-Z/-$one/; # if it's a "cd" directive... if ($this =~ /^cd\b/) { # add "|| die ..." $this .= ' || die "cannot cd to $file\n"'; # expand "cd" directive with directory name $this =~ s/\bcd\b/chdir(\$cwd = '$file')/; } else { # add "|| warn" as a default disposition $this .= ' || warn' unless $this =~ /\|\|/; # change a generic ".. || die" or ".. || warn" # to call valmess instead of die/warn directly # valmess will look up the error message from %Val_Message $this =~ s/ ^ ( (\S+) \s+ \S+ ) \s* \|\| \s* (die|warn) \s* $ /$1 || valmess('$3', '$2', \$file)/x; } { # count warnings, either from valmess or '-r || warn "my msg"' # also, call any pre-existing signal handler for __WARN__ my $orig_sigwarn = $SIG{__WARN__}; local $SIG{__WARN__} = sub { ++$Warnings; if ( $orig_sigwarn ) { $orig_sigwarn->(@_); } else { warn "@_"; } }; # do the test eval $this; # re-raise an exception caused by a "... 
|| die" test if (my $err = $@) { # in case of any cd directives, return from whence we came if ($starting_dir ne cwd) { chdir($starting_dir) || die "$starting_dir: $!"; } die $err; } } # stop on 1st warning within a bundle of tests last if $Warnings > $oldwarnings; } } # in case of any cd directives, return from whence we came if ($starting_dir ne cwd) { chdir($starting_dir) || die "chdir $starting_dir: $!"; } return $Warnings; } my %Val_Message = ( 'r' => "is not readable by uid $>.", 'w' => "is not writable by uid $>.", 'x' => "is not executable by uid $>.", 'o' => "is not owned by uid $>.", 'R' => "is not readable by you.", 'W' => "is not writable by you.", 'X' => "is not executable by you.", 'O' => "is not owned by you.", 'e' => "does not exist.", 'z' => "does not have zero size.", 's' => "does not have non-zero size.", 'f' => "is not a plain file.", 'd' => "is not a directory.", 'l' => "is not a symbolic link.", 'p' => "is not a named pipe (FIFO).", 'S' => "is not a socket.", 'b' => "is not a block special file.", 'c' => "is not a character special file.", 'u' => "does not have the setuid bit set.", 'g' => "does not have the setgid bit set.", 'k' => "does not have the sticky bit set.", 'T' => "is not a text file.", 'B' => "is not a binary file." ); sub valmess { my ($disposition, $test, $file) = @_; my $ferror; if ($test =~ / ^ (!?) 
-(\w) \s* $ /x) { my ($neg, $ftype) = ($1, $2); $ferror = "$file $Val_Message{$ftype}"; if ($neg eq '!') { $ferror =~ s/ is not / should not be / || $ferror =~ s/ does not / should not / || $ferror =~ s/ not / /; } } else { $ferror = "Can't do $test $file.\n"; } die "$ferror\n" if $disposition eq 'die'; warn "$ferror\n"; } 1; package File::Compare; use 5.006; use strict; use warnings; our($VERSION, @ISA, @EXPORT, @EXPORT_OK, $Too_Big); require Exporter; $VERSION = '1.1006'; @ISA = qw(Exporter); @EXPORT = qw(compare); @EXPORT_OK = qw(cmp compare_text); $Too_Big = 1024 * 1024 * 2; sub croak { require Carp; goto &Carp::croak; } sub compare { croak("Usage: compare( file1, file2 [, buffersize]) ") unless(@_ == 2 || @_ == 3); my ($from,$to,$size) = @_; my $text_mode = defined($size) && (ref($size) eq 'CODE' || $size < 0); my ($fromsize,$closefrom,$closeto); local (*FROM, *TO); croak("from undefined") unless (defined $from); croak("to undefined") unless (defined $to); if (ref($from) && (UNIVERSAL::isa($from,'GLOB') || UNIVERSAL::isa($from,'IO::Handle'))) { *FROM = *$from; } elsif (ref(\$from) eq 'GLOB') { *FROM = $from; } else { open(FROM,"<",$from) or goto fail_open1; unless ($text_mode) { binmode FROM; $fromsize = -s FROM; } $closefrom = 1; } if (ref($to) && (UNIVERSAL::isa($to,'GLOB') || UNIVERSAL::isa($to,'IO::Handle'))) { *TO = *$to; } elsif (ref(\$to) eq 'GLOB') { *TO = $to; } else { open(TO,"<",$to) or goto fail_open2; binmode TO unless $text_mode; $closeto = 1; } if (!$text_mode && $closefrom && $closeto) { # If both are opened files we know they differ if their size differ goto fail_inner if $fromsize != -s TO; } if ($text_mode) { local $/ = "\n"; my ($fline,$tline); while (defined($fline = )) { goto fail_inner unless defined($tline = ); if (ref $size) { # $size contains ref to comparison function goto fail_inner if &$size($fline, $tline); } else { goto fail_inner if $fline ne $tline; } } goto fail_inner if defined($tline = ); } else { unless (defined($size) && 
$size > 0) { $size = $fromsize || -s TO || 0; $size = 1024 if $size < 512; $size = $Too_Big if $size > $Too_Big; } my ($fr,$tr,$fbuf,$tbuf); $fbuf = $tbuf = ''; while(defined($fr = read(FROM,$fbuf,$size)) && $fr > 0) { unless (defined($tr = read(TO,$tbuf,$fr)) && $tbuf eq $fbuf) { goto fail_inner; } } goto fail_inner if defined($tr = read(TO,$tbuf,$size)) && $tr > 0; } close(TO) || goto fail_open2 if $closeto; close(FROM) || goto fail_open1 if $closefrom; return 0; # All of these contortions try to preserve error messages... fail_inner: close(TO) || goto fail_open2 if $closeto; close(FROM) || goto fail_open1 if $closefrom; return 1; fail_open2: if ($closefrom) { my $status = $!; $! = 0; close FROM; $! = $status unless $!; } fail_open1: return -1; } sub cmp; *cmp = \&compare; sub compare_text { my ($from,$to,$cmp) = @_; croak("Usage: compare_text( file1, file2 [, cmp-function])") unless @_ == 2 || @_ == 3; croak("Third arg to compare_text() function must be a code reference") if @_ == 3 && ref($cmp) ne 'CODE'; # Using a negative buffer size puts compare into text_mode too $cmp = -1 unless defined $cmp; compare($from, $to, $cmp); } 1; __END__ =head1 NAME File::Compare - Compare files or filehandles =head1 SYNOPSIS use File::Compare; if (compare("file1","file2") == 0) { print "They're equal\n"; } =head1 DESCRIPTION The File::Compare::compare function compares the contents of two sources, each of which can be a file or a file handle. It is exported from File::Compare by default. File::Compare::cmp is a synonym for File::Compare::compare. It is exported from File::Compare only by request. File::Compare::compare_text does a line by line comparison of the two files. It stops as soon as a difference is detected. compare_text() accepts an optional third argument: This must be a CODE reference to a line comparison function, which returns 0 when both lines are considered equal. 
For example: compare_text($file1, $file2) is basically equivalent to compare_text($file1, $file2, sub {$_[0] ne $_[1]} ) =head1 RETURN File::Compare::compare and its sibling# File/Copy.pm. Written in 1994 by Aaron Sherman . This # source code has been placed in the public domain by the author. # Please be kind and preserve the documentation. # # Additions copyright 1996 by Charles Bailey. Permission is granted # to distribute the revised code under the same terms as Perl itself. package File::Copy; use 5.006; use strict; use warnings; use File::Spec; use Config; # During perl build, we need File::Copy but Scalar::Util might not be built yet # And then we need these games to avoid loading overload, as that will # confuse miniperl during the bootstrap of perl. my $Scalar_Util_loaded = eval q{ require Scalar::Util; require overload; 1 }; our(@ISA, @EXPORT, @EXPORT_OK, $VERSION, $Too_Big, $Syscopy_is_copy); sub copy; sub syscopy; sub cp; sub mv; $VERSION = '2.21'; require Exporter; @ISA = qw(Exporter); @EXPORT = qw(copy move); @EXPORT_OK = qw(cp mv); $Too_Big = 1024 * 1024 * 2; sub croak { require Carp; goto &Carp::croak; } sub carp { require Carp; goto &Carp::carp; } # Look up the feature settings on VMS using VMS::Feature when available. my $use_vms_feature = 0; BEGIN { if ($^O eq 'VMS') { if (eval { local $SIG{__DIE__}; require VMS::Feature; }) { $use_vms_feature = 1; } } } # Need to look up the UNIX report mode. This may become a dynamic mode # in the future. sub _vms_unix_rpt { my $unix_rpt; if ($use_vms_feature) { $unix_rpt = VMS::Feature::current("filename_unix_report"); } else { my $env_unix_rpt = $ENV{'DECC$FILENAME_UNIX_REPORT'} || ''; $unix_rpt = $env_unix_rpt =~ /^[ET1]/i; } return $unix_rpt; } # Need to look up the EFS character set mode. This may become a dynamic # mode in the future. 
# Report whether the VMS extended-filename character set (EFS) is enabled.
# Uses VMS::Feature when $use_vms_feature is set, otherwise falls back to the
# DECC$EFS_CHARSET environment variable (true when it starts with E, T or 1).
sub _vms_efs {
    my $efs;
    if ($use_vms_feature) {
        $efs = VMS::Feature::current("efs_charset");
    } else {
        my $env_efs = $ENV{'DECC$EFS_CHARSET'} || '';
        $efs = $env_efs =~ /^[ET1]/i;
    }
    return $efs;
}

# Build the name "$to/<basename of $from>", loading File::Basename on demand.
sub _catname {
    my($from, $to) = @_;
    if (not defined &basename) {
        require File::Basename;
        File::Basename->import('basename');
    }
    return File::Spec->catfile($to, basename($from));
}

# VMS only: turn the destination $to into the file spec handed to
# syscopy()/rename().
#
# VMS has sticky defaults on extensions, which means that if there is a
# null extension on the destination file, it will inherit the extension of
# the source file.  So add a '.' for a null extension.  In unix_rpt mode,
# the trailing dot should not be added.
sub _vms_dest_spec {
    my ($to) = @_;

    my $vms_efs  = _vms_efs();
    my $unix_rpt = _vms_unix_rpt();

    # With EFS the name is used as given; otherwise convert to VMS syntax.
    my $spec = $vms_efs ? $to : VMS::Filespec::vmsify($to);

    my ($vol, $dirs, $file) = File::Spec->splitpath($spec);

    # A dot preceded by '^' is an escaped literal, not an extension separator.
    $file = $file . '.'
        unless (($file =~ /(?<!\^)\./) || $unix_rpt);

    return File::Spec->catpath($vol, $dirs, $file);
}

# _eq($from, $to) tells whether $from and $to are identical
#
# Blessed objects that overload stringification are compared by their string
# form (only when Scalar::Util has been loaded).  A reference never equals a
# non-reference; two references are compared by address, two plain scalars
# as strings.
sub _eq {
    my ($from, $to) = map {
        $Scalar_Util_loaded && Scalar::Util::blessed($_)
            && overload::Method($_, q{""})
            ? "$_"
            : $_
    } (@_);
    return '' if ( (ref $from) xor (ref $to) );
    return $from == $to if ref $from;
    return $from eq $to;
}

# copy(FROM, TO [, BUFFERSIZE])
#
# Copies FROM to TO; either may be a file name or a filehandle.  Returns 1 on
# success and 0 on failure (with $! describing the error).  Croaks on a wrong
# argument count or a non-positive buffer size.  When syscopy() is a genuine
# system copy and TO is a file name, the work is delegated to it.
sub copy {
    croak("Usage: copy(FROM, TO [, BUFFERSIZE]) ")
      unless(@_ == 2 || @_ == 3);

    my $from = shift;
    my $to = shift;

    my $size;
    if (@_) {
        $size = shift(@_) + 0;
        croak("Bad buffer size for copy: $size\n") unless ($size > 0);
    }

    my $from_a_handle = (ref($from)
                         ? (ref($from) eq 'GLOB'
                            || UNIVERSAL::isa($from, 'GLOB')
                            || UNIVERSAL::isa($from, 'IO::Handle'))
                         : (ref(\$from) eq 'GLOB'));
    my $to_a_handle =   (ref($to)
                         ? (ref($to) eq 'GLOB'
                            || UNIVERSAL::isa($to, 'GLOB')
                            || UNIVERSAL::isa($to, 'IO::Handle'))
                         : (ref(\$to) eq 'GLOB'));

    if (_eq($from, $to)) { # works for references, too
        carp("'$from' and '$to' are identical (not copied)");
        # The "copy" was a success as the source and destination contain
        # the same data.
        return 1;
    }

    # Same device and inode means the two names refer to one file
    # (hard link or symlink); a named pipe is exempt.
    if ((($Config{d_symlink} && $Config{d_readlink}) || $Config{d_link}) &&
        !($^O eq 'MSWin32' || $^O eq 'os2')) {
        my @fs = stat($from);
        if (@fs) {
            my @ts = stat($to);
            if (@ts && $fs[0] == $ts[0] && $fs[1] == $ts[1] && !-p $from) {
                carp("'$from' and '$to' are identical (not copied)");
                return 0;
            }
        }
    }

    if (!$from_a_handle && !$to_a_handle && -d $to && ! -d $from) {
        $to = _catname($from, $to);
    }

    if (defined &syscopy && !$Syscopy_is_copy
        && !$to_a_handle
        && !($from_a_handle && $^O eq 'os2' )     # OS/2 cannot handle handles
        && !($from_a_handle && $^O eq 'mpeix')    # and neither can MPE/iX.
        && !($from_a_handle && $^O eq 'MSWin32')
        && !($from_a_handle && $^O eq 'NetWare')
       )
    {
        my $copy_to = $to;

        if ($^O eq 'VMS' && -e $from) {
            if (! -d $to && ! -d $from) {
                $copy_to = _vms_dest_spec($to);

                # Get rid of the old versions to be like UNIX
                1 while unlink $copy_to;
            }
        }

        return syscopy($from, $copy_to) || 0;
    }

    my $closefrom = 0;
    my $closeto = 0;
    my ($status, $buf);
    local($\) = '';

    my $from_h;
    if ($from_a_handle) {
        $from_h = $from;
    } else {
        open $from_h, "<", $from or goto fail_open1;
        binmode $from_h or die "($!,$^E)";
        $closefrom = 1;
    }

    # Seems most logical to do this here, in case future changes would want to
    # make this croak for some reason.
    unless (defined $size) {
        $size = tied(*$from_h) ? 0 : -s $from_h || 0;
        $size = 1024 if ($size < 512);
        $size = $Too_Big if ($size > $Too_Big);
    }

    my $to_h;
    if ($to_a_handle) {
        $to_h = $to;
    } else {
        $to_h = \do { local *FH }; # XXX is this line obsolete?
        open $to_h, ">", $to or goto fail_open2;
        binmode $to_h or die "($!,$^E)";
        $closeto = 1;
    }

    $! = 0;
    for (;;) {
        my ($r, $w, $t);
        defined($r = sysread($from_h, $buf, $size))
            or goto fail_inner;
        last unless $r;
        # syswrite may write fewer bytes than asked; loop until the
        # whole buffer has gone out.
        for ($w = 0; $w < $r; $w += $t) {
            $t = syswrite($to_h, $buf, $r - $w, $w)
                or goto fail_inner;
        }
    }

    close($to_h) || goto fail_open2 if $closeto;
    close($from_h) || goto fail_open1 if $closefrom;

    # Use this idiom to avoid uninitialized value warning.
    return 1;

    # All of these contortions try to preserve error messages...
  fail_inner:
    if ($closeto) {
        $status = $!;
        $! = 0;
        close $to_h;
        $! = $status unless $!;
    }
  fail_open2:
    if ($closefrom) {
        $status = $!;
        $! = 0;
        close $from_h;
        $! = $status unless $!;
    }
  fail_open1:
    return 0;
}

# cp(FROM, TO [, BUFFERSIZE])
#
# Like copy(), but afterwards tries to give TO the permission bits of FROM
# (or keeps TO's previous bits when TO already existed).  setuid/setgid bits
# are dropped unless ownership/group membership allow them.  Returns 1 on
# success, 0 if the copy or the chmod failed.
sub cp {
    my($from,$to) = @_;
    my(@fromstat) = stat $from;
    my(@tostat) = stat $to;
    my $perm;

    return 0 unless copy(@_) and @fromstat;

    if (@tostat) {
        $perm = $tostat[2];
    } else {
        $perm = $fromstat[2] & ~(umask || 0);
        @tostat = stat $to;
    }
    # Might be more robust to look for S_I* in Fcntl, but we're
    # trying to avoid dependence on any XS-containing modules,
    # since File::Copy is used during the Perl build.
    $perm &= 07777;
    if ($perm & 06000) {
        croak("Unable to check setuid/setgid permissions for $to: $!")
            unless @tostat;

        if ($perm & 04000 and                     # setuid
            $fromstat[4] != $tostat[4]) {         # owner must match
            $perm &= ~06000;
        }

        if ($perm & 02000 && $> != 0) {           # if not root, setgid
            my $ok = $fromstat[5] == $tostat[5];  # group must match
            if ($ok) {                            # and we must be in group
                $ok = grep { $_ == $fromstat[5] } split /\s+/, $)
            }
            $perm &= ~06000 unless $ok;
        }
    }
    return 0 unless @tostat;
    return 1 if $perm == ($tostat[2] & 07777);
    return eval { chmod $perm, $to; } ? 1 : 0;
}

# _move(FROM, TO, FALLBACK)
#
# Shared implementation of move() and mv().  Tries rename() first; if that
# fails, copies with FALLBACK (a code ref: \&copy or \&cp), carries over the
# access/modification times and unlinks FROM.  Returns 1 on success, 0 on
# failure with $! and $^E restored to the values of the failing operation.
sub _move {
    croak("Usage: move(FROM, TO) ") unless @_ == 3;

    my($from,$to,$fallback) = @_;

    my($fromsz,$tosz1,$tomt1,$tosz2,$tomt2,$sts,$ossts);

    if (-d $to && ! -d $from) {
        $to = _catname($from, $to);
    }

    ($tosz1,$tomt1) = (stat($to))[7,9];
    $fromsz = -s $from;
    if ($^O eq 'os2' and defined $tosz1 and defined $fromsz) {
        # will not rename with overwrite
        unlink $to;
    }

    my $rename_to = $to;
    # Note: this must test $^O itself; negating it ("-$^O") yields "-VMS",
    # which never equals 'VMS' and would leave this branch unreachable.
    if ($^O eq 'VMS' && -e $from) {
        if (! -d $to && ! -d $from) {
            $rename_to = _vms_dest_spec($to);

            # Get rid of the old versions to be like UNIX
            1 while unlink $rename_to;
        }
    }

    return 1 if rename $from, $rename_to;

    # Did rename return an error even though it succeeded, because $to
    # is on a remote NFS file system, and NFS lost the server's ack?
    return 1 if defined($fromsz) && !-e $from &&           # $from disappeared
                (($tosz2,$tomt2) = (stat($to))[7,9]) &&    # $to's there
                  ((!defined $tosz1) ||                    #  not before or
                   ($tosz1 != $tosz2 or $tomt1 != $tomt2)) &&  #   was changed
                $tosz2 == $fromsz;                         # it's all there

    ($tosz1,$tomt1) = (stat($to))[7,9];  # just in case rename did something

    {
        local $@;
        eval {
            local $SIG{__DIE__};
            $fallback->($from,$to) or die;
            my($atime, $mtime) = (stat($from))[8,9];
            utime($atime, $mtime, $to);
            unlink($from)   or die;
        };
        return 1 unless $@;
    }
    ($sts,$ossts) = ($! + 0, $^E + 0);

    # Remove a partial copy, but only if $to is new or was changed by us.
    ($tosz2,$tomt2) = ((stat($to))[7,9],0,0) if defined $tomt1;
    unlink($to) if !defined($tomt1) or $tomt1 != $tomt2 or $tosz1 != $tosz2;
    ($!,$^E) = ($sts,$ossts);
    return 0;
}

# Public entry points: move() falls back to copy(), mv() falls back to cp().
sub move { _move(@_,\&copy); }
sub mv   { _move(@_,\&cp);   }

# &syscopy is an XSUB under OS/2
unless (defined &syscopy) {
    if ($^O eq 'VMS') {
        *syscopy = \&rmscopy;
    } elsif ($^O eq 'mpeix') {
        *syscopy = sub {
            return 0 unless @_ == 2;
            # Use the MPE cp program in order to
            # preserve MPE file attributes.
            return system('/bin/cp', '-f', $_[0], $_[1]) == 0;
        };
    } elsif ($^O eq 'MSWin32' && defined &DynaLoader::boot_DynaLoader) {
        # Win32::CopyFile() will only work if we can load Win32.xs
        *syscopy = sub {
            return 0 unless @_ == 2;
            return Win32::CopyFile(@_, 1);
        };
    } else {
        $Syscopy_is_copy = 1;
        *syscopy = \&copy;
    }
}

1;

__END__

=head1 NAME

File::Copy - Copy files or filehandles

=head1 SYNOPSIS

        use File::Copy;

        copy("file1","file2") or die "Copy failed: $!";
        copy("Copy.pm",\*STDOUT);
        move("/dev1/fileA","/dev2/fileB");

        use File::Copy "cp";

        $n = FileHandle->new("/a/file","r");
        cp($n,"x");

=head1 DESCRIPTION

The File::Copy module provides two basic functions, C<copy> and
C<move>, which are useful for getting the contents of a file from
one place to another.

=over 4

=item copy
X<copy> X<cp>

The C<copy> function takes two
parameters: a file to copy from and a file to copy to. Either
argument may be a string, a FileHandle reference or a FileHandle
glob. Obviously, if the first argument is a filehandle of some
sort, it will be read from, and if it is a file I<name> it will
be opened for reading. Likewise, the second argument will be
written to (and created if need be).  Trying to copy a file on top
of itself is not fatal: a warning is issued and nothing is copied.

If the destination (second argument) already exists and is a directory,
and the source (first argument) is not a filehandle, then the source
file will be copied into the directory specified by the destination,
using the same base name as the source file.
It's a failure to have a filehandle as the source when the destination
is a directory.

B<Note that passing in
files as handles instead of names may lead to loss of information
on some operating systems; it is recommended that you use file
names whenever possible.>  Files are opened in binary mode where
applicable.  To get a consistent behaviour when copying from a
filehandle to a file, use C<binmode> on the filehandle.

An optional third parameter can be used to specify the buffer
size used for copying. This is the number of bytes from the
first file, that will be held in memory at any given
time, before being written to the second file. The default
buffer size depends upon the file, but will generally be the
whole file (up to 2MB), or 1k for filehandles that do not
reference files (eg. sockets).

You may use the syntax C<use File::Copy "cp"> to get at the C<cp>
alias for this function. The syntax is I<exactly> the same.  The
behavior is nearly the same as well: as of version 2.15, C<cp> will
preserve the source file's permission bits like the shell utility
C<cp(1)> would do, while C<copy> uses the default permissions for the
target file (which may depend on the process' C<umask>, file
ownership, inherited ACLs, etc.).  If an error occurs in setting
permissions, C<cp> will return 0, regardless of whether the file was
successfully copied.

=item move
X<move> X<mv> X<rename>

The C<move> function also takes two parameters: the current name
and the intended name of the file to be moved.  If the destination
already exists and is a directory, and the source is not a
directory, then the source file will be renamed into the directory
specified by the destination.

If possible, move() will simply rename the file.  Otherwise, it copies
the file to the new location and deletes the original.  If an error occurs
during this copy-and-delete process, you may be left with a (possibly partial)
copy of the file under the destination name.

You may use the C<mv> alias for this function in the same way that
you may use the C<cp> alias for C<copy>.

=item syscopy
X<syscopy>

File::Copy also provides the C<syscopy> routine, which copies the
file specified in the first parameter to the file specified in the
second parameter, preserving OS-specific attributes and file
structure.
For Unix systems, this is equivalent to the simple
C<copy> routine, which doesn't preserve OS-specific attributes.  For
VMS systems, this calls the C<rmscopy> routine (see below).  For OS/2
systems, this calls the C<syscopy> XSUB directly. For Win32 systems,
this calls C<Win32::CopyFile>.

B<Special behaviour if C<syscopy> is defined (OS/2, VMS and Win32)>:

If both arguments to C<copy> are not file handles,
then C<copy> will perform a "system copy" of
the input file to a new output file, in order to preserve file
attributes, indexed file structure, I<etc.>  The buffer size
parameter is ignored.  If either argument to C<copy> is a
handle to an opened file, then data is copied using Perl
operators, and no effort is made to pr
#!perl -w

# use strict fails
#Can't use string ("main::glob") as a symbol ref while "strict refs" in use at /usr/lib/perl5/5.005/File/DosGlob.pm line 191.

#
# Documentation at the __END__
#

package File::DosGlob;

our $VERSION = '1.04';
use strict;
use warnings;

# doglob($cond, @patterns)
#
# Expands DOS-style wildcard patterns ('*' and '?') into the list of
# matching paths.  When $cond is the string 'd' only directories are
# returned; any other value returns every existing entry.  Wildcards may
# also appear in directory components, which are expanded recursively.
# A pattern wrapped in double quotes is taken literally (no globbing).
# Matching is case-insensitive.  The caller's pattern variables are left
# untouched.
sub doglob {
    my $cond = shift;
    my @retval = ();
    my $fix_drive_relative_paths;
    #print "doglob: ", join('|', @_), "\n";
  OUTER:
    for my $arg (@_) {
        # Work on a private copy: a foreach variable aliases the caller's
        # argument, and the pattern is rewritten several times below.
        my $pat = $arg;
        my @matched = ();
        my @globdirs = ();
        my $head = '.';
        my $sepchr = '/';
        my $tail;
        next OUTER unless defined $pat and $pat ne '';
        # if arg is within quotes strip em and do no globbing
        if ($pat =~ /^"(.*)"\z/s) {
            $pat = $1;
            if ($cond eq 'd') { push(@retval, $pat) if -d $pat }
            else              { push(@retval, $pat) if -e $pat }
            next OUTER;
        }
        # wildcards with a drive prefix such as h:*.pm must be changed
        # to h:./*.pm to expand correctly
        if ($pat =~ m|^([A-Za-z]:)[^/\\]|s) {
            substr($pat,0,2) = $1 . "./";
            $fix_drive_relative_paths = 1;
        }
        # split into directory part, separator and final component
        if ($pat =~ m|^(.*)([\\/])([^\\/]*)\z|s) {
            ($head, $sepchr, $tail) = ($1,$2,$3);
            #print "div: |$head|$sepchr|$tail|\n";
            push (@retval, $pat), next OUTER if $tail eq '';
            if ($head =~ /[*?]/) {
                # wildcards in the directory part: expand the directories
                # first, then glob the tail inside each of them
                @globdirs = doglob('d', $head);
                push(@retval, doglob($cond, map {"$_$sepchr$tail"} @globdirs)),
                    next OUTER if @globdirs;
            }
            $head .= $sepchr if $head eq '' or $head =~ /^[A-Za-z]:\z/s;
            $pat = $tail;
        }
        #
        # If file component has no wildcards, we can avoid opendir
        unless ($pat =~ /[*?]/) {
            $head = '' if $head eq '.';
            $head .= $sepchr unless $head eq '' or substr($head,-1) eq $sepchr;
            $head .= $pat;
            if ($cond eq 'd') { push(@retval,$head) if -d $head }
            else              { push(@retval,$head) if -e $head }
            next OUTER;
        }
        # lexical handle: nothing shared with other users of the package
        opendir(my $dh, $head) or next OUTER;
        my @leaves = readdir $dh;
        closedir $dh;
        $head = '' if $head eq '.';
        $head .= $sepchr unless $head eq '' or substr($head,-1) eq $sepchr;

        # escape regex metachars but not glob chars
        $pat =~ s:([].+^\-\${}()[|]):\\$1:g;
        # and convert DOS-style wildcards to regex
        $pat =~ s/\*/.*/g;
        $pat =~ s/\?/.?/g;

        #print "regex: '$pat', head: '$head'\n";
        my $matchsub = sub { $_[0] =~ m|^$pat\z|is };
      INNER:
        for my $e (@leaves) {
            next INNER if $e eq '.' or $e eq '..';
            next INNER if $cond eq 'd' and ! -d "$head$e";
            push(@matched, "$head$e"), next INNER if $matchsub->($e);
            #
            # [DOS compatibility special case]
            # Failed, add a trailing dot and try again, but only
            # if name does not have a dot in it *and* pattern
            # has a dot *and* name is shorter than 9 chars.
            #
            if (index($e,'.') == -1 and length($e) < 9
                and index($pat,'\\.') != -1) {
                push(@matched, "$head$e"), next INNER if $matchsub->("$e.");
            }
        }
        push @retval, @matched if @matched;
    }
    if ($fix_drive_relative_paths) {
        s|^([A-Za-z]:)\./|$1| for @retval;
    }
    return @retval;
}

#
# this can be used to override CORE::glob in a specific
# package by saying C<use File::DosGlob 'glob';> in that
# namespace.
# # context (keyed by second cxix arg provided by core) my %iter; my %entries; sub glob { my($pat,$cxix) = @_; my @pat; # glob without args defaults to $_ $pat = $_ unless defined $pat; # extract patterns if ($pat =~ /\s/) { require Text::ParseWords; @pat = Text::ParseWords::parse_line('\s+',0,$pat); } else { push @pat, $pat; } # Mike Mestnik: made to do abc{1,2,3} == abc1 abc2 abc3. # abc3 will be the original {3} (and drop the {}). # abc1 abc2 will be put in @appendpat. # This was just the esiest way, not nearly the best. REHASH: { my @appendpat = (); for (@pat) { # There must be a "," I.E. abc{efg} is not what we want. while ( /^(.*)(?; # from the command line (overrides only in main::) > perl -MFile::DosGlob=glob -e "print <../pe*/*p?>" =head1 DESCRIPTION A module that implements DOS-like globbing with a few enhancements. It is largely compatible with perlglob.exe (the M$ setargv.obj version) in all but one respect--it understands wildcards in directory components. For example, C<<..\\l*b\\file/*glob.p?>> will work as expected (in that it will find something like '..\lib\File/DosGlob.pm' alright). Note that all path components are case-insensitive, and that backslashes and forward slashes are both accepted, and preserved. You may have to double the backslashes if you are putting them in literally, due to double-quotish parsing of the pattern by perl. Spaces in the argument delimit distinct patterns, so C globs all filenames that end in C<.exe> or C<.dll>. If you want to put in literal spaces in the glob pattern, you can escape them with either double quotes, or backslashes. e.g. C, or C. The argument is tokenized using C, so see L for details of the quoting rules used. Extending it to csh patterns is left as an exercise to the reader. =head1 EXPORTS (by request only) glob() =head1 BUGS Should probably be built into the core, and needs to stop pandering to DOS habits. Needs a dose of optimizium too. 
=head1 AUTHOR Gurusamy Sarathy =head1 HISTORY =over 4 =item * Support for globally overriding glob() (GSAR 3-JUN-98) =item * Scalar context, independent iterator context fixes (GSAR 15-SEP-97) =item * A few dir-vs-file optimizations result in glob importation being 10 times faster than using perlglob.exe, and using perlglob.bat is only twice as slow as perlglob.exe (GSAR 28-MAY-97) =item * Several cleanups prompted by lack of compatible perlglob.exe under Borland (GSAR 27-MAY-97) =item * Initial version (GSAR 20-FEB-97) =back =head1 SEE ALSO perl pepackage File::Fetch; use strict; use FileHandle; use File::Temp; use File::Copy; use File::Spec; use File::Spec::Unix; use File::Basename qw[dirname]; use Cwd qw[cwd]; use Carp qw[carp]; use IPC::Cmd qw[can_run run QUOTE]; use File::Path qw[mkpath]; use File::Temp qw[tempdir]; use Params::Check qw[check]; use Module::Load::Conditional qw[can_load]; use Locale::Maketext::Simple Style => 'gettext'; use vars qw[ $VERBOSE $PREFER_BIN $FROM_EMAIL $USER_AGENT $BLACKLIST $METHOD_FAIL $VERSION $METHODS $FTP_PASSIVE $TIMEOUT $DEBUG $WARN ]; $VERSION = '0.32'; $VERSION = eval $VERSION; # avoid warnings with development releases $PREFER_BIN = 0; # XXX TODO implement $FROM_EMAIL = 'File-Fetch@example.com'; $USER_AGENT = "File::Fetch/$VERSION"; $BLACKLIST = [qw|ftp|]; $METHOD_FAIL = { }; $FTP_PASSIVE = 1; $TIMEOUT = 0; $DEBUG = 0; $WARN = 1; ### methods available to fetch the file depending on the scheme $METHODS = { http => [ qw|lwp httptiny wget curl lftp fetch httplite lynx iosock| ], ftp => [ qw|lwp netftp wget curl lftp fetch ncftp ftp| ], file => [ qw|lwp lftp file| ], rsync => [ qw|rsync| ] }; ### silly warnings ### local $Params::Check::VERBOSE = 1; local $Params::Check::VERBOSE = 1; local $Module::Load::Conditional::VERBOSE = 0; local $Module::Load::Conditional::VERBOSE = 0; ### see what OS we are on, important for file:// uris ### use constant ON_WIN => ($^O eq 'MSWin32'); use constant ON_VMS => ($^O eq 'VMS'); use constant 
ON_UNIX => (!ON_WIN); use constant HAS_VOL => (ON_WIN); use constant HAS_SHARE => (ON_WIN); use constant HAS_FETCH => ( $^O =~ m!^(freebsd|netbsd|dragonfly)$! ); =pod =head1 NAME File::Fetch - A generic file fetching mechanism =head1 SYNOPSIS use File::Fetch; ### build a File::Fetch object ### my $ff = File::Fetch->new(uri => 'http://some.where.com/dir/a.txt'); ### fetch the uri to cwd() ### my $where = $ff->fetch() or die $ff->error; ### fetch the uri to /tmp ### my $where = $ff->fetch( to => '/tmp' ); ### parsed bits from the uri ### $ff->uri; $ff->scheme; $ff->host; $ff->path; $ff->file; =head1 DESCRIPTION File::Fetch is a generic file fetching mechanism. It allows you to fetch any file pointed to by a C, C, C, or C uri by a number of different means. See the C section further down for details. =head1 ACCESSORS A C object has the following accessors =over 4 =item $ff->uri The uri you passed to the constructor =item $ff->scheme The scheme from the uri (like 'file', 'http', etc) =item $ff->host The hostname in the uri. Will be empty if host was originally 'localhost' for a 'file://' url. =item $ff->vol On operating systems with the concept of a volume the second element of a file:// is considered to the be volume specification for the file. Thus on Win32 this routine returns the volume, on other operating systems this returns nothing. On Windows this value may be empty if the uri is to a network share, in which case the 'share' property will be defined. Additionally, volume specifications that use '|' as ':' will be converted on read to use ':'. On VMS, which has a volume concept, this field will be empty because VMS file specifications are converted to absolute UNIX format and the volume information is transparently included. =item $ff->share On systems with the concept of a network share (currently only Windows) returns the sharename from a file://// url. On other operating systems returns empty. 
=item $ff->path

The path from the uri, will be at least a single '/'.

=item $ff->file

The name of the remote file. For the local file name, the
result of $ff->output_file will be used.

=cut


##########################
### Object & Accessors ###
##########################

{
    ### one entry per object attribute; also the Params::Check
    ### template used to validate constructor arguments
    my $Tmpl = {
        scheme          => { default => 'http' },
        host            => { default => 'localhost' },
        path            => { default => '/' },
        file            => { required => 1 },
        uri             => { required => 1 },
        vol             => { default => '' }, # windows for file:// uris
        share           => { default => '' }, # windows for file:// uris
        _error_msg      => { no_override => 1 },
        _error_msg_long => { no_override => 1 },
    };

    ### install a combined getter/setter for every template key
    for my $method ( keys %$Tmpl ) {
        no strict 'refs';
        *$method = sub {
            my ($self, @new) = @_;
            $self->{$method} = $new[0] if @new;
            return $self->{$method};
        }
    }

    ### validate the key/value pairs against the template and bless the
    ### result into $class; returns false (after recording an error) when
    ### the host, path or file is missing
    sub _create {
        my ($class, %hash) = @_;

        my $self = check( $Tmpl, \%hash );
        return unless $self;

        bless $self, $class;

        ### every scheme except file:// needs a host
        my $is_local = lc($self->scheme) eq 'file';
        if( !$is_local and !$self->host ) {
            return $class->_error(loc(
                "Hostname required when fetching from '%1'",$self->scheme));
        }

        for my $field (qw[path file]) {
            next if $self->$field(); # 5.5.x needs the ()
            return $class->_error(loc("No '%1' specified",$field));
        }

        return $self;
    }
}


=item $ff->output_file

The name of the output file. This is the same as $ff->file,
but any query parameters are stripped off. For example:

    http://example.com/index.html?x=y

would make the output file be C<index.html> rather than
C<index.html?x=y>.

=back

=cut

sub output_file {
    my ($self) = @_;

    ### drop everything from the first '?' onwards
    (my $name = $self->file) =~ s/\?.*$//g;

    return $name;
}

### XXX do this or just point to URI::Escape?
# =head2 $esc_uri = $ff->escaped_uri
#
# =cut
#
# ### most of this is stolen straight from URI::escape
# {   ### Build a char->hex map
#     my %escapes = map { chr($_) => sprintf("%%%02X", $_) } 0..255;
#
#     sub escaped_uri {
#         my $self = shift;
#         my $uri  = $self->uri;
#
#         ### Default unsafe characters.
#         RFC 2732 ^(uric - reserved)
#         $uri =~ s/([^A-Za-z0-9\-_.!~*'()])/
#                     $escapes{$1} || $self->_fail_hi($1)/ge;
#
#         return $uri;
#     }
#
#     sub _fail_hi {
#         my $self = shift;
#         my $char = shift;
#
#         $self->_error(loc(
#             "Can't escape '%1', try using the '%2' module instead",
#             sprintf("\\x{%04X}", ord($char)), 'URI::Escape'
#         ));
#     }
#
#     sub output_file {
#
#     }
#
#
# }

=head1 METHODS

=head2 $ff = File::Fetch->new( uri => 'http://some.where.com/dir/file.txt' );

Parses the uri and creates a corresponding File::Fetch::Item object,
that is ready to be C<fetch>ed and returns it.

Returns false on failure.

=cut

sub new {
    my ($class, %hash) = @_;

    ### the only accepted (and required) argument is 'uri'
    my $uri;
    check( { uri => { required => 1, store => \$uri } }, \%hash )
        or return;

    ### break the uri into its scheme/host/path/file parts ...
    my $parts = $class->_parse_uri( $uri );
    return unless $parts;

    ### ... and build the object from them
    my $ff = $class->_create( %$parts );
    return unless $ff;

    return $ff;
}

### parses an uri to a hash structure:
###
### $class->_parse_uri( 'ftp://ftp.cpan.org/pub/mirror/index.txt' )
###
### becomes:
###
### $href = {
###     scheme  => 'ftp',
###     host    => 'ftp.cpan.org',
###     path    => '/pub/mirror/',
###     file    => 'index.txt'
### };
###
### In the case of file:// urls there may be additional fields
###
### For systems with volume specifications such as Win32 there will be
### a volume specifier provided in the 'vol' field.
###
###   'vol' => 'volumename'
###
### For windows file shares there may be a 'share' key specified
###
###   'share' => 'sharename'
###
### Note that the rules of what a file:// url means vary by the operating system
### of the host being addressed.
Thus file:///d|/foo/bar.txt means the obvious ### 'D:\foo\bar.txt' on windows, but on unix it means '/d|/foo/bar.txt' and ### not '/foo/bar.txt' ### ### Similarly if the host interpreting the url is VMS then ### file:///disk$user/my/notes/note12345.txt' means ### 'DISK$USER:[MY.NOTES]NOTE123456.TXT' but will be returned the same as ### if it is unix where it means /disk$user/my/notes/note12345.txt'. ### Except for some cases in the File::Spec methods, Perl on VMS will generally ### handle UNIX format file specifications. ### ### This means it is impossible to serve certain file:// urls on certain systems. ### ### Thus are the problems with a protocol-less specification. :-( ### sub _parse_uri { my $self = shift; my $uri = shift or return; my $href = { uri => $uri }; ### find the scheme ### $uri =~ s|^(\w+)://||; $href->{scheme} = $1; ### See rfc 1738 section 3.10 ### http://www.faqs.org/rfcs/rfc1738.html ### And wikipedia for more on windows file:// urls ### http://en.wikipedia.org/wiki/File:// if( $href->{scheme} eq 'file' ) { my @parts = split '/',$uri; ### file://hostname/... ### file://hostname/... ### normalize file://localhost with file:/// $href->{host} = $parts[0] || ''; ### index in @parts where the path components begin; my $index = 1; ### file:////hostname/sharename/blah.txt if ( HAS_SHARE and not length $parts[0] and not length $parts[1] ) { $href->{host} = $parts[2] || ''; # avoid warnings $href->{share} = $parts[3] || ''; # avoid warnings $index = 4 # index after the share ### file:///D|/blah.txt ### file:///D:/blah.txt } elsif (HAS_VOL) { ### this code comes from dmq's patch, but: ### XXX if volume is empty, wouldn't that be an error? 
--kane ### if so, our file://localhost test needs to be fixed as wel $href->{vol} = $parts[1] || ''; ### correct D| style colume descriptors $href->{vol} =~ s/\A([A-Z])\|\z/$1:/i if ON_WIN; $index = 2; # index after the volume } ### rebuild the path from the leftover parts; $href->{path} = join '/', '', splice( @parts, $index, $#parts ); } else { ### using anything but qw() in hash slices may produce warnings ### in older perls :-( @{$href}{ qw(host path) } = $uri =~ m|([^/]*)(/.*)$|s; } ### split the path into file + dir ### { my @parts = File::Spec::Unix->splitpath( delete $href->{path} ); $href->{path} = $parts[1]; $href->{file} = $parts[2]; } ### host will be empty if the target was 'localhost' and the ### scheme was 'file' $href->{host} = '' if ($href->{host} eq 'localhost') and ($href->{scheme} eq 'file'); return $href; } =head2 $where = $ff->fetch( [to => /my/output/dir/ | \$scalar] ) Fetches the file you requested and returns the full path to the file. By default it writes to C, but you can override that by specifying the C argument: ### file fetch to /tmp, full path to the file in $where $where = $ff->fetch( to => '/tmp' ); ### file slurped into $scalar, full path to the file in $where ### file is downloaded to a temp directory and cleaned up at exit time $where = $ff->fetch( to => \$scalar ); Returns the full path to the downloaded file on success, and false on failure. =cut sub fetch { my $self = shift or return; my %hash = @_; my $target; my $tmpl = { to => { default => cwd(), store => \$target }, }; check( $tmpl, \%hash ) or return; my ($to, $fh); ### you want us to slurp the contents if( ref $target and UNIVERSAL::isa( $target, 'SCALAR' ) ) { $to = tempdir( 'FileFetch.XXXXXX', CLEANUP => 1 ); ### plain old fetch } else { $to = $target; ### On VMS force to VMS format so File::Spec will work. 
$to = VMS::Filespec::vmspath($to) if ON_VMS; ### create the path if it doesn't exist yet ### unless( -d $to ) { eval { mkpath( $to ) }; return $self->_error(loc("Could not create path '%1'",$to)) if $@; } } ### set passive ftp if required ### local $ENV{FTP_PASSIVE} = $FTP_PASSIVE; ### we dont use catfile on win32 because if we are using a cygwin tool ### under cmd.exe they wont understand windows style separators. my $out_to = ON_WIN ? $to.'/'.$self->output_file : File::Spec->catfile( $to, $self->output_file ); for my $method ( @{ $METHODS->{$self->scheme} } ) { my $sub = '_'.$method.'_fetch'; unless( __PACKAGE__->can($sub) ) { $self->_error(loc("Cannot call method for '%1' -- WEIRD!", $method)); next; } ### method is blacklisted ### next if grep { lc $_ eq $method } @$BLACKLIST; ### method is known to fail ### next if $METHOD_FAIL->{$method}; ### there's serious issues with IPC::Run and quoting of command ### line arguments. using quotes in the wrong place breaks things, ### and in the case of say, ### C:\cygwin\bin\wget.EXE --quiet --passive-ftp --output-document ### "index.html" "http://www.cpan.org/index.html?q=1&y=2" ### it doesn't matter how you quote, it always fails. local $IPC::Cmd::USE_IPC_RUN = 0; if( my $file = $self->$sub( to => $out_to )){ unless( -e $file && -s _ ) { $self->_error(loc("'%1' said it fetched '%2', ". "but it was not created",$method,$file)); ### mark the failure ### $METHOD_FAIL->{$method} = 1; next; } else { ### slurp mode? if( ref $target and UNIVERSAL::isa( $target, 'SCALAR' ) ) { ### open the file open my $fh, "<$file" or do { $self->_error( loc("Could not open '%1': %2", $file, $!)); return; }; ### slurp $$target = do { local $/; <$fh> }; } my $abs = File::Spec->rel2abs( $file ); return $abs; } } } ### if we got here, we looped over all methods, but we weren't able ### to fetch it. 
return; } ######################## ### _*_fetch methods ### ######################## ### LWP fetching ### sub _lwp_fetch { my $self = shift; my %hash = @_; my ($to); my $tmpl = { to => { required => 1, store => \$to } }; check( $tmpl, \%hash ) or return; ### modules required to download with lwp ### my $use_list = { LWP => '0.0', 'LWP::UserAgent' => '0.0', 'HTTP::Request' => '0.0', 'HTTP::Status' => '0.0', URI => '0.0', }; if( can_load(modules => $use_list) ) { ### setup the uri object my $uri = URI->new( File::Spec::Unix->catfile( $self->path, $self->file ) ); ### special rules apply for file:// uris ### $uri->scheme( $self->scheme ); $uri->host( $self->scheme eq 'file' ? '' : $self->host ); $uri->userinfo("anonymous:$FROM_EMAIL") if $self->scheme ne 'file'; ### set up the useragent object my $ua = LWP::UserAgent->new(); $ua->timeout( $TIMEOUT ) if $TIMEOUT; $ua->agent( $USER_AGENT ); $ua->from( $FROM_EMAIL ); $ua->env_proxy; my $res = $ua->mirror($uri, $to) or return; ### uptodate or fetched ok ### if ( $res->code == 304 or $res->code == 200 ) { return $to; } else { return $self->_error(loc("Fetch failed! HTTP response: %1 %2 [%3]", $res->code, HTTP::Status::status_message($res->code), $res->status_line)); } } else { $METHOD_FAIL->{'lwp'} = 1; return; } } ### HTTP::Tiny fetching ### sub _httptiny_fetch { my $self = shift; my %hash = @_; my ($to); my $tmpl = { to => { required => 1, store => \$to } }; check( $tmpl, \%hash ) or return; my $use_list = { 'HTTP::Tiny' => '0.008', }; if( can_load(modules => $use_list) ) { my $uri = $self->uri; my $http = HTTP::Tiny->new( ( $TIMEOUT ? ( timeout => $TIMEOUT ) : () ) ); my $rc = $http->mirror( $uri, $to ); unless ( $rc->{success} ) { return $self->_error(loc( "Fetch failed! 
HTTP response: %1 [%2]", $rc->{status}, $rc->{reason} ) ); } return $to; } else { $METHOD_FAIL->{'httptiny'} = 1; return; } } ### HTTP::Lite fetching ### sub _httplite_fetch { my $self = shift; my %hash = @_; my ($to); my $tmpl = { to => { required => 1, store => \$to } }; check( $tmpl, \%hash ) or return; ### modules required to download with lwp ### my $use_list = { 'HTTP::Lite' => '2.2', }; # https://github.com/dagolden/cpanpm/compare/master...private%2Fuse-http-lite if( can_load(modules => $use_list) ) { my $uri = $self->uri; my $retries = 0; RETRIES: while ( $retries++ < 5 ) { my $http = HTTP::Lite->new(); # Naughty naughty but there isn't any accessor/setter $http->{timeout} = $TIMEOUT if $TIMEOUT; $http->http11_mode(1); my $fh = FileHandle->new; unless ( $fh->open($to,'>') ) { return $self->_error(loc( "Could not open '%1' for writing: %2",$to,$!)); } $fh->autoflush(1); binmode $fh; my $rc = $http->request( $uri, sub { my ($self,$dref,$cbargs) = @_; local $\; print {$cbargs} $$dref }, $fh ); close $fh; if ( $rc == 301 || $rc == 302 ) { my $loc; HEADERS: for ($http->headers_array) { /Location: (\S+)/ and $loc = $1, last HEADERS; } #$loc or last; # Think we should squeal here. if ($loc =~ m!^/!) { $uri =~ s{^(\w+?://[^/]+)/.*$}{$1}; $uri .= $loc; } else { $uri = $loc; } next RETRIES; } elsif ( $rc == 200 ) { return $to; } else { return $self->_error(loc("Fetch failed! HTTP response: %1 [%2]", $rc, $http->status_message)); } } # Loop for 5 retries. return $self->_error("Fetch failed! Gave up after 5 tries"); } else { $METHOD_FAIL->{'httplite'} = 1; return; } } ### Simple IO::Socket::INET fetching ### sub _iosock_fetch { my $self = shift; my %hash = @_; my ($to); my $tmpl = { to => { required => 1, store => \$to } }; check( $tmpl, \%hash ) or return; my $use_list = { 'IO::Socket::INET' => '0.0', 'IO::Select' => '0.0', }; if( can_load(modules => $use_list) ) { my $sock = IO::Socket::INET->new( PeerHost => $self->host, ( $self->host =~ /:/ ? 
() : ( PeerPort => 80 ) ), ); unless ( $sock ) { return $self->_error(loc("Could not open socket to '%1', '%2'",$self->host,$!)); } my $fh = FileHandle->new; # Check open() unless ( $fh->open($to,'>') ) { return $self->_error(loc( "Could not open '%1' for writing: %2",$to,$!)); } $fh->autoflush(1); binmode $fh; my $path = File::Spec::Unix->catfile( $self->path, $self->file ); my $req = "GET $path HTTP/1.0\x0d\x0aHost: " . $self->host . "\x0d\x0a\x0d\x0a"; $sock->send( $req ); my $select = IO::Select->new( $sock ); my $resp = ''; my $normal = 0; while ( $select->can_read( $TIMEOUT || 60 ) ) { my $ret = $sock->sysread( $resp, 4096, length($resp) ); if ( !defined $ret or $ret == 0 ) { $select->remove( $sock ); $normal++; } } close $sock; unless ( $normal ) { return $self->_error(loc("Socket timed out after '%1' seconds", ( $TIMEOUT || 60 ))); } # Check the "response" # Strip preceding blank lines apparently they are allowed (RFC 2616 4.1) $resp =~ s/^(\x0d?\x0a)+//; # Check it is an HTTP response unless ( $resp =~ m!^HTTP/(\d+)\.(\d+)!i ) { return $self->_error(loc("Did not get a HTTP response from '%1'",$self->host)); } # Check for OK my ($code) = $resp =~ m!^HTTP/\d+\.\d+\s+(\d+)!i; unless ( $code eq '200' ) { return $self->_error(loc("Got a '%1' from '%2' expected '200'",$code,$self->host)); } { local $\; print $fh +($resp =~ m/\x0d\x0a\x0d\x0a(.*)$/s )[0]; } close $fh; return $to; } else { $METHOD_FAIL->{'iosock'} = 1; return; } } ### Net::FTP fetching sub _netftp_fetch { my $self = shift; my %hash = @_; my ($to); my $tmpl = { to => { required => 1, store => \$to } }; check( $tmpl, \%hash ) or return; ### required modules ### my $use_list = { 'Net::FTP' => 0 }; if( can_load( modules => $use_list ) ) { ### make connection ### my $ftp; my @options = ($self->host); push(@options, Timeout => $TIMEOUT) if $TIMEOUT; unless( $ftp = Net::FTP->new( @options ) ) { return $self->_error(loc("Ftp creation failed: %1",$@)); } ### login ### unless( $ftp->login( anonymous => 
$FROM_EMAIL ) ) { return $self->_error(loc("Could not login to '%1'",$self->host)); } ### set binary mode, just in case ### $ftp->binary; ### create the remote path ### remember remote paths are unix paths! [#11483] my $remote = File::Spec::Unix->catfile( $self->path, $self->file ); ### fetch the file ### my $target; unless( $target = $ftp->get( $remote, $to ) ) { return $self->_error(loc("Could not fetch '%1' from '%2'", $remote, $self->host)); } ### log out ### $ftp->quit; return $target; } else { $METHOD_FAIL->{'netftp'} = 1; return; } } ### /bin/wget fetch ### sub _wget_fetch { my $self = shift; my %hash = @_; my ($to); my $tmpl = { to => { required => 1, store => \$to } }; check( $tmpl, \%hash ) or return; ### see if we have a wget binary ### if( my $wget = can_run('wget') ) { ### no verboseness, thanks ### my $cmd = [ $wget, '--quiet' ]; ### if a timeout is set, add it ### push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT; ### run passive if specified ### push @$cmd, '--passive-ftp' if $FTP_PASSIVE; ### set the output document, add the uri ### push @$cmd, '--output-document', $to, $self->uri; ### with IPC::Cmd > 0.41, this is fixed in teh library, ### and there's no need for special casing any more. ### DO NOT quote things for IPC::Run, it breaks stuff. # $IPC::Cmd::USE_IPC_RUN # ? ($to, $self->uri) # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); ### shell out ### my $captured; unless(run( command => $cmd, buffer => \$captured, verbose => $DEBUG )) { ### wget creates the output document always, even if the fetch ### fails.. 
so unlink it in that case 1 while unlink $to; return $self->_error(loc( "Command failed: %1", $captured || '' )); } return $to; } else { $METHOD_FAIL->{'wget'} = 1; return; } } ### /bin/lftp fetch ### sub _lftp_fetch { my $self = shift; my %hash = @_; my ($to); my $tmpl = { to => { required => 1, store => \$to } }; check( $tmpl, \%hash ) or return; ### see if we have a wget binary ### if( my $lftp = can_run('lftp') ) { ### no verboseness, thanks ### my $cmd = [ $lftp, '-f' ]; my $fh = File::Temp->new; my $str; ### if a timeout is set, add it ### $str .= "set net:timeout $TIMEOUT;\n" if $TIMEOUT; ### run passive if specified ### $str .= "set ftp:passive-mode 1;\n" if $FTP_PASSIVE; ### set the output document, add the uri ### ### quote the URI, because lftp supports certain shell ### expansions, most notably & for backgrounding. ### ' quote does nto work, must be " $str .= q[get ']. $self->uri .q[' -o ]. $to . $/; if( $DEBUG ) { my $pp_str = join ' ', split $/, $str; print "# lftp command: $pp_str\n"; } ### write straight to the file. $fh->autoflush(1); print $fh $str; ### the command needs to be 1 string to be executed push @$cmd, $fh->filename; ### with IPC::Cmd > 0.41, this is fixed in teh library, ### and there's no need for special casing any more. ### DO NOT quote things for IPC::Run, it breaks stuff. # $IPC::Cmd::USE_IPC_RUN # ? ($to, $self->uri) # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); ### shell out ### my $captured; unless(run( command => $cmd, buffer => \$captured, verbose => $DEBUG )) { ### wget creates the output document always, even if the fetch ### fails.. 
so unlink it in that case 1 while unlink $to; return $self->_error(loc( "Command failed: %1", $captured || '' )); } return $to; } else { $METHOD_FAIL->{'lftp'} = 1; return; } } ### /bin/ftp fetch ### sub _ftp_fetch { my $self = shift; my %hash = @_; my ($to); my $tmpl = { to => { required => 1, store => \$to } }; check( $tmpl, \%hash ) or return; ### see if we have a ftp binary ### if( my $ftp = can_run('ftp') ) { my $fh = FileHandle->new; local $SIG{CHLD} = 'IGNORE'; unless ($fh->open("|$ftp -n")) { return $self->_error(loc("%1 creation failed: %2", $ftp, $!)); } my @dialog = ( "lcd " . dirname($to), "open " . $self->host, "user anonymous $FROM_EMAIL", "cd /", "cd " . $self->path, "binary", "get " . $self->file . " " . $self->output_file, "quit", ); foreach (@dialog) { $fh->print($_, "\n") } $fh->close or return; return $to; } } ### lynx is stupid - it decompresses any .gz file it finds to be text ### use /bin/lynx to fetch files sub _lynx_fetch { my $self = shift; my %hash = @_; my ($to); my $tmpl = { to => { required => 1, store => \$to } }; check( $tmpl, \%hash ) or return; ### see if we have a lynx binary ### if( my $lynx = can_run('lynx') ) { unless( IPC::Cmd->can_capture_buffer ) { $METHOD_FAIL->{'lynx'} = 1; return $self->_error(loc( "Can not capture buffers. 
Can not use '%1' to fetch files", 'lynx' )); } ### check if the HTTP resource exists ### if ($self->uri =~ /^https?:\/\//i) { my $cmd = [ $lynx, '-head', '-source', "-auth=anonymous:$FROM_EMAIL", ]; push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT; push @$cmd, $self->uri; ### shell out ### my $head; unless(run( command => $cmd, buffer => \$head, verbose => $DEBUG ) ) { return $self->_error(loc("Command failed: %1", $head || '')); } unless($head =~ /^HTTP\/\d+\.\d+ 200\b/) { return $self->_error(loc("Command failed: %1", $head || '')); } } ### write to the output file ourselves, since lynx ass_u_mes to much my $local = FileHandle->new(">$to") or return $self->_error(loc( "Could not open '%1' for writing: %2",$to,$!)); ### dump to stdout ### my $cmd = [ $lynx, '-source', "-auth=anonymous:$FROM_EMAIL", ]; push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT; ### DO NOT quote things for IPC::Run, it breaks stuff. push @$cmd, $self->uri; ### with IPC::Cmd > 0.41, this is fixed in teh library, ### and there's no need for special casing any more. ### DO NOT quote things for IPC::Run, it breaks stuff. # $IPC::Cmd::USE_IPC_RUN # ? $self->uri # : QUOTE. $self->uri .QUOTE; ### shell out ### my $captured; unless(run( command => $cmd, buffer => \$captured, verbose => $DEBUG ) ) { return $self->_error(loc("Command failed: %1", $captured || '')); } ### print to local file ### ### XXX on a 404 with a special error page, $captured will actually ### hold the contents of that page, and make it *appear* like the ### request was a success, when really it wasn't :( ### there doesn't seem to be an option for lynx to change the exit ### code based on a 4XX status or so. 
### the closest we can come is using --error_file and parsing that, ### which is very unreliable ;( $local->print( $captured ); $local->close or return; return $to; } else { $METHOD_FAIL->{'lynx'} = 1; return; } } ### use /bin/ncftp to fetch files sub _ncftp_fetch { my $self = shift; my %hash = @_; my ($to); my $tmpl = { to => { required => 1, store => \$to } }; check( $tmpl, \%hash ) or return; ### we can only set passive mode in interactive sessions, so bail out ### if $FTP_PASSIVE is set return if $FTP_PASSIVE; ### see if we have a ncftp binary ### if( my $ncftp = can_run('ncftp') ) { my $cmd = [ $ncftp, '-V', # do not be verbose '-p', $FROM_EMAIL, # email as password $self->host, # hostname dirname($to), # local dir for the file # remote path to the file ### DO NOT quote things for IPC::Run, it breaks stuff. $IPC::Cmd::USE_IPC_RUN ? File::Spec::Unix->catdir( $self->path, $self->file ) : QUOTE. File::Spec::Unix->catdir( $self->path, $self->file ) .QUOTE ]; ### shell out ### my $captured; unless(run( command => $cmd, buffer => \$captured, verbose => $DEBUG ) ) { return $self->_error(loc("Command failed: %1", $captured || '')); } return $to; } else { $METHOD_FAIL->{'ncftp'} = 1; return; } } ### use /bin/curl to fetch files sub _curl_fetch { my $self = shift; my %hash = @_; my ($to); my $tmpl = { to => { required => 1, store => \$to } }; check( $tmpl, \%hash ) or return; if (my $curl = can_run('curl')) { ### these long opts are self explanatory - I like that -jmb my $cmd = [ $curl, '-q' ]; push(@$cmd, '--connect-timeout', $TIMEOUT) if $TIMEOUT; push(@$cmd, '--silent') unless $DEBUG; ### curl does the right thing with passive, regardless ### if ($self->scheme eq 'ftp') { push(@$cmd, '--user', "anonymous:$FROM_EMAIL"); } ### curl doesn't follow 302 (temporarily moved) etc automatically ### so we add --location to enable that. 
push @$cmd, '--fail', '--location', '--output', $to, $self->uri; ### with IPC::Cmd > 0.41, this is fixed in teh library, ### and there's no need for special casing any more. ### DO NOT quote things for IPC::Run, it breaks stuff. # $IPC::Cmd::USE_IPC_RUN # ? ($to, $self->uri) # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); my $captured; unless(run( command => $cmd, buffer => \$captured, verbose => $DEBUG ) ) { return $self->_error(loc("Command failed: %1", $captured || '')); } return $to; } else { $METHOD_FAIL->{'curl'} = 1; return; } } ### /usr/bin/fetch fetch! ### sub _fetch_fetch { my $self = shift; my %hash = @_; my ($to); my $tmpl = { to => { required => 1, store => \$to } }; check( $tmpl, \%hash ) or return; ### see if we have a wget binary ### if( HAS_FETCH and my $fetch = can_run('fetch') ) { ### no verboseness, thanks ### my $cmd = [ $fetch, '-q' ]; ### if a timeout is set, add it ### push(@$cmd, '-T', $TIMEOUT) if $TIMEOUT; ### run passive if specified ### #push @$cmd, '-p' if $FTP_PASSIVE; local $ENV{'FTP_PASSIVE_MODE'} = 1 if $FTP_PASSIVE; ### set the output document, add the uri ### push @$cmd, '-o', $to, $self->uri; ### with IPC::Cmd > 0.41, this is fixed in teh library, ### and there's no need for special casing any more. ### DO NOT quote things for IPC::Run, it breaks stuff. # $IPC::Cmd::USE_IPC_RUN # ? ($to, $self->uri) # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); ### shell out ### my $captured; unless(run( command => $cmd, buffer => \$captured, verbose => $DEBUG )) { ### wget creates the output document always, even if the fetch ### fails.. 
so unlink it in that case 1 while unlink $to; return $self->_error(loc( "Command failed: %1", $captured || '' )); } return $to; } else { $METHOD_FAIL->{'wget'} = 1; return; } } ### use File::Copy for fetching file:// urls ### ### ### See section 3.10 of RFC 1738 (http://www.faqs.org/rfcs/rfc1738.html) ### Also see wikipedia on file:// (http://en.wikipedia.org/wiki/File://) ### sub _file_fetch { my $self = shift; my %hash = @_; my ($to); my $tmpl = { to => { required => 1, store => \$to } }; check( $tmpl, \%hash ) or return; ### prefix a / on unix systems with a file uri, since it would ### look somewhat like this: ### file:///home/kane/file ### whereas windows file uris for 'c:\some\dir\file' might look like: ### file:///C:/some/dir/file ### file:///C|/some/dir/file ### or for a network share '\\host\share\some\dir\file': ### file:////host/share/some/dir/file ### ### VMS file uri's for 'DISK$USER:[MY.NOTES]NOTE123456.TXT' might look like: ### file://vms.host.edu/disk$user/my/notes/note12345.txt ### my $path = $self->path; my $vol = $self->vol; my $share = $self->share; my $remote; if (!$share and $self->host) { return $self->_error(loc( "Currently %1 cannot handle hosts in %2 urls", 'File::Fetch', 'file://' )); } if( $vol ) { $path = File::Spec->catdir( split /\//, $path ); $remote = File::Spec->catpath( $vol, $path, $self->file); } elsif( $share ) { ### win32 specific, and a share name, so we wont bother with File::Spec $path =~ s|/+|\\|g; $remote = "\\\\".$self->host."\\$share\\$path"; } else { ### File::Spec on VMS can not currently handle UNIX syntax. my $file_class = ON_VMS ? 
'File::Spec::Unix' : 'File::Spec'; $remote = $file_class->catfile( $path, $self->file ); } ### File::Copy is littered with 'die' statements :( ### my $rv = eval { File::Copy::copy( $remote, $to ) }; ### something went wrong ### if( !$rv or $@ ) { return $self->_error(loc("Could not copy '%1' to '%2': %3 %4", $remote, $to, $!, $@)); } return $to; } ### use /usr/bin/rsync to fetch files sub _rsync_fetch { my $self = shift; my %hash = @_; my ($to); my $tmpl = { to => { required => 1, store => \$to } }; check( $tmpl, \%hash ) or return; if (my $rsync = can_run('rsync')) { my $cmd = [ $rsync ]; ### XXX: rsync has no I/O timeouts at all, by default push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT; push(@$cmd, '--quiet') unless $DEBUG; ### DO NOT quote things for IPC::Run, it breaks stuff. push @$cmd, $self->uri, $to; ### with IPC::Cmd > 0.41, this is fixed in teh library, ### and there's no need for special casing any more. ### DO NOT quote things for IPC::Run, it breaks stuff. # $IPC::Cmd::USE_IPC_RUN # ? ($to, $self->uri) # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); my $captured; unless(run( command => $cmd, buffer => \$captured, verbose => $DEBUG ) ) { return $self->_error(loc("Command %1 failed: %2", "@$cmd" || '', $captured || '')); } return $to; } else { $METHOD_FAIL->{'rsync'} = 1; return; } } ################################# # # Error code # ################################# =pod =head2 $ff->error([BOOL]) Returns the last encountered error as string. Pass it a true value to get the C output instead. =cut ### error handling the way Archive::Extract does it sub _error { my $self = shift; my $error = shift; $self->_error_msg( $error ); $self->_error_msg_long( Carp::longmess($error) ); if( $WARN ) { carp $DEBUG ? $self->_error_msg_long : $self->_error_msg; } return; } sub error { my $self = shift; return shift() ? 
$self->_error_msg_long : $self->_error_msg; } 1; =pod =head1 HOW IT WORKS File::Fetch is able to fetch a variety of uris, by using several external programs and modules. Below is a mapping of what utilities will be used in what order for what schemes, if available: file => LWP, lftp, file http => LWP, HTTP::Lite, wget, curl, lftp, fetch, lynx, iosock ftp => LWP, Net::FTP, wget, curl, lftp, fetch, ncftp, ftp rsync => rsync If you'd like to disable the use of one or more of these utilities and/or modules, see the C<$BLACKLIST> variable further down. If a utility or module isn't available, it will be marked in a cache (see the C<$METHOD_FAIL> variable further down), so it will not be tried again. The C method will only fail when all options are exhausted, and it was not able to retrieve the file. The C utility is available on FreeBSD. NetBSD and Dragonfly BSD may also have it from C. We only check for C on those three platforms. C is a very limited L based mechanism for retrieving C schemed urls. It doesn't follow redirects for instance. A special note about fetching files from an ftp uri: By default, all ftp connections are done in passive mode. To change that, see the C<$FTP_PASSIVE> variable further down. Furthermore, ftp uris only support anonymous connections, so no named user/password pair can be passed along. C is blacklisted by default; see the C<$BLACKLIST> variable further down. =head1 GLOBAL VARIABLES The behaviour of File::Fetch can be altered by changing the following global variables: =head2 $File::Fetch::FROM_EMAIL This is the email address that will be sent as your anonymous ftp password. Default is C. =head2 $File::Fetch::USER_AGENT This is the useragent as C will report it. Default is C. =head2 $File::Fetch::FTP_PASSIVE This variable controls whether the environment variable C and any passive switches to commandline tools will be set to true. Default value is 1. 
Note: When $FTP_PASSIVE is true, C will not be used to fetch files, since passive mode can only be set interactively for this binary =head2 $File::Fetch::TIMEOUT When set, controls the network timeout (counted in seconds). Default value is 0. =head2 $File::Fetch::WARN This variable controls whether errors encountered internally by C should be C'd or not. Set to false to silence warnings. Inspect the output of the C method manually to see what went wrong. Defaults to C. =head2 $File::Fetch::DEBUG This enables debugging output when calling commandline utilities to fetch files. This also enables C errors, instead of the regular C errors. Good for tracking down why things don't work with your particular setup. Default is 0. =head2 $File::Fetch::BLACKLIST This is an array ref holding blacklisted modules/utilities for fetching files with. To disallow the use of, for example, C and C, you could set $File::Fetch::BLACKLIST to: $File::Fetch::BLACKLIST = [qw|lwp netftp|] The default blacklist is [qw|ftp|], as C is rather unreliable. See the note on C below. =head2 $File::Fetch::METHOD_FAIL This is a hashref registering what modules/utilities were known to fail for fetching files (mostly because they weren't installed). You can reset this cache by assigning an empty hashref to it, or individually remove keys. See the note on C below. =head1 MAPPING Here's a quick mapping for the utilities/modules, and their names for the $BLACKLIST, $METHOD_FAIL and other internal functions. LWP => lwp HTTP::Lite => httplite HTTP::Tiny => httptiny Net::FTP => netftp wget => wget lynx => lynx ncftp => ncftp ftp => ftp curl => curl rsync => rsync lftp => lftp fetch => fetch IO::Socket => iosock =head1 FREQUENTLY ASKED QUESTIONS =head2 So how do I use a proxy with File::Fetch? C currently only supports proxies with LWP::UserAgent. You will need to set your environment variables accordingly. 
For example, to use an ftp proxy: $ENV{ftp_proxy} = 'foo.com'; Refer to the LWP::UserAgent manpage for more details. =head2 I used 'lynx' to fetch a file, but its contents is all wrong! C can only fetch remote files by dumping its contents to C, which we in turn capture. If that content is a 'custom' error file (like, say, a C<404 handler>), you will get that contents instead. Sadly, C doesn't support any options to return a different exit code on non-C<200 OK> status, giving us no way to tell the difference between a 'successful' fetch and a custom error page. Therefor, we recommend to only use C as a last resort. This is why it is at the back of our list of methods to try as well. =head2 Files I'm trying to fetch have reserved characters or non-ASCII characters in them. What do I do? C is relatively smart about things. When trying to write a file to disk, it removes the C (see the C method for details) from the file name before creating it. In most cases this suffices. If you have any other characters you need to escape, please install the C module from CPAN, and pre-encode your URI before passing it to C. You can read about the details of URIs and URI encoding here: http://www.faqs.org/rfcs/rfc2396.html =head1 TODO =over 4 =item Implement $PREFER_BIN To indicate to rather use commandline tools than modules =back =head1 BUG REPORTS Please report bugs or other issues to Ebug-file-fetch@rt.cpan.org. =head1 AUTHOR This module by Jos Boumans Ekane@cpan.orgE. =head1 COPYRIGHT This library is free software; you may redistribute and/or modify it under the same terms as Perl itself. =cut # Local variables: # c-indentation-style: bsd # c-basic-offset: 4 # indent-tabs-mode: nil # End: # vim: expandtab shiftwidth=4: kage. =head1 BUGS As of Perl 5.8.0 after using this module you cannot use the implicit C<$_> or the special filehandle C<_> with stat() or lstat(), trying to do so leads into strange errors. 
The workaround is for C<$_> to be explicit my $stat_obj = stat $_; and for C<_> to explicitly populate the object using the unexported and undocumented populate() function with CORE::stat(): my $stat_obj = File::stat::populate(CORE::stat(_)); =head1 ERRORS =over 4 =item -%s is not implemented on a File::stat object The filetest operators C<-t>, C<-T> and C<-B> are not implemented, as they require more information than just a stat buffer. =back =head1 WARNINGS These can all be disabled with no warnings "File::stat"; =over 4 =item File::stat ignores use filetest 'access' You have tried to use one of the C<-rwxRWX> filetests with C in effect. C will ignore the pragma, and just use the information in the C member as usual. =item File::stat ignores VMS ACLs VMS systems have a permissions structure that cannot be completely represented in a stat buffer, and unlike on other systems the builtin filetest operators respect this. The C overloads, however, do not, since the information required is not available. =back =head1 NOTE While this class is currently implemented using the Class::Struct module to build a struct-like class, you shouldn't rely upon this. =head1 AUTHOR Tom Christiansen  ./3 .. Simple.pm ./3 ..Long.pmStd.pmV[0] = "-$rest"; } else { shift(@ARGV); } } } unless (ref $hash) { local $Exporter::ExportLevel = 1; import Getopt::Std; } $errs == 0; } 1; package File::Find; use 5.006; use strict; use warnings; use warnings::register; our $VERSION = '1.19'; require Exporter; require Cwd; # # Modified to ensure sub-directory traversal order is not inverted by stack # push and pops. That is remains in the same order as in the directory file, # or user pre-processing (EG:sorted). # =head1 NAME File::Find - Traverse a directory tree. =head1 SYNOPSIS use File::Find; find(\&wanted, @directories_to_search); sub wanted { ... } use File::Find; finddepth(\&wanted, @directories_to_search); sub wanted { ... 
} use File::Find; find({ wanted => \&process, follow => 1 }, '.'); =head1 DESCRIPTION These are functions for searching through directory trees doing work on each file found similar to the Unix I command. File::Find exports two functions, C and C. They work similarly but have subtle differences. =over 4 =item B find(\&wanted, @directories); find(\%options, @directories); C does a depth-first search over the given C<@directories> in the order they are given. For each file or directory found, it calls the C<&wanted> subroutine. (See below for details on how to use the C<&wanted> function). Additionally, for each directory found, it will C into that directory and continue the search, invoking the C<&wanted> function on each file or subdirectory in the directory. =item B finddepth(\&wanted, @directories); finddepth(\%options, @directories); C works just like C except that it invokes the C<&wanted> function for a directory I invoking it for the directory's contents. It does a postorder traversal instead of a preorder traversal, working from the bottom of the directory tree up where C works from the top of the tree down. =back =head2 %options The first argument to C is either a code reference to your C<&wanted> function, or a hash reference describing the operations to be performed for each file. The code reference is described in L below. Here are the possible keys for the hash: =over 3 =item C The value should be a code reference. This code reference is described in L below. The C<&wanted> subroutine is mandatory. =item C Reports the name of a directory only AFTER all its entries have been reported. Entry point C is a shortcut for specifying C<< { bydepth => 1 } >> in the first argument of C. =item C The value should be a code reference. This code reference is used to preprocess the current directory. The name of the currently processed directory is in C<$File::Find::dir>. Your preprocessing function is called after C, but before the loop that calls the C function. 
It is called with a list of strings (actually file/directory names) and is expected to return a list of strings. The code can be used to sort the file/directory names alphabetically, numerically, or to filter out directory entries based on their name alone. When I or I are in effect, C is a no-op. =item C The value should be a code reference. It is invoked just before leaving the currently processed directory. It is called in void context with no arguments. The name of the current directory is in C<$File::Find::dir>. This hook is handy for summarizing a directory, such as calculating its disk usage. When I or I are in effect, C is a no-op. =item C Causes symbolic links to be followed. Since directory trees with symbolic links (followed) may contain files more than once and may even have cycles, a hash has to be built up with an entry for each file. This might be expensive both in space and time for a large directory tree. See I and I below. If either I or I is in effect: =over 6 =item * It is guaranteed that an I has been called before the user's C function is called. This enables fast file checks involving S<_>. Note that this guarantee no longer holds if I or I are not set. =item * There is a variable C<$File::Find::fullname> which holds the absolute pathname of the file with all symbolic links resolved. If the link is a dangling symbolic link, then fullname will be set to C. =back This is a no-op on Win32. =item C This is similar to I except that it may report some files more than once. It does detect cycles, however. Since only symbolic links have to be hashed, this is much cheaper both in space and time. If processing a file more than once (by the user's C function) is worse than just taking time, the option I should be used. This is also a no-op on Win32. =item C C, which is the default, causes all files which are neither directories nor symbolic links to be ignored if they are about to be processed a second time. 
If a directory or a symbolic link are about to be processed a second time, File::Find dies. C causes File::Find to die if any file is about to be processed a second time. C causes File::Find to ignore any duplicate files and directories but to proceed normally otherwise. =item C If true and a code reference, will be called with the symbolic link name and the directory it lives in as arguments. Otherwise, if true and warnings are on, warning "symbolic_link_name is a dangling symbolic link\n" will be issued. If false, the dangling symbolic link will be silently ignored. =item C Does not C to each directory as it recurses. The C function will need to be aware of this, of course. In this case, C<$_> will be the same as C<$File::Find::name>. =item C If find is used in taint-mode (-T command line switch or if EUID != UID or if EGID != GID) then internally directory names have to be untainted before they can be chdir'ed to. Therefore they are checked against a regular expression I. Note that all names passed to the user's I function are still tainted. If this option is used while not in taint-mode, C is a no-op. =item C See above. This should be set using the C quoting operator. The default is set to C. Note that the parentheses are vital. =item C If set, a directory which fails the I is skipped, including all its sub-directories. The default is to 'die' in such a case. =back =head2 The wanted function The C function does whatever verifications you want on each file and directory. Note that despite its name, the C function is a generic callback function, and does B tell File::Find if a file is "wanted" or not. In fact, its return value is ignored. The wanted function takes no arguments but rather does its work through a collection of variables. =over 4 =item C<$File::Find::dir> is the current directory name, =item C<$_> is the current filename within that directory =item C<$File::Find::name> is the complete pathname to the file. 
=back The above variables have all been localized and may be changed without affecting data outside of the wanted function. For example, when examining the file F you will have: $File::Find::dir = /some/path/ $_ = foo.ext $File::Find::name = /some/path/foo.ext You are chdir()'d to C<$File::Find::dir> when the function is called, unless C was specified. Note that when changing to directories is in effect the root directory (F) is a somewhat special case inasmuch as the concatenation of C<$File::Find::dir>, C<'/'> and C<$_> is not literally equal to C<$File::Find::name>. The table below summarizes all variants: $File::Find::name $File::Find::dir $_ default / / . no_chdir=>0 /etc / etc /etc/x /etc x no_chdir=>1 / / / /etc / /etc /etc/x /etc /etc/x When C or C are in effect, there is also a C<$File::Find::fullname>. The function may set C<$File::Find::prune> to prune the tree unless C was specified. Unless C or C is specified, for compatibility reasons (find.pl, find2perl) there are in addition the following globals available: C<$File::Find::topdir>, C<$File::Find::topdev>, C<$File::Find::topino>, C<$File::Find::topmode> and C<$File::Find::topnlink>. This library is useful for the C tool, which when fed, find2perl / -name .nfs\* -mtime +7 \ -exec rm -f {} \; -o -fstype nfs -prune produces something like: sub wanted { /^\.nfs.*\z/s && (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_)) && int(-M _) > 7 && unlink($_) || ($nlink || (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_))) && $dev < 0 && ($File::Find::prune = 1); } Notice the C<_> in the above C: the C<_> is a magical filehandle that caches the information from the preceding C, C, or filetest. Here's another interesting wanted function. It will find all symbolic links that don't resolve: sub wanted { -l && !-e && print "bogus link: $File::Find::name\n"; } See also the script C on CPAN for a nice application of this module. 
=head1 WARNINGS If you run your program with the C<-w> switch, or if you use the C pragma, File::Find will report warnings for several weird situations. You can disable these warnings by putting the statement no warnings 'File::Find'; in the appropriate scope. See L for more info about lexical warnings. =head1 CAVEAT =over 2 =item $dont_use_nlink You can set the variable C<$File::Find::dont_use_nlink> to 1, if you want to force File::Find to always stat directories. This was used for file systems that do not have an C count matching the number of sub-directories. Examples are ISO-9660 (CD-ROM), AFS, HPFS (OS/2 file system), FAT (DOS file system) and a couple of others. You shouldn't need to set this variable, since File::Find should now detect such file systems on-the-fly and switch itself to using stat. This works even for parts of your file system, like a mounted CD-ROM. If you do set C<$File::Find::dont_use_nlink> to 1, you will notice slow-downs. =item symlinks Be aware that the option to follow symbolic links can be dangerous. Depending on the structure of the directory tree (including symbolic links to directories) you might traverse a given (physical) directory more than once (only if C is in effect). Furthermore, deleting or changing files in a symbolically linked directory might cause very unpleasant surprises, since you delete or change files in an unknown directory. =back =head1 BUGS AND CAVEATS Despite the name of the C function, both C and C perform a depth-first search of the directory hierarchy. =head1 HISTORY File::Find used to produce incorrect results if called recursively. During the development of perl 5.8 this bug was fixed. The first fixed version of File::Find was 1.01. =head1 SEE ALSO find, find2perl. 
=cut our @ISA = qw(Exporter); our @EXPORT = qw(find finddepth); use strict; my $Is_VMS; my $Is_Win32; require File::Basename; require File::Spec; # Should ideally be my() not our() but local() currently # refuses to operate on lexicals our %SLnkSeen; our ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow, $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat, $pre_process, $post_process, $dangling_symlinks); sub contract_name { my ($cdir,$fn) = @_; return substr($cdir,0,rindex($cdir,'/')) if $fn eq $File::Find::current_dir; $cdir = substr($cdir,0,rindex($cdir,'/')+1); $fn =~ s|^\./||; my $abs_name= $cdir . $fn; if (substr($fn,0,3) eq '../') { 1 while $abs_name =~ s!/[^/]*/\.\./+!/!; } return $abs_name; } sub PathCombine($$) { my ($Base,$Name) = @_; my $AbsName; if (substr($Name,0,1) eq '/') { $AbsName= $Name; } else { $AbsName= contract_name($Base,$Name); } # (simple) check for recursion my $newlen= length($AbsName); if ($newlen <= length($Base)) { if (($newlen == length($Base) || substr($Base,$newlen,1) eq '/') && $AbsName eq substr($Base,0,$newlen)) { return undef; } } return $AbsName; } sub Follow_SymLink($) { my ($AbsName) = @_; my ($NewName,$DEV, $INO); ($DEV, $INO)= lstat $AbsName; while (-l _) { if ($SLnkSeen{$DEV, $INO}++) { if ($follow_skip < 2) { die "$AbsName is encountered a second time"; } else { return undef; } } $NewName= PathCombine($AbsName, readlink($AbsName)); unless(defined $NewName) { if ($follow_skip < 2) { die "$AbsName is a recursive symbolic link"; } else { return undef; } } else { $AbsName= $NewName; } ($DEV, $INO) = lstat($AbsName); return undef unless defined $DEV; # dangling symbolic link } if ($full_check && defined $DEV && $SLnkSeen{$DEV, $INO}++) { if ( ($follow_skip < 1) || ((-d _) && ($follow_skip < 2)) ) { die "$AbsName encountered a second time"; } else { return undef; } } return $AbsName; } our($dir, $name, $fullname, $prune); sub _find_dir_symlnk($$$); sub _find_dir($$$); # check whether or not a scalar 
# variable is tainted
# (code straight from the Camel, 3rd ed., page 561)
#
# Returns true when evaluating a string built from (a zero-length slice
# of) the argument raises an error, false otherwise.
sub is_tainted_pp {
    my $arg = shift;
    my $nada = substr($arg, 0, 0); # zero-length
    local $@;
    eval { eval "# $nada" };
    return length($@) != 0;
}

# _find_opt($wanted, @top_items)
#
# Worker behind find() and finddepth().  $wanted is the option hash
# produced by wrap_wanted(); every remaining argument is a starting point.
# Copies the options into the (localized) package variables, then for each
# starting point either hands a directory to _find_dir() /
# _find_dir_symlnk() or calls the wanted callback directly for a
# non-directory.  Unless no_chdir is set, the original working directory
# is restored after each starting point.
# Dies when no top directory is given, when a directory stays tainted with
# untaint_skip == 0, or when the original directory cannot be re-entered.
sub _find_opt {
    my $wanted = shift;
    die "invalid top directory" unless defined $_[0];

    # This function must local()ize everything because callbacks may
    # call find() or finddepth()

    local %SLnkSeen;
    local ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow,
        $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat,
        $pre_process, $post_process, $dangling_symlinks);
    local($dir, $name, $fullname, $prune);
    local *_ = \my $a;

    my $cwd            = $wanted->{bydepth} ? Cwd::fastcwd() : Cwd::getcwd();
    if ($Is_VMS) {
        # VMS returns this by default in VMS format which just doesn't
        # work for the rest of this module.
        $cwd = VMS::Filespec::unixpath($cwd);

        # Apparently this is not expected to have a trailing space.
        # To attempt to make VMS/UNIX conversions mostly reversable,
        # a trailing slash is needed.  The run-time functions ignore the
        # resulting double slash, but it causes the perl tests to fail.
        $cwd =~ s#/\z##;

        # This comes up in upper case now, but should be lower.
        # In the future this could be exact case, no need to change.
    }
    my $cwd_untainted  = $cwd;
    my $check_t_cwd    = 1;
    $wanted_callback   = $wanted->{wanted};
    $bydepth           = $wanted->{bydepth};
    $pre_process       = $wanted->{preprocess};
    $post_process      = $wanted->{postprocess};
    $no_chdir          = $wanted->{no_chdir};
    $full_check        = $Is_Win32 ? 0 : $wanted->{follow};
    $follow            = $Is_Win32 ? 0 :
                             $full_check || $wanted->{follow_fast};
    $follow_skip       = $wanted->{follow_skip};
    $untaint           = $wanted->{untaint};
    $untaint_pat       = $wanted->{untaint_pattern};
    $untaint_skip      = $wanted->{untaint_skip};
    $dangling_symlinks = $wanted->{dangling_symlinks};

    # for compatibility reasons (find.pl, find2perl)
    local our ($topdir, $topdev, $topino, $topmode, $topnlink);

    # a symbolic link to a directory doesn't increase the link count
    $avoid_nlink      = $follow || $File::Find::dont_use_nlink;

    my ($abs_dir, $Is_Dir);

    Proc_Top_Item:
    foreach my $TOP (@_) {
        my $top_item = $TOP;

        ($topdev,$topino,$topmode,$topnlink) = $follow ? stat $top_item : lstat $top_item;

        # Strip one trailing directory separator, except from a root path.
        if ($Is_Win32) {
            $top_item =~ s|[/\\]\z||
              unless $top_item =~ m{^(?:\w:)?[/\\]$};
        }
        else {
            $top_item =~ s|/\z|| unless $top_item eq '/';
        }

        $Is_Dir= 0;

        if ($follow) {

            if (substr($top_item,0,1) eq '/') {
                $abs_dir = $top_item;
            }
            elsif ($top_item eq $File::Find::current_dir) {
                $abs_dir = $cwd;
            }
            else {  # care about any  ../
                $top_item =~ s/\.dir\z//i if $Is_VMS;
                $abs_dir = contract_name("$cwd/",$top_item);
            }
            $abs_dir= Follow_SymLink($abs_dir);
            unless (defined $abs_dir) {
                if ($dangling_symlinks) {
                    if (ref $dangling_symlinks eq 'CODE') {
                        $dangling_symlinks->($top_item, $cwd);
                    } else {
                        warnings::warnif "$top_item is a dangling symbolic link\n";
                    }
                }
                next Proc_Top_Item;
            }

            if (-d _) {
                $top_item =~ s/\.dir\z//i if $Is_VMS;
                _find_dir_symlnk($wanted, $abs_dir, $top_item);
                $Is_Dir= 1;
            }
        }
        else { # no follow
            $topdir = $top_item;
            unless (defined $topnlink) {
                warnings::warnif "Can't stat $top_item: $!\n";
                next Proc_Top_Item;
            }
            if (-d _) {
                $top_item =~ s/\.dir\z//i if $Is_VMS;
                _find_dir($wanted, $top_item, $topnlink);
                $Is_Dir= 1;
            }
            else {
                $abs_dir= $top_item;
            }
        }

        # The starting point is not a directory: report it directly.
        unless ($Is_Dir) {
            unless (($_,$dir) = File::Basename::fileparse($abs_dir)) {
                ($dir,$_) = ('./', $top_item);
            }

            $abs_dir = $dir;
            if (( $untaint ) && (is_tainted($dir) )) {
                ( $abs_dir ) = $dir =~ m|$untaint_pat|;
                unless (defined $abs_dir) {
                    if ($untaint_skip == 0) {
                        die "directory $dir is still tainted";
                    }
                    else {
                        next Proc_Top_Item;
                    }
                }
            }

            unless ($no_chdir || chdir $abs_dir) {
                warnings::warnif "Couldn't chdir $abs_dir: $!\n";
                next Proc_Top_Item;
            }

            $name = $abs_dir . $_; # $File::Find::name
            $_ = $name if $no_chdir;

            { $wanted_callback->() }; # protect against wild "next"

        }

        # Go back to where we started; the cwd is untainted only once.
        unless ( $no_chdir ) {
            if ( ($check_t_cwd) && (($untaint) && (is_tainted($cwd) )) ) {
                ( $cwd_untainted ) = $cwd =~ m|$untaint_pat|;
                unless (defined $cwd_untainted) {
                    die "insecure cwd in find(depth)";
                }
                $check_t_cwd = 0;
            }

            unless (chdir $cwd_untainted) {
                die "Can't cd to $cwd: $!\n";
            }
        }
    }
}

# API:
#  $wanted
#  $p_dir :  "parent directory"
#  $nlink :  what came back from the stat
# preconditions:
#  chdir (if not no_chdir) to dir
#
# Non-following traversal.  Directories still to be visited are kept on
# @Stack as [$CdLvl, $p_dir, $dir_rel, $nlink]; an $nlink of -1 marks a
# directory to be reported after its contents (finddepth) and -2 marks a
# pending postprocess call.
sub _find_dir($$$) {
    my ($wanted, $p_dir, $nlink) = @_;
    my ($CdLvl,$Level) = (0,0);
    my @Stack;
    my @filenames;
    my ($subcount,$sub_nlink);
    my $SE= [];
    my $dir_name= $p_dir;
    my $dir_pref;
    my $dir_rel = $File::Find::current_dir;
    my $tainted = 0;
    my $no_nlink;

    if ($Is_Win32) {
        $dir_pref
          = ($p_dir =~ m{^(?:\w:[/\\]?|[/\\])$} ? $p_dir : "$p_dir/" );
    } elsif ($Is_VMS) {

        # VMS is returning trailing .dir on directories
        # and trailing . on files and symbolic links
        # in UNIX syntax.
        #

        $p_dir =~ s/\.(dir)?$//i unless $p_dir eq '.';

        $dir_pref = ($p_dir =~ m/[\]>]+$/ ? $p_dir : "$p_dir/" );
    }
    else {
        $dir_pref= ( $p_dir eq '/' ? '/' : "$p_dir/" );
    }

    local ($dir, $name, $prune, *DIR);

    unless ( $no_chdir || ($p_dir eq $File::Find::current_dir)) {
        my $udir = $p_dir;
        if (( $untaint ) && (is_tainted($p_dir) )) {
            ( $udir ) = $p_dir =~ m|$untaint_pat|;
            unless (defined $udir) {
                if ($untaint_skip == 0) {
                    die "directory $p_dir is still tainted";
                }
                else {
                    return;
                }
            }
        }
        unless (chdir ($Is_VMS && $udir !~ /[\/\[<]+/ ? "./$udir" : $udir)) {
            warnings::warnif "Can't cd to $udir: $!\n";
            return;
        }
    }

    # push the starting directory
    push @Stack,[$CdLvl,$p_dir,$dir_rel,-1]  if  $bydepth;

    while (defined $SE) {
        unless ($bydepth) {
            $dir= $p_dir; # $File::Find::dir
            $name= $dir_name; # $File::Find::name
            $_= ($no_chdir ? $dir_name : $dir_rel ); # $_
            # prune may happen here
            $prune= 0;
            { $wanted_callback->() };   # protect against wild "next"
            next if $prune;
        }

        # change to that directory
        unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
            my $udir= $dir_rel;
            if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_rel) )) ) {
                ( $udir ) = $dir_rel =~ m|$untaint_pat|;
                unless (defined $udir) {
                    if ($untaint_skip == 0) {
                        die "directory (" . ($p_dir ne '/' ? $p_dir : '') . "/) $dir_rel is still tainted";
                    } else { # $untaint_skip == 1
                        next;
                    }
                }
            }
            unless (chdir ($Is_VMS && $udir !~ /[\/\[<]+/ ? "./$udir" : $udir)) {
                warnings::warnif "Can't cd to (" .
                    ($p_dir ne '/' ? $p_dir : '') . "/) $udir: $!\n";
                next;
            }
            $CdLvl++;
        }

        $dir= $dir_name; # $File::Find::dir

        # Get the list of files in the current directory.
        unless (opendir DIR, ($no_chdir ? $dir_name : $File::Find::current_dir)) {
            warnings::warnif "Can't opendir($dir_name): $!\n";
            next;
        }
        @filenames = readdir DIR;
        closedir(DIR);
        @filenames = $pre_process->(@filenames) if $pre_process;
        push @Stack,[$CdLvl,$dir_name,"",-2]   if $post_process;

        # default: use whatever was specified
        # (if $nlink >= 2, and $avoid_nlink == 0, this will switch back)
        $no_nlink = $avoid_nlink;
        # if dir has wrong nlink count, force switch to slower stat method
        $no_nlink = 1 if ($nlink < 2);

        if ($nlink == 2 && !$no_nlink) {
            # This dir has no subdirectories.
            for my $FN (@filenames) {
                if ($Is_VMS) {
                    # Big hammer here - Compensate for VMS trailing . and .dir
                    # No win situation until this is changed, but this
                    # will handle the majority of the cases with breaking the fewest

                    $FN =~ s/\.dir\z//i;
                    $FN =~ s#\.$## if ($FN ne '.');
                }
                next if $FN =~ $File::Find::skip_pattern;

                $name = $dir_pref . $FN; # $File::Find::name
                $_ = ($no_chdir ? $name : $FN); # $_
                { $wanted_callback->() }; # protect against wild "next"
            }

        }
        else {
            # This dir has subdirectories.
            $subcount = $nlink - 2;

            # HACK: insert directories at this position. so as to preserve
            # the user pre-processed ordering of files.
            # EG: directory traversal is in user sorted order, not at random.
            my $stack_top = @Stack;

            for my $FN (@filenames) {
                next if $FN =~ $File::Find::skip_pattern;
                if ($subcount > 0 || $no_nlink) {
                    # Seen all the subdirs?
                    # check for directoriness.
                    # stat is faster for a file in the current directory
                    $sub_nlink = (lstat ($no_chdir ? $dir_pref . $FN : $FN))[3];

                    if (-d _) {
                        --$subcount;
                        $FN =~ s/\.dir\z//i if $Is_VMS;
                        # HACK: replace push to preserve dir traversal order
                        #push @Stack,[$CdLvl,$dir_name,$FN,$sub_nlink];
                        splice @Stack, $stack_top, 0,
                                 [$CdLvl,$dir_name,$FN,$sub_nlink];
                    }
                    else {
                        $name = $dir_pref . $FN; # $File::Find::name
                        $_= ($no_chdir ? $name : $FN); # $_
                        { $wanted_callback->() }; # protect against wild "next"
                    }
                }
                else {
                    $name = $dir_pref . $FN; # $File::Find::name
                    $_= ($no_chdir ? $name : $FN); # $_
                    { $wanted_callback->() }; # protect against wild "next"
                }
            }
        }
    }
    continue {
        while ( defined ($SE = pop @Stack) ) {
            ($Level, $p_dir, $dir_rel, $nlink) = @$SE;
            # Climb back up to the level recorded with this stack entry.
            if ($CdLvl > $Level && !$no_chdir) {
                my $tmp;
                if ($Is_VMS) {
                    $tmp = '[' . ('-' x ($CdLvl-$Level)) . ']';
                }
                else {
                    $tmp = join('/',('..') x ($CdLvl-$Level));
                }
                die "Can't cd to $tmp from $dir_name"
                    unless chdir ($tmp);
                $CdLvl = $Level;
            }

            if ($Is_Win32) {
                $dir_name = ($p_dir =~ m{^(?:\w:[/\\]?|[/\\])$}
                    ? "$p_dir$dir_rel" : "$p_dir/$dir_rel");
                $dir_pref = "$dir_name/";
            }
            elsif ($^O eq 'VMS') {
                if ($p_dir =~ m/[\]>]+$/) {
                    $dir_name = $p_dir;
                    $dir_name =~ s/([\]>]+)$/.$dir_rel$1/;
                    $dir_pref = $dir_name;
                }
                else {
                    $dir_name = "$p_dir/$dir_rel";
                    $dir_pref = "$dir_name/";
                }
            }
            else {
                $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
                $dir_pref = "$dir_name/";
            }

            if ( $nlink == -2 ) {
                $name = $dir = $p_dir; # $File::Find::name / dir
                $_ = $File::Find::current_dir;
                $post_process->();              # End-of-directory processing
            }
            elsif ( $nlink < 0 ) {  # must be finddepth, report dirname now
                $name = $dir_name;
                if ( substr($name,-2) eq '/.' ) {
                    substr($name, length($name) == 2 ? -1 : -2) = '';
                }
                $dir = $p_dir;
                $_ = ($no_chdir ? $dir_name : $dir_rel );
                if ( substr($_,-2) eq '/.' ) {
                    substr($_, length($_) == 2 ? -1 : -2) = '';
                }
                { $wanted_callback->() }; # protect against wild "next"
            }
            else {
                push @Stack,[$CdLvl,$p_dir,$dir_rel,-1]  if  $bydepth;
                last;
            }
        }
    }
}


# API:
#  $wanted
#  $dir_loc : absolute location of a dir
#  $p_dir   : "parent directory"
# preconditions:
#  chdir (if not no_chdir) to dir
#
# Symlink-following traversal.  Stack entries are
# [$dir_loc, $updir_loc, $p_dir, $dir_rel, $byd_flag]; a negative
# $byd_flag marks a directory to be reported after its contents
# (finddepth).
sub _find_dir_symlnk($$$) {
    my ($wanted, $dir_loc, $p_dir) = @_; # $dir_loc is the absolute directory
    my @Stack;
    my @filenames;
    my $new_loc;
    my $updir_loc = $dir_loc; # untainted parent directory
    my $SE = [];
    my $dir_name = $p_dir;
    my $dir_pref;
    my $loc_pref;
    my $dir_rel = $File::Find::current_dir;
    my $byd_flag; # flag for pending stack entry if $bydepth
    my $tainted = 0;
    my $ok = 1;

    $dir_pref = ( $p_dir   eq '/' ? '/' : "$p_dir/" );
    $loc_pref = ( $dir_loc eq '/' ? '/' : "$dir_loc/" );

    local ($dir, $name, $fullname, $prune, *DIR);

    unless ($no_chdir) {
        # untaint the topdir
        if (( $untaint ) && (is_tainted($dir_loc) )) {
            ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|; # parent dir, now untainted
            # once untainted, $updir_loc is pushed on the stack (as parent directory);
            # hence, we don't need to untaint the parent directory every time we chdir
            # to it later
            unless (defined $updir_loc) {
                if ($untaint_skip == 0) {
                    die "directory $dir_loc is still tainted";
                }
                else {
                    return;
                }
            }
        }
        $ok = chdir($updir_loc) unless ($p_dir eq $File::Find::current_dir);
        unless ($ok) {
            warnings::warnif "Can't cd to $updir_loc: $!\n";
            return;
        }
    }

    push @Stack,[$dir_loc,$updir_loc,$p_dir,$dir_rel,-1]  if  $bydepth;

    while (defined $SE) {

        unless ($bydepth) {
            # change (back) to parent directory (always untainted)
            unless ($no_chdir) {
                unless (chdir $updir_loc) {
                    warnings::warnif "Can't cd to $updir_loc: $!\n";
                    next;
                }
            }
            $dir= $p_dir; # $File::Find::dir
            $name= $dir_name; # $File::Find::name
            $_= ($no_chdir ? $dir_name : $dir_rel ); # $_
            $fullname= $dir_loc; # $File::Find::fullname
            # prune may happen here
            $prune= 0;
            lstat($_); # make sure  file tests with '_' work
            { $wanted_callback->() }; # protect against wild "next"
            next if $prune;
        }

        # change to that directory
        unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
            $updir_loc = $dir_loc;
            if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_loc) )) ) {
                # untaint $dir_loc, what will be pushed on the stack as (untainted) parent dir
                ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|;
                unless (defined $updir_loc) {
                    if ($untaint_skip == 0) {
                        die "directory $dir_loc is still tainted";
                    }
                    else {
                        next;
                    }
                }
            }
            unless (chdir $updir_loc) {
                warnings::warnif "Can't cd to $updir_loc: $!\n";
                next;
            }
        }

        $dir = $dir_name; # $File::Find::dir

        # Get the list of files in the current directory.
        unless (opendir DIR, ($no_chdir ? $dir_loc : $File::Find::current_dir)) {
            warnings::warnif "Can't opendir($dir_loc): $!\n";
            next;
        }
        @filenames = readdir DIR;
        closedir(DIR);

        for my $FN (@filenames) {
            if ($Is_VMS) {
                # Big hammer here - Compensate for VMS trailing . and .dir
                # No win situation until this is changed, but this
                # will handle the majority of the cases with breaking the fewest.

                $FN =~ s/\.dir\z//i;
                $FN =~ s#\.$## if ($FN ne '.');
            }
            next if $FN =~ $File::Find::skip_pattern;

            # follow symbolic links / do an lstat
            $new_loc = Follow_SymLink($loc_pref.$FN);

            # ignore if invalid symlink
            unless (defined $new_loc) {
                if (!defined -l _ && $dangling_symlinks) {
                    if (ref $dangling_symlinks eq 'CODE') {
                        $dangling_symlinks->($FN, $dir_pref);
                    } else {
                        warnings::warnif "$dir_pref$FN is a dangling symbolic link\n";
                    }
                }

                # The entry is still reported, with an undefined fullname.
                $fullname = undef;
                $name = $dir_pref . $FN;
                $_ = ($no_chdir ? $name : $FN);
                { $wanted_callback->() };
                next;
            }

            if (-d _) {
                if ($Is_VMS) {
                    $FN =~ s/\.dir\z//i;
                    $FN =~ s#\.$## if ($FN ne '.');
                    $new_loc =~ s/\.dir\z//i;
                    $new_loc =~ s#\.$## if ($new_loc ne '.');
                }
                push @Stack,[$new_loc,$updir_loc,$dir_name,$FN,1];
            }
            else {
                $fullname = $new_loc; # $File::Find::fullname
                $name = $dir_pref . $FN; # $File::Find::name
                $_ = ($no_chdir ? $name : $FN); # $_
                { $wanted_callback->() }; # protect against wild "next"
            }
        }

    }
    continue {
        while (defined($SE = pop @Stack)) {
            ($dir_loc, $updir_loc, $p_dir, $dir_rel, $byd_flag) = @$SE;
            $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
            $dir_pref = "$dir_name/";
            $loc_pref = "$dir_loc/";
            if ( $byd_flag < 0 ) {  # must be finddepth, report dirname now
                unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
                    unless (chdir $updir_loc) { # $updir_loc (parent dir) is always untainted
                        warnings::warnif "Can't cd to $updir_loc: $!\n";
                        next;
                    }
                }
                $fullname = $dir_loc; # $File::Find::fullname
                $name = $dir_name; # $File::Find::name
                if ( substr($name,-2) eq '/.' ) {
                    substr($name, length($name) == 2 ? -1 : -2) = ''; # $File::Find::name
                }
                $dir = $p_dir; # $File::Find::dir
                $_ = ($no_chdir ? $dir_name : $dir_rel); # $_
                if ( substr($_,-2) eq '/.' ) {
                    substr($_, length($_) == 2 ? -1 : -2) = '';
                }

                lstat($_); # make sure file tests with '_' work
                { $wanted_callback->() }; # protect against wild "next"
            }
            else {
                push @Stack,[$dir_loc, $updir_loc, $p_dir, $dir_rel,-1]  if  $bydepth;
                last;
            }
        }
    }
}

# wrap_wanted($wanted)
#
# Normalise the first argument of find()/finddepth() into an option hash.
# A hash reference must carry a CODE reference under 'wanted'; defaults
# for follow_skip, untaint_pattern and untaint_skip are filled in when the
# matching feature is switched on.  A bare CODE reference is wrapped as
# { wanted => $code }.  Anything else dies.
sub wrap_wanted {
    my $wanted = shift;
    if ( ref($wanted) eq 'HASH' ) {
        unless( exists $wanted->{wanted} and ref( $wanted->{wanted} ) eq 'CODE' ) {
            die 'no &wanted subroutine given';
        }
        if ( $wanted->{follow} || $wanted->{follow_fast}) {
            $wanted->{follow_skip} = 1 unless defined $wanted->{follow_skip};
        }
        if ( $wanted->{untaint} ) {
            $wanted->{untaint_pattern} = $File::Find::untaint_pattern
                unless defined $wanted->{untaint_pattern};
            $wanted->{untaint_skip} = 0 unless defined $wanted->{untaint_skip};
        }
        return $wanted;
    }
    elsif( ref( $wanted ) eq 'CODE' ) {
        return { wanted => $wanted };
    }
    else {
       die 'no &wanted subroutine given';
    }
}

# find($wanted, @directories): traverse, reporting each directory before
# its contents.
sub find {
    my $wanted = shift;
    _find_opt(wrap_wanted($wanted), @_);
}

# finddepth($wanted, @directories): like find(), but with bydepth set so
# that a directory is reported after its contents.
sub finddepth {
    my $wanted = wrap_wanted(shift);
    $wanted->{bydepth} = 1;
    _find_opt($wanted, @_);
}

# default
$File::Find::skip_pattern    = qr/^\.{1,2}\z/;
$File::Find::untaint_pattern = qr|^([-+@\w./]+)$|;

# These are hard-coded for now, but may move to hint files.
if ($^O eq 'VMS') {
    $Is_VMS = 1;
    $File::Find::dont_use_nlink  = 1;
}
elsif ($^O eq 'MSWin32') {
    $Is_Win32 = 1;
}

# this _should_ work properly on all platforms
# where File::Find can be expected to work
$File::Find::current_dir = File::Spec->curdir || '.';

$File::Find::dont_use_nlink = 1
    if $^O eq 'os2' || $^O eq 'dos' || $^O eq 'amigaos' || $Is_Win32 ||
       $^O eq 'interix' || $^O eq 'cygwin' || $^O eq 'epoc' || $^O eq 'qnx' ||
       $^O eq 'nto';

# Set dont_use_nlink in your hint file if your system's stat doesn't
# report the number of links in a directory as an indication
# of the number of files.
# See, e.g.
# hints/machten.sh for MachTen 2.2.
unless ($File::Find::dont_use_nlink) {
    require Config;
    $File::Find::dont_use_nlink = 1 if ($Config::Config{'dont_use_nlink'});
}

# We need a function that checks if a scalar is tainted. Either use the
# Scalar::Util module's tainted() function or our (slower) pure Perl
# fallback is_tainted_pp()
#
# NOTE(review): in this copy the statement is cut off after
# "require Scalar::Ut".  The remainder of the block is rebuilt from the
# comment above; confirm it against a pristine File::Find.
{
    local $@;
    eval {
        require Scalar::Util;
        *is_tainted = \&Scalar::Util::tainted;
        1;
    } or do {
        *is_tainted = \&is_tainted_pp;
    };
}

package File::GlobMapper;

use strict;
use warnings;
use Carp;

our ($CSH_GLOB);

BEGIN
{
    if ($] < 5.006)
    {
        require File::BSDGlob; File::BSDGlob->import(qw(:glob)) ;
        $CSH_GLOB = File::BSDGlob::GLOB_CSH() ;
        *globber = \&File::BSDGlob::csh_glob;
    }
    else
    {
        require File::Glob; File::Glob->import(qw(:glob)) ;
        $CSH_GLOB = File::Glob::GLOB_CSH() ;
        #*globber = \&File::Glob::bsd_glob;
        *globber = \&File::Glob::csh_glob;
    }
}

our ($Error);

# @EXPORT_OK only takes effect when the package inherits Exporter's
# import(); without it "use File::GlobMapper qw(globmap)" imports nothing.
require Exporter;
our @ISA = qw(Exporter);

our ($VERSION, @EXPORT_OK);
$VERSION = '1.000';
@EXPORT_OK = qw( globmap );


# NOTE(review): the four assignments from $noPreBS to the start of
# %mapping were partly lost in this copy (text between '<' and '>' was
# stripped); they are restored here -- confirm against a pristine copy.
our ($noPreBS, $metachars, $matchMetaRE, %mapping, %wildCount);
$noPreBS = '(?<!\\\)' ; # no preceding backslash
$metachars = '.*?[](){}';
$matchMetaRE = '[' . quotemeta($metachars) . ']';

# Glob metacharacter => regular expression fragment.
%mapping = (
                '*' => '([^/]*)',
                '?' => '([^/])',
                '.' => '\.',
                '[' => '([',
                '(' => '(',
                ')' => ')',
           );

# Metacharacters that bump the object's WildCount while parsing.
%wildCount = map { $_ => 1 } qw/ * ? . { ( [ /;

# globmap($inputGlob, $outputGlob [, $flags])
#
# Functional front end: build a File::GlobMapper object and return its
# list of [input, output] pairs (an array reference).  Croaks with $Error
# when the object cannot be built.
sub globmap ($$;)
{
    my $inputGlob = shift ;
    my $outputGlob = shift ;

    my $obj = File::GlobMapper->new($inputGlob, $outputGlob, @_)
        or croak "globmap: $Error" ;
    return $obj->getFileMap();
}

# new($inputGlob, $outputGlob [, $flags])
#
# Parse both globs, expand the input glob and compute the output name for
# every matched file.  Optional enclosing <> around either glob are
# removed first.  Returns the object, or undef with $Error set.
sub new
{
    my $class = shift ;
    my $inputGlob = shift ;
    my $outputGlob = shift ;
    # TODO -- flags needs to default to whatever File::Glob does
    my $flags = shift || $CSH_GLOB ;
    #my $flags = shift ;

    $inputGlob =~ s/^\s*\<\s*//;
    $inputGlob =~ s/\s*\>\s*$//;

    $outputGlob =~ s/^\s*\<\s*//;
    $outputGlob =~ s/\s*\>\s*$//;

    my %object =
            (   InputGlob   => $inputGlob,
                OutputGlob  => $outputGlob,
                GlobFlags   => $flags,
                Braces      => 0,
                WildCount   => 0,
                Pairs       => [],
                Sigil       => '#',
            );

    my $self = bless \%object, ref($class) || $class ;

    $self->_parseInputGlob()
        or return undef ;

    $self->_parseOutputGlob()
        or return undef ;

    my @inputFiles = globber($self->{InputGlob}, $flags) ;

    if (GLOB_ERROR)
    {
        $Error = $!;
        return undef ;
    }

    #if (whatever)
    {
        my $missing = grep { ! -e $_ } @inputFiles ;

        if ($missing)
        {
            $Error = "$missing input files do not exist";
            return undef ;
        }
    }

    $self->{InputFiles} = \@inputFiles ;

    $self->_getFiles()
        or return undef ;

    return $self;
}

# Record "$string in input fileglob" in $Error; always returns undef.
sub _retError
{
    my $string = shift ;
    $Error = "$string in input fileglob" ;
    return undef ;
}

# Record an "Unmatched <delimiter>" error; always returns undef.
sub _unmatched
{
    my $delimeter = shift ;

    _retError("Unmatched $delimeter");
    return undef ;
}

# _parseBit($string)
#
# Translate the text found between { and } into a regular expression
# alternation: unescaped commas become '|', other metacharacters are
# mapped through %mapping.  Returns the fragment, or undef (with $Error
# set) on unbalanced delimiters or nested braces.
sub _parseBit
{
    my $self = shift ;

    my $string = shift ;

    my $out = '';
    my $depth = 0 ;

    while ($string =~ s/(.*?)$noPreBS(,|$matchMetaRE)//)
    {
        $out .= quotemeta($1) ;
        $out .= $mapping{$2} if defined $mapping{$2};

        ++ $self->{WildCount} if $wildCount{$2} ;

        if ($2 eq ',')
        {
            return _unmatched "("
                if $depth ;

            $out .= '|';
        }
        elsif ($2 eq '(')
        {
            ++ $depth ;
        }
        elsif ($2 eq ')')
        {
            return _unmatched ")"
                if ! $depth ;

            -- $depth ;
        }
        elsif ($2 eq '[')
        {
            # TODO -- quotemeta & check no '/'
            # TODO -- check for \]  & other \ within the []
            $string =~ s#(.*?\])##
                or return _unmatched "[" ;
            $out .= "$1)" ;
        }
        elsif ($2 eq ']')
        {
            return _unmatched "]" ;
        }
        elsif ($2 eq '{' || $2 eq '}')
        {
            return _retError "Nested {} not allowed" ;
        }
    }

    $out .= quotemeta $string;

    return _unmatched "("
        if $depth ;

    return $out ;
}

# _parseInputGlob()
#
# Translate $self->{InputGlob} into the regular expression stored in
# $self->{InputPattern}, counting wildcards and brace groups on the way,
# and strip unescaped parentheses from the glob that is later handed to
# the globber.  Returns 1 on success; on failure returns undef (or 0 when
# a brace group fails to parse) with $Error set.
sub _parseInputGlob
{
    my $self = shift ;

    my $string = $self->{InputGlob} ;
    my $inGlob = '';

    # Multiple concatenated *'s don't make sense
    #$string =~ s#\*\*+#*# ;

    # TODO -- Allow space to delimit patterns?
    #my @strings = split /\s+/, $string ;
    #for my $str (@strings)
    my $out = '';
    my $depth = 0 ;

    while ($string =~ s/(.*?)$noPreBS($matchMetaRE)//)
    {
        $out .= quotemeta($1) ;
        $out .= $mapping{$2} if defined $mapping{$2};
        ++ $self->{WildCount} if $wildCount{$2} ;

        if ($2 eq '(')
        {
            ++ $depth ;
        }
        elsif ($2 eq ')')
        {
            return _unmatched ")"
                if ! $depth ;

            -- $depth ;
        }
        elsif ($2 eq '[')
        {
            # TODO -- quotemeta & check no '/' or '(' or ')'
            # TODO -- check for \]  & other \ within the []
            $string =~ s#(.*?\])##
                or return _unmatched "[";
            $out .= "$1)" ;
        }
        elsif ($2 eq ']')
        {
            return _unmatched "]" ;
        }
        elsif ($2 eq '}')
        {
            return _unmatched "}" ;
        }
        elsif ($2 eq '{')
        {
            # TODO -- check no '/' within the {}
            # TODO -- check for \}  & other \ within the {}

            my $tmp ;
            unless ( $string =~ s/(.*?)$noPreBS\}//)
            {
                return _unmatched "{";
            }
            #$string =~ s#(.*?)\}##;

            #my $alt = join '|',
            #          map { quotemeta $_ }
            #          split "$noPreBS,", $1 ;
            my $alt = $self->_parseBit($1);
            defined $alt or return 0 ;
            $out .= "($alt)" ;

            ++ $self->{Braces} ;
        }
    }

    return _unmatched "("
        if $depth ;

    $out .= quotemeta $string ;


    $self->{InputGlob} =~ s/$noPreBS[\(\)]//g;
    $self->{InputPattern} = $out ;

    #print "# INPUT '$self->{InputGlob}' => '$out'\n";

    return 1 ;

}

# _parseOutputGlob()
#
# Turn $self->{OutputGlob} into a double-quoted Perl string expression
# (stored in $self->{OutputPattern}) in which "#N" refers to capture group
# N of the input pattern and an unescaped "*" to the whole input filename.
# A leading ~ or ~user is expanded when GLOB_TILDE is set in the flags.
# Croaks when a "#N" exceeds the wildcard count of the input glob.
sub _parseOutputGlob
{
    my $self = shift ;

    my $string = $self->{OutputGlob} ;
    my $maxwild = $self->{WildCount};

    if ($self->{GlobFlags} & GLOB_TILDE)
    #if (1)
    {
        $string =~ s{
              ^ ~             # find a leading tilde
              (               # save this in $1
                  [^/]        # a non-slash character
                        *     # repeated 0 or more times (0 means me)
              )
            }{
              $1
                  ? (getpwnam($1))[7]
                  : ( $ENV{HOME} || $ENV{LOGDIR} )
            }ex;

    }

    # max #1 must be == to max no of '*' in input
    while ( $string =~ m/#(\d)/g )
    {
        croak "Max wild is #$maxwild, you tried #$1"
            if $1 > $maxwild ;
    }

    # NOTE(review): the statements from here down to the commented-out
    # "OUTPUT" print were partly lost in this copy (text between '<' and
    # '>' was stripped); they are restored here -- confirm against a
    # pristine copy.
    my $noPreBS = '(?<!\\\)' ; # no preceding backslash
    #warn "noPreBS = '$noPreBS'\n";

    #$string =~ s/${noPreBS}\$(\d)/\${$1}/g;
    $string =~ s/${noPreBS}#(\d)/\${$1}/g;
    $string =~ s#${noPreBS}\*#\${inFile}#g;
    $string = '"' . $string . '"';

    #print "OUTPUT '$self->{OutputGlob}' => '$string'\n";
    $self->{OutputPattern} = $string ;

    return 1 ;
}

# _getFiles()
#
# Compute the output name for every distinct input file and append
# [input, output] pairs to $self->{Pairs}.  Returns 1, or undef with
# $Error set when two inputs map to the same output.
sub _getFiles
{
    my $self = shift ;

    my %outInMapping = ();
    my %inFiles = () ;

    foreach my $inFile (@{ $self->{InputFiles} })
    {
        next if $inFiles{$inFile} ++ ;

        my $outFile = $inFile ;

        if ( $inFile =~ m/$self->{InputPattern}/ )
        {
            no warnings 'uninitialized';
            # XXX: string eval of a pattern derived from the caller's
            # output glob; an output glob from an untrusted source can
            # inject Perl code here.
            eval "\$outFile = $self->{OutputPattern};" ;

            if (defined $outInMapping{$outFile})
            {
                $Error =  "multiple input files map to one output file";
                return undef ;
            }
            $outInMapping{$outFile} = $inFile;
            push @{ $self->{Pairs} }, [$inFile, $outFile];
        }
    }

    return 1 ;
}

# Return the array reference of [input, output] pairs.
sub getFileMap
{
    my $self = shift ;

    return $self->{Pairs} ;
}

# Return the mapping as a hash reference: { input => output, ... }.
sub getHash
{
    my $self = shift ;

    return { map { $_->[0] => $_->[1] } @{ $self->{Pairs} } } ;
}

1;

__END__

=head1 NAME

File::GlobMapper - Extend File Glob to Allow Input and Output Files

=head1 SYNOPSIS

    use File::GlobMapper qw( globmap );

    my $aref = globmap $input => $output
        or die $File::GlobMapper::Error ;

    my $gm = File::GlobMapper->new( $input => $output )
        or die $File::GlobMapper::Error ;


=head1 DESCRIPTION

This module needs Perl5.005 or better.

This module takes the existing C<File::Glob> module as a starting point and
extends it to allow new filenames to be derived from the files matched by
C<File::Glob>.

This can be useful when carrying out batch operations on multiple files that
have both an input filename and output filename and the output file can be
derived from the input filename. Examples of operations where this can be
useful include, file renaming, file copying and file compression.

=head2 Behind The Scenes

To help explain what C<File::GlobMapper> does, consider what code you
would write if you wanted to rename all files in the current directory
that ended in C<.tar.gz> to C<.tgz>. So say these files are in the
current directory

    alpha.tar.gz
    beta.tar.gz
    gamma.tar.gz

and they need to be renamed to this

    alpha.tgz
    beta.tgz
    gamma.tgz

Below is a possible implementation of a script to carry out the rename
(error cases have been omitted)

    foreach my $old ( glob "*.tar.gz" )
    {
        my $new = $old;
        $new =~ s#(.*)\.tar\.gz$#$1.tgz# ;

        rename $old => $new
            or die "Cannot rename '$old' to '$new': $!\n";
    }

Notice that a file glob pattern C<*.tar.gz> was used to match the
C<.tar.gz> files, then a fairly similar regular expression was used in
the substitute to allow the new filename to be created.

Given that the file glob is just a cut-down regular expression and that it
has already done a lot of the hard work in pattern matching the filenames,
wouldn't it be handy to be able to use the patterns in the fileglob to
drive the new filename?

Well, that's I<exactly> what C<File::GlobMapper> does.

Here is the same snippet of code rewritten using C<globmap>

    for my $pair (@{ globmap '<*.tar.gz>' => '<#1.tgz>' })
    {
        my ($from, $to) = @$pair;
        rename $from => $to
            or die "Cannot rename '$from' to '$to': $!\n";
    }

So how does it work?

Behind the scenes the C<globmap> function does a combination of a
file glob to match existing filenames followed by a substitute
to create the new filenames.

Notice how both parameters to C<globmap> are strings that are delimited by <>.
This is done to make them look more like file globs - it is just syntactic
sugar, but it can be handy when you want the strings to be visually
distinctive. The enclosing <> are optional, so you don't have to use them - in
fact the first thing globmap will do is remove these delimiters if they are
present.

The first parameter to C<globmap>, C<*.tar.gz>, is an I<Input File Glob>.
Once the enclosing "< ... >" is removed, this is passed (more or
less) unchanged to C<File::Glob> to carry out a file match.

Next the fileglob C<*.tar.gz> is transformed behind the scenes into a
full Perl regular expression, with the additional step of wrapping each
transformed wildcard metacharacter sequence in parentheses.

In this case the input fileglob C<*.tar.gz> will be transformed into
this Perl regular expression

    ([^/]*)\.tar\.gz

Wrapping with parentheses allows the wildcard parts of the Input File
Glob to be referenced by the second parameter to C<globmap>, C<#1.tgz>,
the I<Output File Glob>. This parameter operates just like the replacement
part of a substitute command. The difference is that the C<#1> syntax
is used to reference sub-patterns matched in the input fileglob, rather
than the C<$1> syntax that is used with perl regular expressions. In
this case C<#1> is used to refer to the text matched by the C<*> in the
Input File Glob. This makes it easier to use this module where the
parameters to C<globmap> are typed at the command line.

The final step involves passing each filename matched by the C<*.tar.gz>
file glob through the derived Perl regular expression in turn and
expanding the output fileglob using it.

The end result of all this is a list of pairs of filenames. By default
that is what is returned by C<globmap>. In this example the data structure
returned will look like this

     ( ['alpha.tar.gz' => 'alpha.tgz'],
       ['beta.tar.gz'  => 'beta.tgz' ],
       ['gamma.tar.gz' => 'gamma.tgz']
     )


Each pair is an array reference with two elements - namely the I<from>
filename, that C<File::Glob> has matched, and a I<to> filename that is
derived from the I<from> filename.


=head2 Limitations

C<File::GlobMapper> has been kept simple deliberately, so it isn't intended to
solve all filename mapping operations. Under the hood C<File::Glob> (or for
older versions of Perl, C<File::BSDGlob>) is used to match the files, so you
will never have the flexibility of full Perl regular expression.

=head2 Input File Glob

The syntax for an Input FileGlob is identical to C<File::Glob>, except
for the following

=over 5

=item 1.

No nested {}

=item 2.

Whitespace does not delimit fileglobs.

=item 3.

The use of parentheses can be used to capture parts of the input filename.

=item 4.

If an Input glob matches the same file more than once, only the first
will be used.

=back

The syntax

=over 5

=item B<~>

=item B<~user>


=item B<.>

Matches a literal '.'.
Equivalent to the Perl regular expression

    \.

=item B<*>

Matches zero or more characters, except '/'. Equivalent to the Perl
regular expression

    [^/]*

=item B<?>

Matches exactly one character, except '/'. Equivalent to the Perl
regular expression

    [^/]

=item B<\>

Backslash is used, as usual, to escape the next character.

=item B<[]>

Character class.

=item B<{,}>

Alternation

=item B<()>

Capturing parentheses that work just like perl

=back

Any other character is taken literally.

=head2 Output File Glob

The Output File Glob is a normal string, with 2 glob-like features.

The first is the '*' metacharacter. This will be replaced by the complete
filename matched by the input file glob. So

    *.c *.Z

The second is the C<#1> syntax, which is replaced by the text matched by
the corresponding wildcard or parenthesised part of the input file glob.

Output FileGlobs take the following metacharacters

=over 5

=item "*"

The "*" character will be replaced with the complete input filename.

=item #1

Patterns of the form /#\d/ will be replaced with the text matched by the
sub-pattern of that number in the input file glob.

=back

=head2 Returned Data


=head1 EXAMPLES

=head2 A Rename script

Below is a simple "rename" script that uses C<globmap> to determine the
source and destination filenames.

    use File::GlobMapper qw(globmap) ;
    use File::Copy;

    die "rename: Usage rename 'from' 'to'\n"
        unless @ARGV == 2 ;

    my $fromGlob = shift @ARGV;
    my $toGlob   = shift @ARGV;

    my $pairs = globmap($fromGlob, $toGlob)
        or die $File::GlobMapper::Error;

    for my $pair (@$pairs)
    {
        my ($from, $to) = @$pair;
        move $from => $to ;
    }



Here is an example that renames all c files to cpp.

    $ rename '*.c' '#1.cpp'

=head2 A few example globmaps

Below are a few examples of globmaps

To copy all your .c file to a backup directory

    '</my/home/*.c>'    '</my/backup/#1.c>'

If you want to compress all

    '</my/home/*.[ch]>'    '<*.gz>'

To uncompress

    '</my/home/*.[ch].gz>'    '</my/home/#1.#2>'

=head1 SEE ALSO

L<File::Glob>

=head1 AUTHOR

The I<File::GlobMapper> module was written by Paul Marquess, F<pmqs@cpan.org>.

=head1 COPYRIGHT AND LICENSE

Copyright (c) 2005 Paul Marquess. All rights reserved.
This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

y. Currently maintained by David Landgren >.

=head1 COPYRIGHT

This module is copyright (C) Charles Bailey, Tim Bunce and
David Landgren 1995-2009. All rights reserved.

=head1 LICENSE

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=cut

package File::Path;

use 5.005_04;
use strict;

use Cwd 'getcwd';
use File::Basename ();
use File::Spec     ();

BEGIN {
    if ($] < 5.006) {
        # can't say 'opendir my $dh, $dirname'
        # need to initialise $dh
        eval "use Symbol";
    }
}

use Exporter ();
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
$VERSION   = '2.08_01';
@ISA       = qw(Exporter);
@EXPORT    = qw(mkpath rmtree);
@EXPORT_OK = qw(make_path remove_tree);

my $Is_VMS   = $^O eq 'VMS';
my $Is_MacOS = $^O eq 'MacOS';

# These OSes complain if you want to remove a file that you have no
# write permission to:
my $Force_Writeable = grep {$^O eq $_} qw(amigaos dos epoc MSWin32 MacOS os2);

# Unix-like systems need to stat each directory in order to detect
# race condition. MS-Windows is immune to this particular attack.
my $Need_Stat_Check = !($^O eq 'MSWin32');

# Load Carp on demand and hand over to Carp::carp with the caller's @_.
sub _carp {
    require Carp;
    goto &Carp::carp;
}

# Load Carp on demand and hand over to Carp::croak with the caller's @_.
sub _croak {
    require Carp;
    goto &Carp::croak;
}

# _error($arg, $message [, $object])
#
# Report a problem.  When the caller supplied an 'error' slot in $arg, the
# message (with ": $!" appended when $! is set) is pushed onto that list
# as { $object => $message }; otherwise it is carp()ed.
sub _error {
    my $arg     = shift;
    my $message = shift;
    my $object  = shift;

    if ($arg->{error}) {
        $object = '' unless defined $object;
        $message .= ": $!" if $!;
        push @{${$arg->{error}}}, {$object => $message};
    }
    else {
        _carp(defined($object) ? "$message for $object: $!" : "$message: $!");
    }
}

# make_path(@paths [, \%options])
#
# New-style entry point: makes sure the last argument is an option hash,
# then continues in mkpath().
sub make_path {
    push @_, {} unless @_ and UNIVERSAL::isa($_[-1],'HASH');
    goto &mkpath;
}

# mkpath($paths, $verbose, $mode)      -- old style
# mkpath(@paths, \%options)            -- new style (last argument a hash)
#
# Normalise the arguments into an option hash plus an array reference of
# paths and create the directories through _mkpath().  Returns the list of
# directories created.
sub mkpath {
    my $old_style = !(@_ and UNIVERSAL::isa($_[-1],'HASH'));

    my $arg;
    my $paths;

    if ($old_style) {
        my ($verbose, $mode);
        ($paths, $verbose, $mode) = @_;
        $paths = [$paths] unless UNIVERSAL::isa($paths,'ARRAY');
        $arg->{verbose} = $verbose;
        $arg->{mode}    = defined $mode ? $mode : 0777;
    }
    else {
        $arg = pop @_;
        # 'mask' is accepted as an alias for 'mode', 'user' and 'uid' as
        # aliases for 'owner'.
        $arg->{mode}      = delete $arg->{mask} if exists $arg->{mask};
        $arg->{mode}      = 0777 unless exists $arg->{mode};
        ${$arg->{error}}  = [] if exists $arg->{error};
        $arg->{owner}     = delete $arg->{user} if exists $arg->{user};
        $arg->{owner}     = delete $arg->{uid}  if exists $arg->{uid};
        if (exists $arg->{owner} and $arg->{owner} =~ /\D/) {
            my $uid = (getpwnam $arg->{owner})[2];
            if (defined $uid) {
                $arg->{owner} = $uid;
            }
            else {
                _error($arg, "unable to map $arg->{owner} to a uid, ownership not changed");
                delete $arg->{owner};
            }
        }
        if (exists $arg->{group} and $arg->{group} =~ /\D/) {
            my $gid = (getgrnam $arg->{group})[2];
            if (defined $gid) {
                $arg->{group} = $gid;
            }
            else {
                _error($arg, "unable to map $arg->{group} to a gid, group ownership not changed");
                delete $arg->{group};
            }
        }
        if (exists $arg->{owner} and not exists $arg->{group}) {
            $arg->{group} = -1; # chown will leave group unchanged
        }
        if (exists $arg->{group} and not exists $arg->{owner}) {
            $arg->{owner} = -1; # chown will leave owner unchanged
        }
        $paths = [@_];
    }
    return _mkpath($arg, $paths);
}

# _mkpath($arg, $paths)
#
# Create every path in @$paths, creating missing parents first by
# recursion.  Returns the list of directories actually created.  A failed
# mkdir is recorded in the 'error' slot when there is one and croaks
# otherwise, unless the directory exists by then.
sub _mkpath {
    my $arg   = shift;
    my $paths = shift;

    my(@created,$path);
    foreach $path (@$paths) {
        next unless defined($path) and length($path);
        $path .= '/' if $^O eq 'os2' and $path =~ /^\w:\z/s; # feature of CRT
        # Logic wants Unix paths, so go with the flow.
        if ($Is_VMS) {
            next if $path eq '/';
            $path = VMS::Filespec::unixify($path);
        }
        next if -d $path;
        my $parent = File::Basename::dirname($path);
        unless (-d $parent or $path eq $parent) {
            push(@created,_mkpath($arg, [$parent]));
        }
        print "mkdir $path\n" if $arg->{verbose};
        if (mkdir($path,$arg->{mode})) {
            push(@created, $path);
            if (exists $arg->{owner}) {
                # NB: $arg->{group} guaranteed to be set during initialisation
                if (!chown $arg->{owner}, $arg->{group}, $path) {
                    _error($arg, "Cannot change ownership of $path to $arg->{owner}:$arg->{group}");
                }
            }
        }
        else {
            my $save_bang = $!;
            my ($e, $e1) = ($save_bang, $^E);
            $e .= "; $e1" if $e ne $e1;
            # allow for another process to have created it meanwhile
            if (!-d $path) {
                $! = $save_bang;
                if ($arg->{error}) {
                    push @{${$arg->{error}}}, {$path => $e};
                }
                else {
                    _croak("mkdir $path: $e");
                }
            }
        }
    }
    return @created;
}

# remove_tree(@paths [, \%options])
#
# New-style entry point: makes sure the last argument is an option hash,
# then continues in rmtree().
sub remove_tree {
    push @_, {} unless @_ and UNIVERSAL::isa($_[-1],'HASH');
    goto &rmtree;
}

# _is_subdir($dir, $test)
#
# True when $test is $dir itself or lies below it: both must be on the
# same volume and the leading directory components of $test must equal
# those of $dir.
sub _is_subdir {
    my($dir, $test) = @_;

    my($dv, $dd) = File::Spec->splitpath($dir, 1);
    my($tv, $td) = File::Spec->splitpath($test, 1);

    # not on same volume
    return 0 if $dv ne $tv;

    my @d = File::Spec->splitdir($dd);
    my @t = File::Spec->splitdir($td);

    # @t can't be a subdir if it's shorter than @d
    return 0 if @t < @d;

    return join('/', @d) eq join('/', splice @t, 0, +@d);
}

# rmtree($paths, $verbose, $safe)      -- old style
# rmtree(@paths, \%options)            -- new style (last argument a hash)
#
# Normalise the arguments, refuse to remove any path that contains the
# current working directory, strip trailing separators and hand the
# cleaned list to _rmtree().  Returns 0 when no root path was given or the
# working directory cannot be determined or stat()ed; otherwise returns
# whatever _rmtree() returns.
sub rmtree {
    my $old_style = !(@_ and UNIVERSAL::isa($_[-1],'HASH'));

    my $arg;
    my $paths;

    if ($old_style) {
        my ($verbose, $safe);
        ($paths, $verbose, $safe) = @_;
        $arg->{verbose} = $verbose;
        $arg->{safe}    = defined $safe    ? $safe    : 0;

        if (defined($paths) and length($paths)) {
            $paths = [$paths] unless UNIVERSAL::isa($paths,'ARRAY');
        }
        else {
            _carp ("No root path(s) specified\n");
            return 0;
        }
    }
    else {
        $arg = pop @_;
        ${$arg->{error}}  = [] if exists $arg->{error};
        ${$arg->{result}} = [] if exists $arg->{result};
        $paths = [@_];
    }

    $arg->{prefix} = '';
    $arg->{depth}  = 0;

    my @clean_path;
    $arg->{cwd} = getcwd() or do {
        _error($arg, "cannot fetch initial working directory");
        return 0;
    };
    for ($arg->{cwd}) { /\A(.*)\Z/; $_ = $1 } # untaint

    for my $p (@$paths) {
        # need to fixup case and map \ to / on Windows
        my $ortho_root = $^O eq 'MSWin32' ? _slash_lc($p)          : $p;
        my $ortho_cwd  = $^O eq 'MSWin32' ? _slash_lc($arg->{cwd}) : $arg->{cwd};
        my $ortho_root_length = length($ortho_root);
        $ortho_root_length-- if $^O eq 'VMS'; # don't compare '.' with ']'
        if ($ortho_root_length && _is_subdir($ortho_root, $ortho_cwd)) {
            local $! = 0;
            _error($arg, "cannot remove path when cwd is $arg->{cwd}", $p);
            next;
        }

        if ($Is_MacOS) {
            $p  = ":$p" unless $p =~ /:/;
            $p .= ":"   unless $p =~ /:\z/;
        }
        elsif ($^O eq 'MSWin32') {
            $p =~ s{[/\\]\z}{};
        }
        else {
            $p =~ s{/\z}{};
        }
        push @clean_path, $p;
    }

    # NOTE(review): three keys but only two values are assigned, so
    # $arg->{perm} is left undefined here -- confirm that is intended.
    @{$arg}{qw(device inode perm)} = (lstat $arg->{cwd})[0,1] or do {
        _error($arg, "cannot stat initial working directory", $arg->{cwd});
        return 0;
    };

    return _rmtree($arg, \@clean_path);
}

sub _rmtree {
    my $arg   = shift;
    my $paths = shift;

    my $count  = 0;
    my $curdir = File::Spec->curdir();
    my $updir  = File::Spec->updir();

    my (@files, $root);
    ROOT_DIR:
    foreach $root (@$paths) {
        # since we chdir into each directory, it may not be obvious
        # to figure out where we are if we generate a message about
        # a file name. We therefore construct a semi-canonical
        # filename, anchored from the directory being unlinked (as
        # opposed to being truly canonical, anchored from the root (/).

        my $canon = $arg->{prefix} ?
File::Spec->catfile($arg->{prefix}, $root) : $root ; my ($ldev, $lino, $perm) = (lstat $root)[0,1,2] or next ROOT_DIR; if ( -d _ ) { $root = VMS::Filespec::vmspath(VMS::Filespec::pathify($root)) if $Is_VMS; if (!chdir($root)) { # see if we can escalate privileges to get in # (e.g. funny protection mask such as -w- instead of rwx) $perm &= 07777; my $nperm = $perm | 0700; if (!($arg->{safe} or $nperm == $perm or chmod($nperm, $root))) { _error($arg, "cannot make child directory read-write-exec", $canon); next ROOT_DIR; } elsif (!chdir($root)) { _error($arg, "cannot chdir to child", $canon); next ROOT_DIR; } } my ($cur_dev, $cur_inode, $perm) = (stat $curdir)[0,1,2] or do { _error($arg, "cannot stat current working directory", $canon); next ROOT_DIR; }; if ($Need_Stat_Check) { ($ldev eq $cur_dev and $lino eq $cur_inode) or _croak("directory $canon changed before chdir, expected dev=$ldev ino=$lino, actual dev=$cur_dev ino=$cur_inode, aborting."); } $perm &= 07777; # don't forget setuid, setgid, sticky bits my $nperm = $perm | 0700; # notabene: 0700 is for making readable in the first place, # it's also intended to change it to writable in case we have # to recurse in which case we are better than rm -rf for # subtrees with strange permissions if (!($arg->{safe} or $nperm == $perm or chmod($nperm, $curdir))) { _error($arg, "cannot make directory read+writeable", $canon); $nperm = $perm; } my $d; $d = gensym() if $] < 5.006; if (!opendir $d, $curdir) { _error($arg, "cannot opendir", $canon); @files = (); } else { no strict 'refs'; if (!defined ${"\cTAINT"} or ${"\cTAINT"}) { # Blindly untaint dir names if taint mode is # active, or any perl < 5.006 @files = map { /\A(.*)\z/s; $1 } readdir $d; } else { @files = readdir $d; } closedir $d; } if ($Is_VMS) { # Deleting large numbers of files from VMS Files-11 # filesystems is faster if done in reverse ASCIIbetical order. # include '.' to '.;' from blead patch #31775 @files = map {$_ eq '.' ? 
'.;' : $_} reverse @files; } @files = grep {$_ ne $updir and $_ ne $curdir} @files; if (@files) { # remove the contained files before the directory itself my $narg = {%$arg}; @{$narg}{qw(device inode cwd prefix depth)} = ($cur_dev, $cur_inode, $updir, $canon, $arg->{depth}+1); $count += _rmtree($narg, \@files); } # restore directory permissions of required now (in case the rmdir # below fails), while we are still in the directory and may do so # without a race via '.' if ($nperm != $perm and not chmod($perm, $curdir)) { _error($arg, "cannot reset chmod", $canon); } # don't leave the client code in an unexpected directory chdir($arg->{cwd}) or _croak("cannot chdir to $arg->{cwd} from $canon: $!, aborting."); # ensure that a chdir upwards didn't take us somewhere other # than we expected (see CVE-2002-0435) ($cur_dev, $cur_inode) = (stat $curdir)[0,1] or _croak("cannot stat prior working directory $arg->{cwd}: $!, aborting."); if ($Need_Stat_Check) { ($arg->{device} eq $cur_dev and $arg->{inode} eq $cur_inode) or _croak("previous directory $arg->{cwd} changed before entering $canon, expected dev=$ldev ino=$lino, actual dev=$cur_dev ino=$cur_inode, aborting."); } if ($arg->{depth} or !$arg->{keep_root}) { if ($arg->{safe} && ($Is_VMS ? !&VMS::Filespec::candelete($root) : !-w $root)) { print "skipped $root\n" if $arg->{verbose}; next ROOT_DIR; } if ($Force_Writeable and !chmod $perm | 0700, $root) { _error($arg, "cannot make directory writeable", $canon); } print "rmdir $root\n" if $arg->{verbose}; if (rmdir $root) { push @{${$arg->{result}}}, $root if $arg->{result}; ++$count; } else { _error($arg, "cannot remove directory", $canon); if ($Force_Writeable && !chmod($perm, ($Is_VMS ? 
# _slash_lc( $path )
#
# MSWin32 helper: returns a copy of $path with every backslash turned into
# a forward slash and all letters lower-cased, so that c:\path\to\dir can
# be recognised as lying underneath C:/Path/To.
sub _slash_lc {
    (my $normalised = shift) =~ tr{\\}{/};
    return lc $normalised;
}
=head1 SYNOPSIS use File::Path qw(make_path remove_tree); make_path('foo/bar/baz', '/zug/zwang'); make_path('foo/bar/baz', '/zug/zwang', { verbose => 1, mode => 0711, }); remove_tree('foo/bar/baz', '/zug/zwang'); remove_tree('foo/bar/baz', '/zug/zwang', { verbose => 1, error => \my $err_list, }); # legacy (interface promoted before v2.00) mkpath('/foo/bar/baz'); mkpath('/foo/bar/baz', 1, 0711); mkpath(['/foo/bar/baz', 'blurfl/quux'], 1, 0711); rmtree('foo/bar/baz', 1, 1); rmtree(['foo/bar/baz', 'blurfl/quux'], 1, 1); # legacy (interface promoted before v2.06) mkpath('foo/bar/baz', '/zug/zwang', { verbose => 1, mode => 0711 }); rmtree('foo/bar/baz', '/zug/zwang', { verbose => 1, mode => 0711 }); =head1 DESCRIPTION This module provide a convenient way to create directories of arbitrary depth and to delete an entire directory subtree from the filesystem. The following functions are provided: =over =item make_path( $dir1, $dir2, .... ) =item make_path( $dir1, $dir2, ...., \%opts ) The C function creates the given directories if they don't exists before, much like the Unix command C. The function accepts a list of directories to be created. Its behaviour may be tuned by an optional hashref appearing as the last parameter on the call. The function returns the list of directories actually created during the call; in scalar context the number of directories created. The following keys are recognised in the option hash: =over =item mode => $num The numeric permissions mode to apply to each created directory (defaults to 0777), to be modified by the current C. If the directory already exists (and thus does not need to be created), the permissions will not be modified. C is recognised as an alias for this parameter. =item verbose => $bool If present, will cause C to print the name of each directory as it is created. By default nothing is printed. =item error => \$err If present, it should be a reference to a scalar. 
This scalar will be made to reference an array, which will
be used to store any errors that are encountered.  See the
L</"ERROR HANDLING"> section for more information.

If this parameter is not used, certain error conditions may raise
a fatal error that will cause the program to halt, unless trapped
in an C<eval> block.

=item owner => $owner

=item user => $owner

=item uid => $owner

If present, will cause any created directory to be owned by C<$owner>.
If the value is numeric, it will be interpreted as a uid, otherwise
a username is assumed. An error will be issued if the username cannot be
mapped to a uid, or the uid does not exist, or the process lacks the
privileges to change ownership.

Ownership of directories that already exist will not be changed.

C<user> and C<uid> are aliases of C<owner>.

=item group => $group

If present, will cause any created directory to be owned by the group
C<$group>. If the value is numeric, it will be interpreted as a gid,
otherwise a group name is assumed. An error will be issued if the group
name cannot be mapped to a gid, or the gid does not exist, or the process
lacks the privileges to change group ownership.

Group ownership of directories that already exist will not be changed.

  make_path '/var/tmp/webcache', {owner=>'nobody', group=>'nogroup'};

=back

=item mkpath( $dir )

=item mkpath( $dir, $verbose, $mode )

=item mkpath( [$dir1, $dir2,...], $verbose, $mode )

=item mkpath( $dir1, $dir2,..., \%opt )

The mkpath() function provides the legacy interface of make_path() with
a different interpretation of the arguments passed.  The behaviour and
return value of the function are otherwise identical to make_path().

=item remove_tree( $dir1, $dir2, .... )

=item remove_tree( $dir1, $dir2, ...., \%opts )

The C<remove_tree> function deletes the given directories and any
files and subdirectories they might contain, much like the Unix
command C<rm -r> or C<del /s> on Windows.

The function accepts a list of directories to be removed.
Its behaviour may be tuned by an optional hashref appearing as the last
parameter on the call.

The function returns the number of files successfully deleted.

The following keys are recognised in the option hash:

=over

=item verbose => $bool

If present, will cause C<remove_tree> to print the name of each file as
it is unlinked. By default nothing is printed.

=item safe => $bool

When set to a true value, will cause C<remove_tree> to skip the files
for which the process lacks the required privileges needed to delete
files, such as delete privileges on VMS. In other words, the code
will make no attempt to alter file permissions. Thus, if the process
is interrupted, no filesystem object will be left in a more
permissive mode.

=item keep_root => $bool

When set to a true value, will cause all files and subdirectories
to be removed, except the initially specified directories. This comes
in handy when cleaning out an application's scratch directory.

  remove_tree( '/tmp', {keep_root => 1} );

=item result => \$res

If present, it should be a reference to a scalar.
This scalar will be made to reference an array, which will
be used to store all files and directories unlinked
during the call. If nothing is unlinked, the array will be empty.

  remove_tree( '/tmp', {result => \my $list} );
  print "unlinked $_\n" for @$list;

This is a useful alternative to the C<verbose> key.

=item error => \$err

If present, it should be a reference to a scalar.
This scalar will be made to reference an array, which will
be used to store any errors that are encountered.  See the
L</"ERROR HANDLING"> section for more information.

Removing things is a much more dangerous proposition than
creating things. As such, there are certain conditions that
C<remove_tree> may encounter that are so dangerous that the only
sane action left is to kill the program.

Use C<error> to trap all that is reasonable (problems with
permissions and the like), and let it die if things get out
of hand. This is the safest course of action.
=back

=item rmtree( $dir )

=item rmtree( $dir, $verbose, $safe )

=item rmtree( [$dir1, $dir2,...], $verbose, $safe )

=item rmtree( $dir1, $dir2,..., \%opt )

The rmtree() function provides the legacy interface of remove_tree()
with a different interpretation of the arguments passed. The behaviour
and return value of the function are otherwise identical to
remove_tree().

=back

=head2 ERROR HANDLING

=over 4

=item B<NOTE:>

The following error handling mechanism is considered
experimental and is subject to change pending feedback from
users.

=back

If C<make_path> or C<remove_tree> encounter an error, a diagnostic
message will be printed to C<STDERR> via C<carp> (for non-fatal
errors), or via C<croak> (for fatal errors).

If this behaviour is not desirable, the C<error> attribute may be
used to hold a reference to a variable, which will be used to store
the diagnostics. The variable is made a reference to an array of hash
references.  Each hash contains a single key/value pair where the key
is the name of the file, and the value is the error message (including
the contents of C<$!> when appropriate).  If a general error is
encountered the diagnostic key will be empty.

An example usage looks like:

  remove_tree( 'foo/bar', 'bar/rat', {error => \my $err} );
  if (@$err) {
      for my $diag (@$err) {
          my ($file, $message) = %$diag;
          if ($file eq '') {
              print "general error: $message\n";
          }
          else {
              print "problem unlinking $file: $message\n";
          }
      }
  }
  else {
      print "No error encountered\n";
  }

Note that if no errors are encountered, C<$err> will reference an
empty array.  This means that C<$err> will always end up TRUE; so you
need to test C<@$err> to determine if errors occurred.

=head2 NOTES

C<File::Path> blindly exports C<mkpath> and C<rmtree> into the
current namespace. These days, this is considered bad style, but
to change it now would break too much code. Nonetheless, you are
invited to specify what it is you are expecting to use:

  use File::Path 'rmtree';

The routines C<make_path> and C<remove_tree> are B<not> exported
by default. You must specify which ones you want to use.
  use File::Path 'remove_tree';

Note that a side-effect of the above is that C<mkpath> and C<rmtree>
are no longer exported at all. This is due to the way the C<Exporter>
module works. If you are migrating a codebase to use the new
interface, you will have to list everything explicitly. But that's
just good practice anyway.

  use File::Path qw(remove_tree rmtree);

=head3 API CHANGES

The API was changed in the 2.0 branch. For a time, C<mkpath> and
C<rmtree> tried, unsuccessfully, to deal with the two different
calling mechanisms. This approach was considered a failure.

The new semantics are now only available with C<make_path> and
C<remove_tree>. The old semantics are only available through
C<mkpath> and C<rmtree>. Users are strongly encouraged to upgrade
to at least 2.08 in order to avoid surprises.

=head3 SECURITY CONSIDERATIONS

There were race conditions in the 1.x implementations of File::Path's
C<rmtree> function (although sometimes patched depending on the OS
distribution or platform). The 2.0 version contains code to avoid the
problem mentioned in CVE-2002-0435.

See the following pages for more information:

  http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=286905
  http://www.nntp.perl.org/group/perl.perl5.porters/2005/01/msg97623.html
  http://www.debian.org/security/2005/dsa-696

Additionally, unless the C<safe> parameter is set (or the
third parameter in the traditional interface is TRUE), should a
C<remove_tree> be interrupted, files that were originally in read-only
mode may now have their permissions set to a read-write (or "delete
OK") mode.

=head1 DIAGNOSTICS

FATAL errors will cause the program to halt (C<croak>), since the
problem is so severe that it would be dangerous to continue. (This
can always be trapped with C<eval>, but it's not a good idea. Under
the circumstances, dying is the best thing to do).

SEVERE errors may be trapped using the modern interface. If they
are not trapped, or the old interface is used, such an error will
cause the program to halt.

All other errors may be trapped using the modern interface, otherwise
they will be C<carp>ed about.
Program execution will not be halted. =over 4 =item mkdir [path]: [errmsg] (SEVERE) C was unable to create the path. Probably some sort of permissions error at the point of departure, or insufficient resources (such as free inodes on Unix). =item No root path(s) specified C was not given any paths to create. This message is only emitted if the routine is called with the traditional interface. The modern interface will remain silent if given nothing to do. =item No such file or directory On Windows, if C gives you this warning, it may mean that you have exceeded your filesystem's maximum path length. =item cannot fetch initial working directory: [errmsg] C attempted to determine the initial directory by calling C, but the call failed for some reason. No attempt will be made to delete anything. =item cannot stat initial working directory: [errmsg] C attempted to stat the initial directory (after having successfully obtained its name via C), however, the call failed for some reason. No attempt will be made to delete anything. =item cannot chdir to [dir]: [errmsg] C attempted to set the working directory in order to begin deleting the objects therein, but was unsuccessful. This is usually a permissions issue. The routine will continue to delete other things, but this directory will be left intact. =item directory [dir] changed before chdir, expected dev=[n] ino=[n], actual dev=[n] ino=[n], aborting. (FATAL) C recorded the device and inode of a directory, and then moved into it. It then performed a C on the current directory and detected that the device and inode were no longer the same. As this is at the heart of the race condition problem, the program will die at this point. =item cannot make directory [dir] read+writeable: [errmsg] C attempted to change the permissions on the current directory to ensure that subsequent unlinkings would not run into problems, but was unable to do so. 
The permissions remain as they were, and the program will carry on, doing the best it can. =item cannot read [dir]: [errmsg] C tried to read the contents of the directory in order to acquire the names of the directory entries to be unlinked, but was unsuccessful. This is usually a permissions issue. The program will continue, but the files in this directory will remain after the call. =item cannot reset chmod [dir]: [errmsg] C, after having deleted everything in a directory, attempted to restore its permissions to the original state but failed. The directory may wind up being left behind. =item cannot remove [dir] when cwd is [dir] The current working directory of the program is F and you are attempting to remove an ancestor, such as F. The directory tree is left untouched. The solution is to C out of the child directory to a place outside the directory tree to be removed. =item cannot chdir to [parent-dir] from [child-dir]: [errmsg], aborting. (FATAL) C, after having deleted everything and restored the permissions of a directory, was unable to chdir back to the parent. The program halts to avoid a race condition from occurring. =item cannot stat prior working directory [dir]: [errmsg], aborting. (FATAL) C was unable to stat the parent directory after have returned from the child. Since there is no way of knowing if we returned to where we think we should be (by comparing device and inode) the only way out is to C. =item previous directory [parent-dir] changed before entering [child-dir], expected dev=[n] ino=[n], actual dev=[n] ino=[n], aborting. (FATAL) When C returned from deleting files in a child directory, a check revealed that the parent directory it returned to wasn't the one it started out from. This is considered a sign of malicious activity. 
=item cannot make directory [dir] writeable: [errmsg] Just before removing a directory (after having successfully removed everything it contained), C attempted to set the permissions on the directory to ensure it could be removed and failed. Program execution continues, but the directory may possibly not be deleted. =item cannot remove directory [dir]: [errmsg] C attempted to remove a directory, but failed. This may because some objects that were unable to be removed remain in the directory, or a permissions issue. The directory will be left behind. =item cannot restore permissions of [dir] to [0nnn]: [errmsg] After having failed to remove a directory, C was unable to restore its permissions from a permissive state back to a possibly more restrictive setting. (Permissions given in octal). =item cannot make file [file] writeable: [errmsg] C attempted to force the permissions of a file to ensure it could be deleted, but failed to do so. It will, however, still attempt to unlink the file. =item cannot unlink file [file]: [errmsg] C failed to remove a file. Probably a permissions issue. =item cannot restore permissions of [file] to [0nnn]: [errmsg] After having failed to remove a file, C was also unable to restore the permissions on the file to a possibly less permissive setting. (Permissions given in octal). =item unable to map [owner] to a uid, ownership not changed"); C was instructed to give the ownership of created directories to the symbolic name [owner], but C did not return the corresponding numeric uid. The directory will be created, but ownership will not be changed. =item unable to map [group] to a gid, group ownership not changed C was instructed to give the group ownership of created directories to the symbolic name [group], but C did not return the corresponding numeric gid. The directory will be created, but group ownership will not be changed. 
=back =head1 SEE ALSO =over 4 =item * L Allows files and directories to be moved to the Trashcan/Recycle Bin (where they may later be restored if necessary) if the operating system supports such functionality. This feature may one day be made available directly in C. =item * L When removing directory trees, if you want to examine each file to decide whether to delete it (and possibly leaving large swathes alone), F offers a convenient and flexible approach to examining directory trees. =back =head1 BUGS Please report all bugs on the RT queue: L =head1 ACKNOWLEDGEMENTS Paul Szabo identified the race condition originally, and Brendan O'Dea wrote an implementation for Debian that addressed the problem. That code was used as a basis for the current code. Their efforts are greatly appreciated. Gisle Aas made a number of improvements to the documentation for 2.07 and his advice and assistance is also greatly appreciated. =head1 AUTHORS Tim Bunce and Charles Bailepackage Locale::Language; # Copyright (C) 2001 Canon Research Centre Europe (CRE). # Copyright (C) 2002-2009 Neil Bowers # Copyright (c) 2010-2011 Sullivan Beck # This program is free software; you can redistribute it and/or modify it # under the same terms as Perl itself. use strict; use warnings; require 5.002; require Exporter; use Carp; use Locale::Codes; use Locale::Constants; use Locale::Codes::Language; #======================================================================= # Public Global Variables #======================================================================= our($VERSION,@ISA,@EXPORT,@EXPORT_OK); $VERSION='3.16'; @ISA = qw(Exporter); @EXPORT = qw(code2language language2code all_language_codes all_language_names language_code2code LOCALE_LANG_ALPHA_2 LOCALE_LANG_ALPHA_3 LOCALE_LANG_TERM ); sub _code { my($code,$codeset) = @_; $code = "" if (! $code); $codeset = LOCALE_LANG_DEFAULT if (! 
#=======================================================================
#
# code2language ( CODE [,CODESET] )
#
# Validates CODE/CODESET through _code() and looks the language name up
# via Locale::Codes::_code2name().  Yields undef when _code() reports an
# error or hands back no code.
#
#=======================================================================

sub code2language {
   my($failed,$lookup,$set) = _code(@_);

   if ($failed || ! defined $lookup) {
      return undef;
   }

   return Locale::Codes::_code2name("language",$lookup,$set);
}
#=======================================================================
#
# language_code2code ( CODE,CODESET_IN,CODESET_OUT )
#
# Converts CODE from one code set to another.  Croaks unless exactly
# three arguments are given; returns undef when either code set is
# rejected by _code().
#
#=======================================================================

sub language_code2code {
   (@_ == 3) or croak "language_code2code() takes 3 arguments!";
   my($code,$inset,$outset) = @_;
   my($err,$tmp);
   ($err,$code,$inset) = _code($code,$inset);
   return undef if ($err);
   ($err,$tmp,$outset) = _code("",$outset);
   return undef if ($err);
   return Locale::Codes::_code2code("language",$code,$inset,$outset);
}

#=======================================================================
#
# all_language_codes ( [CODESET] )
#
# Returns what Locale::Codes::_all_codes() reports for the code set, or
# undef when the code set is rejected by _code().
#
#=======================================================================

sub all_language_codes {
   my($codeset) = @_;
   my($err,$tmp);
   ($err,$tmp,$codeset) = _code("",$codeset);
   return undef if ($err);
   return Locale::Codes::_all_codes("language",$codeset);
}

#=======================================================================
#
# all_language_names ( [CODESET] )
#
# Returns what Locale::Codes::_all_names() reports for the code set, or
# undef when the code set is rejected by _code().
#
#=======================================================================

sub all_language_names {
   my($codeset) = @_;
   my($err,$tmp);
   ($err,$tmp,$codeset) = _code("",$codeset);
   return undef if ($err);
   return Locale::Codes::_all_names("language",$codeset);
}

#=======================================================================
#
# rename_language ( CODE,NAME [,CODESET] [,"nowarn"] )
#
# Forwards to Locale::Codes::_rename().  A trailing "nowarn" argument is
# stripped from the argument list and passed on as a true flag.
#
#=======================================================================

sub rename_language {
   my($code,$new_name,@args) = @_;

   # Only look at the last optional argument when there is one and it is
   # defined; otherwise the comparison below would emit an "uninitialized
   # value" warning for calls without CODESET (or with CODESET undef).
   my $nowarn = 0;
   if (@args && defined $args[-1] && $args[-1] eq "nowarn") {
      pop(@args);
      $nowarn = 1;
   }

   my $codeset = shift(@args);
   my $err;
   # The error status is not acted upon here: when _code() rejects its
   # input, $code and $codeset come back undefined and are passed on.
   ($err,$code,$codeset) = _code($code,$codeset);

   return Locale::Codes::_rename("language",$code,$new_name,$codeset,$nowarn);
}
#=======================================================================
#
# add_language ( CODE,NAME [,CODESET] [,"nowarn"] )
#
# Forwards to Locale::Codes::_add_code().  A trailing "nowarn" argument
# is stripped from the argument list and passed on as a true flag.
#
#=======================================================================

sub add_language {
   my($code,$name,@args) = @_;

   # Guard the look at the last optional argument so that calls without
   # CODESET (or with CODESET undef) do not trigger an "uninitialized
   # value" warning.
   my $nowarn = 0;
   if (@args && defined $args[-1] && $args[-1] eq "nowarn") {
      pop(@args);
      $nowarn = 1;
   }

   my $codeset = shift(@args);
   my $err;
   # The error status is not acted upon here: when _code() rejects its
   # input, $code and $codeset come back undefined and are passed on.
   ($err,$code,$codeset) = _code($code,$codeset);

   return Locale::Codes::_add_code("language",$code,$name,$codeset,$nowarn);
}

#=======================================================================
#
# delete_language ( CODE [,CODESET] [,"nowarn"] )
#
# Forwards to Locale::Codes::_delete_code().  A trailing "nowarn"
# argument is stripped from the argument list and passed on as a true
# flag.
#
#=======================================================================

sub delete_language {
   my($code,@args) = @_;

   # Same guarded "nowarn" detection as in add_language().
   my $nowarn = 0;
   if (@args && defined $args[-1] && $args[-1] eq "nowarn") {
      pop(@args);
      $nowarn = 1;
   }

   my $codeset = shift(@args);
   my $err;
   ($err,$code,$codeset) = _code($code,$codeset);

   return Locale::Codes::_delete_code("language",$code,$codeset,$nowarn);
}

#=======================================================================
#
# add_language_alias ( NAME,NEW_NAME [,"nowarn"] )
#
# Forwards to Locale::Codes::_add_alias(); the third argument becomes a
# 1/0 flag depending on whether it is the string "nowarn".
#
#=======================================================================

sub add_language_alias {
   my($name,$new_name,$nowarn) = @_;
   $nowarn = (defined($nowarn) && $nowarn eq "nowarn" ? 1 : 0);

   return Locale::Codes::_add_alias("language",$name,$new_name,$nowarn);
}
#=======================================================================
#
# rename_language_code ( CODE,NEW_CODE [,CODESET] [,"nowarn"] )
#
# Forwards to Locale::Codes::_rename_code().  A trailing "nowarn"
# argument is stripped from the argument list and passed on as a true
# flag.
#
#=======================================================================

sub rename_language_code {
   my($code,$new_code,@args) = @_;

   # Guard the look at the last optional argument so that calls without
   # CODESET (or with CODESET undef) do not trigger an "uninitialized
   # value" warning.
   my $nowarn = 0;
   if (@args && defined $args[-1] && $args[-1] eq "nowarn") {
      pop(@args);
      $nowarn = 1;
   }

   my $codeset = shift(@args);
   my $err;
   ($err,$code,$codeset) = _code($code,$codeset);
   # NEW_CODE is only normalised when the first check passed; either way
   # the current values are handed on unchanged afterwards.
   ($err,$new_code,$codeset) = _code($new_code,$codeset)  if (! $err);

   return Locale::Codes::_rename_code("language",$code,$new_code,$codeset,$nowarn);
}
#=======================================================================
#
# delete_language_code_alias ( CODE [,CODESET] [,"nowarn"] )
#
# Forwards to Locale::Codes::_delete_code_alias().  A trailing "nowarn"
# argument is stripped from the argument list and passed on as a true
# flag.
#
#=======================================================================

sub delete_language_code_alias {
   my($code,@args) = @_;

   # Guard the look at the last optional argument so that calls without
   # CODESET (or with CODESET undef) do not trigger an "uninitialized
   # value" warning.
   my $nowarn = 0;
   if (@args && defined $args[-1] && $args[-1] eq "nowarn") {
      pop(@args);
      $nowarn = 1;
   }

   my $codeset = shift(@args);
   my $err;
   # The error status is not acted upon here: when _code() rejects its
   # input, $code and $codeset come back undefined and are passed on.
   ($err,$code,$codeset) = _code($code,$codeset);

   return Locale::Codes::_delete_code_alias("language",$code,$codeset,$nowarn);
}
package Memoize::AnyDBM_File;

=head1 NAME

Memoize::AnyDBM_File - glue to provide EXISTS for AnyDBM_File for Storable use

=head1 DESCRIPTION

See L<Memoize>.

=cut

# $Verbose is the package variable consulted below; it is declared here so
# that it is a known global.  It is never set in this file, so the
# diagnostic stays silent unless something else sets
# $Memoize::AnyDBM_File::Verbose to a true value.
use vars qw(@ISA $VERSION $Verbose);
$VERSION = '1.02';
@ISA = qw(DB_File GDBM_File Memoize::NDBM_File Memoize::SDBM_File ODBM_File) unless @ISA;

# Try the candidate DBM classes in order and inherit from the first one
# that can be loaded.
for my $mod (@ISA) {
#  (my $truemod = $mod) =~ s/^Memoize:://;
#  my $file = "$mod.pm";
#  $file =~ s{::}{/}g;
  if (eval "require $mod") {
    print STDERR "AnyDBM_File => Selected $mod.\n" if $Verbose;
    @ISA = ($mod);	# if we leave @ISA alone, warnings abound
    return 1;
  }
}

die "No DBM package was successfully found or installed";
utf8->import(); DEBUG and warn " utf8 on for _compile()\n"; } else { DEBUG and warn " utf8 not available for _compile() ($INC{'utf8.pm'})\n$@\n"; } } $VERSION = '1.19'; @ISA = (); $MATCH_SUPERS = 1; $MATCH_SUPERS_TIGHTLY = 1; $USING_LANGUAGE_TAGS = 1; # Turning this off is somewhat of a security risk in that little or no # checking will be done on the legality of tokens passed to the # eval("use $module_name") in _try_use. If you turn this off, you have # to do your own taint checking. $USE_LITERALS = 1 unless defined $USE_LITERALS; # a hint for compiling bracket-notation things. my %isa_scan = (); ########################################################################### sub quant { my($handle, $num, @forms) = @_; return $num if @forms == 0; # what should this mean? return $forms[2] if @forms > 2 and $num == 0; # special zeroth case # Normal case: # Note that the formatting of $num is preserved. return( $handle->numf($num) . ' ' . $handle->numerate($num, @forms) ); # Most human languages put the number phrase before the qualified phrase. } sub numerate { # return this lexical item in a form appropriate to this number my($handle, $num, @forms) = @_; my $s = ($num == 1); return '' unless @forms; if(@forms == 1) { # only the headword form specified return $s ? $forms[0] : ($forms[0] . 's'); # very cheap hack. } else { # sing and plural were specified return $s ? $forms[0] : $forms[1]; } } #-------------------------------------------------------------------------- sub numf { my($handle, $num) = @_[0,1]; if($num < 10_000_000_000 and $num > -10_000_000_000 and $num == int($num)) { $num += 0; # Just use normal integer stringification. # Specifically, don't let %G turn ten million into 1E+007 } else { $num = CORE::sprintf('%G', $num); # "CORE::" is there to avoid confusion with the above sub sprintf. 
} while( $num =~ s/^([-+]?\d+)(\d{3})/$1,$2/s ) {1} # right from perlfaq5 # The initial \d+ gobbles as many digits as it can, and then we # backtrack so it un-eats the rightmost three, and then we # insert the comma there. $num =~ tr<.,><,.> if ref($handle) and $handle->{'numf_comma'}; # This is just a lame hack instead of using Number::Format return $num; } sub sprintf { no integer; my($handle, $format, @params) = @_; return CORE::sprintf($format, @params); # "CORE::" is there to avoid confusion with myself! } #=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=# use integer; # vroom vroom... applies to the whole rest of the module sub language_tag { my $it = ref($_[0]) || $_[0]; return undef unless $it =~ m/([^':]+)(?:::)?$/s; $it = lc($1); $it =~ tr<_><->; return $it; } sub encoding { my $it = $_[0]; return( (ref($it) && $it->{'encoding'}) || 'iso-8859-1' # Latin-1 ); } #-------------------------------------------------------------------------- sub fallback_languages { return('i-default', 'en', 'en-US') } sub fallback_language_classes { return () } #-------------------------------------------------------------------------- sub fail_with { # an actual attribute method! 
my($handle, @params) = @_; return unless ref($handle); $handle->{'fail'} = $params[0] if @params; return $handle->{'fail'}; } #-------------------------------------------------------------------------- sub failure_handler_auto { # Meant to be used like: # $handle->fail_with('failure_handler_auto') my $handle = shift; my $phrase = shift; $handle->{'failure_lex'} ||= {}; my $lex = $handle->{'failure_lex'}; my $value ||= ($lex->{$phrase} ||= $handle->_compile($phrase)); # Dumbly copied from sub maketext: return ${$value} if ref($value) eq 'SCALAR'; return $value if ref($value) ne 'CODE'; { local $SIG{'__DIE__'}; eval { $value = &$value($handle, @_) }; } # If we make it here, there was an exception thrown in the # call to $value, and so scream: if($@) { # pretty up the error message $@ =~ s{\s+at\s+\(eval\s+\d+\)\s+line\s+(\d+)\.?\n?} {\n in bracket code [compiled line $1],}s; #$err =~ s/\n?$/\n/s; Carp::croak "Error in maketexting \"$phrase\":\n$@ as used"; # Rather unexpected, but suppose that the sub tried calling # a method that didn't exist. } else { return $value; } } #========================================================================== sub new { # Nothing fancy! my $class = ref($_[0]) || $_[0]; my $handle = bless {}, $class; $handle->init; return $handle; } sub init { return } # no-op ########################################################################### sub maketext { # Remember, this can fail. Failure is controllable many ways. Carp::croak 'maketext requires at least one parameter' unless @_ > 1; my($handle, $phrase) = splice(@_,0,2); Carp::confess('No handle/phrase') unless (defined($handle) && defined($phrase)); # backup $@ in case it it's still being used in the calling code. # If no failures, we'll re-set it back to what it was later. my $at = $@; # Copy @_ case one of its elements is $@. 
@_ = @_; # Look up the value: my $value; if (exists $handle->{'_external_lex_cache'}{$phrase}) { DEBUG and warn "* Using external lex cache version of \"$phrase\"\n"; $value = $handle->{'_external_lex_cache'}{$phrase}; } else { foreach my $h_r ( @{ $isa_scan{ref($handle) || $handle} || $handle->_lex_refs } ) { DEBUG and warn "* Looking up \"$phrase\" in $h_r\n"; if(exists $h_r->{$phrase}) { DEBUG and warn " Found \"$phrase\" in $h_r\n"; unless(ref($value = $h_r->{$phrase})) { # Nonref means it's not yet compiled. Compile and replace. if ($handle->{'use_external_lex_cache'}) { $value = $handle->{'_external_lex_cache'}{$phrase} = $handle->_compile($value); } else { $value = $h_r->{$phrase} = $handle->_compile($value); } } last; } # extending packages need to be able to localize _AUTO and if readonly can't "local $h_r->{'_AUTO'} = 1;" # but they can "local $handle->{'_external_lex_cache'}{'_AUTO'} = 1;" elsif($phrase !~ m/^_/s and ($handle->{'use_external_lex_cache'} ? ( exists $handle->{'_external_lex_cache'}{'_AUTO'} ? $handle->{'_external_lex_cache'}{'_AUTO'} : $h_r->{'_AUTO'} ) : $h_r->{'_AUTO'})) { # it's an auto lex, and this is an autoable key! DEBUG and warn " Automaking \"$phrase\" into $h_r\n"; if ($handle->{'use_external_lex_cache'}) { $value = $handle->{'_external_lex_cache'}{$phrase} = $handle->_compile($phrase); } else { $value = $h_r->{$phrase} = $handle->_compile($phrase); } last; } DEBUG>1 and print " Not found in $h_r, nor automakable\n"; # else keep looking } } unless(defined($value)) { DEBUG and warn "! Lookup of \"$phrase\" in/under ", ref($handle) || $handle, " fails.\n"; if(ref($handle) and $handle->{'fail'}) { DEBUG and warn "WARNING0: maketext fails looking for <$phrase>\n"; my $fail; if(ref($fail = $handle->{'fail'}) eq 'CODE') { # it's a sub reference $@ = $at; # Put $@ back in case we altered it along the way. return &{$fail}($handle, $phrase, @_); # If it ever returns, it should return a good value. 
} else { # It's a method name $@ = $at; # Put $@ back in case we altered it along the way. return $handle->$fail($phrase, @_); # If it ever returns, it should return a good value. } } else { # All we know how to do is this; Carp::croak("maketext doesn't know how to say:\n$phrase\nas needed"); } } if(ref($value) eq 'SCALAR'){ $@ = $at; # Put $@ back in case we altered it along the way. return $$value ; } if(ref($value) ne 'CODE'){ $@ = $at; # Put $@ back in case we altered it along the way. return $value ; } { local $SIG{'__DIE__'}; eval { $value = &$value($handle, @_) }; } # If we make it here, there was an exception thrown in the # call to $value, and so scream: if ($@) { # pretty up the error message $@ =~ s{\s+at\s+\(eval\s+\d+\)\s+line\s+(\d+)\.?\n?} {\n in bracket code [compiled line $1],}s; #$err =~ s/\n?$/\n/s; Carp::croak "Error in maketexting \"$phrase\":\n$@ as used"; # Rather unexpected, but suppose that the sub tried calling # a method that didn't exist. } else { $@ = $at; # Put $@ back in case we altered it along the way. return $value; } $@ = $at; # Put $@ back in case we altered it along the way. } ########################################################################### sub get_handle { # This is a constructor and, yes, it CAN FAIL. # Its class argument has to be the base class for the current # application's l10n files. my($base_class, @languages) = @_; $base_class = ref($base_class) || $base_class; # Complain if they use __PACKAGE__ as a project base class? if( @languages ) { DEBUG and warn 'Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n"; if($USING_LANGUAGE_TAGS) { # An explicit language-list was given! @languages = map {; $_, I18N::LangTags::alternate_language_tags($_) } # Catch alternation map I18N::LangTags::locale2language_tag($_), # If it's a lg tag, fine, pass thru (untainted) # If it's a locale ID, try converting to a lg tag (untainted), # otherwise nix it. 
@languages; DEBUG and warn 'Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n"; } } else { @languages = $base_class->_ambient_langprefs; } @languages = $base_class->_langtag_munging(@languages); my %seen; foreach my $module_name ( map { $base_class . '::' . $_ } @languages ) { next unless length $module_name; # sanity next if $seen{$module_name}++ # Already been here, and it was no-go || !&_try_use($module_name); # Try to use() it, but can't it. return($module_name->new); # Make it! } return undef; # Fail! } ########################################################################### sub _langtag_munging { my($base_class, @languages) = @_; # We have all these DEBUG statements because otherwise it's hard as hell # to diagnose ifwhen something goes wrong. DEBUG and warn 'Lgs1: ', map("<$_>", @languages), "\n"; if($USING_LANGUAGE_TAGS) { DEBUG and warn 'Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n"; @languages = $base_class->_add_supers( @languages ); push @languages, I18N::LangTags::panic_languages(@languages); DEBUG and warn "After adding panic languages:\n", ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n"; push @languages, $base_class->fallback_languages; # You are free to override fallback_languages to return empty-list! DEBUG and warn 'Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n"; @languages = # final bit of processing to turn them into classname things map { my $it = $_; # copy $it =~ tr<-A-Z><_a-z>; # lc, and turn - to _ $it =~ tr<_a-z0-9><>cd; # remove all but a-z0-9_ $it; } @languages ; DEBUG and warn "Nearing end of munging:\n", ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n"; } else { DEBUG and warn "Bypassing language-tags.\n", ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n"; } DEBUG and warn "Before adding fallback classes:\n", ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n"; push @languages, $base_class->fallback_language_classes; # You are free to override that to return whatever. 
DEBUG and warn "Finally:\n", ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n"; return @languages; } ########################################################################### sub _ambient_langprefs { return I18N::LangTags::Detect::detect(); } ########################################################################### sub _add_supers { my($base_class, @languages) = @_; if (!$MATCH_SUPERS) { # Nothing DEBUG and warn "Bypassing any super-matching.\n", ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n"; } elsif( $MATCH_SUPERS_TIGHTLY ) { DEBUG and warn "Before adding new supers tightly:\n", ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n"; @languages = I18N::LangTags::implicate_supers( @languages ); DEBUG and warn "After adding new supers tightly:\n", ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n"; } else { DEBUG and warn "Before adding supers to end:\n", ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n"; @languages = I18N::LangTags::implicate_supers_strictly( @languages ); DEBUG and warn "After adding supers to end:\n", ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n"; } return @languages; } ########################################################################### # # This is where most people should stop reading. # ########################################################################### my %tried = (); # memoization of whether we've used this module, or found it unusable. sub _try_use { # Basically a wrapper around "require Modulename" # "Many men have tried..." "They tried and failed?" "They tried and died." return $tried{$_[0]} if exists $tried{$_[0]}; # memoization my $module = $_[0]; # ASSUME sane module name! { no strict 'refs'; no warnings 'once'; return($tried{$module} = 1) if %{$module . '::Lexicon'} or @{$module . '::ISA'}; # weird case: we never use'd it, but there it is! 
} DEBUG and warn " About to use $module ...\n"; local $SIG{'__DIE__'}; local $@; eval "require $module"; # used to be "use $module", but no point in that. if($@) { DEBUG and warn "Error using $module \: $@\n"; return $tried{$module} = 0; } else { DEBUG and warn " OK, $module is used\n"; return $tried{$module} = 1; } } #-------------------------------------------------------------------------- sub _lex_refs { # report the lexicon references for this handle's class # returns an arrayREF! no strict 'refs'; no warnings 'once'; my $class = ref($_[0]) || $_[0]; DEBUG and warn "Lex refs lookup on $class\n"; return $isa_scan{$class} if exists $isa_scan{$class}; # memoization! my @lex_refs; my $seen_r = ref($_[1]) ? $_[1] : {}; if( defined( *{$class . '::Lexicon'}{'HASH'} )) { push @lex_refs, *{$class . '::Lexicon'}{'HASH'}; DEBUG and warn '%' . $class . '::Lexicon contains ', scalar(keys %{$class . '::Lexicon'}), " entries\n"; } # Implements depth(height?)-first recursive searching of superclasses. # In hindsight, I suppose I could have just used Class::ISA! foreach my $superclass (@{$class . '::ISA'}) { DEBUG and warn " Super-class search into $superclass\n"; next if $seen_r->{$superclass}++; push @lex_refs, @{&_lex_refs($superclass, $seen_r)}; # call myself } $isa_scan{$class} = \@lex_refs; # save for next time return \@lex_refs; } sub clear_isa_scan { %isa_scan = (); return; } # end on a note of simplicity! #-------------------------------------------------------------------------- sub _compile { # This big scary routine compiles an entry. # It returns either a coderef if there's brackety bits in this, or # otherwise a ref to a scalar. my $string_to_compile = $_[1]; # There are taint issues using regex on @_ - perlbug 60378,27344 # The while() regex is more expensive than this check on strings that don't need a compile. # this op causes a ~2% speed hit for strings that need compile and a 250% speed improvement # on strings that don't need compiling. 
return \"$string_to_compile" if($string_to_compile !~ m/[\[~\]]/ms); # return a string ref if chars [~] are not in the string my $target = ref($_[0]) || $_[0]; my(@code); my(@c) = (''); # "chunks" -- scratch. my $call_count = 0; my $big_pile = ''; { my $in_group = 0; # start out outside a group my($m, @params); # scratch while($string_to_compile =~ # Iterate over chunks. m/( [^\~\[\]]+ # non-~[] stuff (Capture everything else here) | ~. # ~[, ~], ~~, ~other | \[ # [ presumably opening a group | \] # ] presumably closing a group | ~ # terminal ~ ? | $ )/xgs ) { DEBUG>2 and warn qq{ "$1"\n}; if($1 eq '[' or $1 eq '') { # "[" or end # Whether this is "[" or end, force processing of any # preceding literal. if($in_group) { if($1 eq '') { $target->_die_pointing($string_to_compile, 'Unterminated bracket group'); } else { $target->_die_pointing($string_to_compile, 'You can\'t nest bracket groups'); } } else { if ($1 eq '') { DEBUG>2 and warn " [end-string]\n"; } else { $in_group = 1; } die "How come \@c is empty?? in <$string_to_compile>" unless @c; # sanity if(length $c[-1]) { # Now actually processing the preceding literal $big_pile .= $c[-1]; if($USE_LITERALS and ( (ord('A') == 65) ? $c[-1] !~ m/[^\x20-\x7E]/s # ASCII very safe chars : $c[-1] !~ m/[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~\x07]/s # EBCDIC very safe chars )) { # normal case -- all very safe chars $c[-1] =~ s/'/\\'/g; push @code, q{ '} . $c[-1] . "',\n"; $c[-1] = ''; # reuse this slot } else { push @code, ' $c[' . $#c . "],\n"; push @c, ''; # new chunk } } # else just ignore the empty string. } } elsif($1 eq ']') { # "]" # close group -- go back in-band if($in_group) { $in_group = 0; DEBUG>2 and warn " --Closing group [$c[-1]]\n"; # And now process the group... 
if(!length($c[-1]) or $c[-1] =~ m/^\s+$/s) { DEBUG>2 and warn " -- (Ignoring)\n"; $c[-1] = ''; # reset out chink next; } #$c[-1] =~ s/^\s+//s; #$c[-1] =~ s/\s+$//s; ($m,@params) = split(/,/, $c[-1], -1); # was /\s*,\s*/ # A bit of a hack -- we've turned "~,"'s into DELs, so turn # 'em into real commas here. if (ord('A') == 65) { # ASCII, etc foreach($m, @params) { tr/\x7F/,/ } } else { # EBCDIC (1047, 0037, POSIX-BC) # Thanks to Peter Prymmer for the EBCDIC handling foreach($m, @params) { tr/\x07/,/ } } # Special-case handling of some method names: if($m eq '_*' or $m =~ m/^_(-?\d+)$/s) { # Treat [_1,...] as [,_1,...], etc. unshift @params, $m; $m = ''; } elsif($m eq '*') { $m = 'quant'; # "*" for "times": "4 cars" is 4 times "cars" } elsif($m eq '#') { $m = 'numf'; # "#" for "number": [#,_1] for "the number _1" } # Most common case: a simple, legal-looking method name if($m eq '') { # 0-length method name means to just interpolate: push @code, ' ('; } elsif($m =~ /^\w+(?:\:\:\w+)*$/s and $m !~ m/(?:^|\:)\d/s # exclude starting a (sub)package or symbol with a digit ) { # Yes, it even supports the demented (and undocumented?) # $obj->Foo::bar(...) syntax. $target->_die_pointing( $string_to_compile, q{Can't use "SUPER::" in a bracket-group method}, 2 + length($c[-1]) ) if $m =~ m/^SUPER::/s; # Because for SUPER:: to work, we'd have to compile this into # the right package, and that seems just not worth the bother, # unless someone convinces me otherwise. push @code, ' $_[0]->' . $m . '('; } else { # TODO: implement something? or just too icky to consider? $target->_die_pointing( $string_to_compile, "Can't use \"$m\" as a method name in bracket group", 2 + length($c[-1]) ); } pop @c; # we don't need that chunk anymore ++$call_count; foreach my $p (@params) { if($p eq '_*') { # Meaning: all parameters except $_[0] $code[-1] .= ' @_[1 .. $#_], '; # and yes, that does the right thing for all @_ < 3 } elsif($p =~ m/^_(-?\d+)$/s) { # _3 meaning $_[3] $code[-1] .= '$_[' . 
(0 + $1) . '], '; } elsif($USE_LITERALS and ( (ord('A') == 65) ? $p !~ m/[^\x20-\x7E]/s # ASCII very safe chars : $p !~ m/[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~\x07]/s # EBCDIC very safe chars )) { # Normal case: a literal containing only safe characters $p =~ s/'/\\'/g; $code[-1] .= q{'} . $p . q{', }; } else { # Stow it on the chunk-stack, and just refer to that. push @c, $p; push @code, ' $c[' . $#c . '], '; } } $code[-1] .= "),\n"; push @c, ''; } else { $target->_die_pointing($string_to_compile, q{Unbalanced ']'}); } } elsif(substr($1,0,1) ne '~') { # it's stuff not containing "~" or "[" or "]" # i.e., a literal blob $c[-1] .= $1; } elsif($1 eq '~~') { # "~~" $c[-1] .= '~'; } elsif($1 eq '~[') { # "~[" $c[-1] .= '['; } elsif($1 eq '~]') { # "~]" $c[-1] .= ']'; } elsif($1 eq '~,') { # "~," if($in_group) { # This is a hack, based on the assumption that no-one will actually # want a DEL inside a bracket group. Let's hope that's it's true. if (ord('A') == 65) { # ASCII etc $c[-1] .= "\x7F"; } else { # EBCDIC (cp 1047, 0037, POSIX-BC) $c[-1] .= "\x07"; } } else { $c[-1] .= '~,'; } } elsif($1 eq '~') { # possible only at string-end, it seems. $c[-1] .= '~'; } else { # It's a "~X" where X is not a special character. # Consider it a literal ~ and X. $c[-1] .= $1; } } } if($call_count) { undef $big_pile; # Well, nevermind that. } else { # It's all literals! Ahwell, that can happen. # So don't bother with the eval. Return a SCALAR reference. return \$big_pile; } die q{Last chunk isn't null??} if @c and length $c[-1]; # sanity DEBUG and warn scalar(@c), " chunks under closure\n"; if(@code == 0) { # not possible? DEBUG and warn "Empty code\n"; return \''; } elsif(@code > 1) { # most cases, presumably! unshift @code, "join '',\n"; } unshift @code, "use strict; sub {\n"; push @code, "}\n"; DEBUG and warn @code; my $sub = eval(join '', @code); die "$@ while evalling" . join('', @code) if $@; # Should be impossible. 
return $sub; } #-------------------------------------------------------------------------- sub _die_pointing { # This is used by _compile to throw a fatal error my $target = shift; # class name # ...leaving $_[0] the error-causing text, and $_[1] the error message my $i = index($_[0], "\n"); my $pointy; my $pos = pos($_[0]) - (defined($_[2]) ? $_[2] : 0) - 1; if($pos < 1) { $pointy = "^=== near there\n"; } else { # we need to space over my $first_tab = index($_[0], "\t"); if($pos > 2 and ( -1 == $first_tab or $first_tab > pos($_[0]))) { # No tabs, or the first tab is harmlessly after where we will point to, # AND we're far enough from the margin that we can draw a proper arrow. $pointy = ('=' x $pos) . "^ near there\n"; } else { # tabs screw everything up! $pointyVI_MESSAGE_CATALOG regel te langX kan regel %lu niet verwijderenX kan niet toevoegen aan regel %luX kan niet invoegen vooraan regel %luX kan regel %lu niet opslaanX kan laatste regel niet lezenX Fout: kan regel %lu niet vindenX log bestandX Er vindt geen logging plaats, kan wijzigingen niet ongedaan makenX geen wijzigingen om ongedaan te makenX Er vindt geen logging plaats, kan wijzigingen niet ongedaan makenX Er vindt geen logging plaats, herhaling niet mogelijkX geen wijzigingen om te herhalenX %s/%d: schrijven naar log misluktX Vi's standaard invoer en uitvoer moeten aan een terminal gekoppeld zijnX Merk %s: niet gezetX Merk %s: de regel is verwijderdX Merk %s: de cursor positie bestaat niet meerX Fout: X nieuw bestandX naam veranderdX gewijzigdX ongewijzigdX NIET BEVEILIGDX niet schrijfbaarX regel %lu uit %lu [%ld%%]X leeg bestandX regel %luX Het bestand %s is geen message catalogX Niet in staat om de standaard %s optie in te stellenX Gebruik: %sX set: optie %s onbekend: 'set all' laat alle opties zienX set: [no]%s optie kan geen waarde hebbenX set: %s optie moet een waarde hebbenX set: %s optie: %sX set: %s optie: %s: getal is te grootX set: %s optie: %s is een ongeldige waardeX set: %s optie moet een 
waarde hebbenX Te weinig kolommen op het scherm, minder dan %dX Aantal kolommen te groot, meer dan %dX Te weinig regels op het scherm, minder dan %dX Aantal regels te groot, meer dan %dX De lisp optie is niet ondersteundX messages niet uitgeschakeld: %sX messages niet geactiveerd: %sX De paragraph optie moet karakter paren bevattenX De section optie moet karakter paren bevattenX De standaard buffer is leegX Buffer %s is leegX Bestanden met newlines in de naam kunnen niet hersteld wordenX Wijzigingen kunnen niet ongedaan gemaakt worden als deze sessie misluktX Bestand wordt gecopieerd voor herstel...X Herstel mechanisme werkt niet: %sX Wijzigingen kunnen niet ongedaan gemaakt worden als deze sessie misluktX Kon bestand niet veilig stellen: %sX Bestand wordt gecopieerd voor herstel...X Informatie met betrekking tot gebruiker nummer %u niet gevondenX Kan herstel bestand niet beveiligenX herstel buffer overgelopenX herstel bestandX %s: verminkt herstel bestandX %s: verminkt herstel bestandX U heeft geen bestand genaamd %s te herstellenX U kan eerdere versies van dit bestand herstellenX U kan nog meer bestanden herstellenX kan geen email versturen: %sX Bestand leeg; niets om te doorzoekenX Einde van het bestand bereikt zonder dat het patroon gevonden isX Geen vorig zoek patroonX Patroon niet gevondenX Begin van het bestand bereikt zonder dat het patroon gevonden isX Zoek-operatie omgeslagenX Bezig met zoeken...X Geen niet-printbaar karakter gevondenX Onbekend commandoX Commando niet beschikbaar in ex modeX Aantal mag niet nul zijnX %s: ongeldige regel aanduidingX Interne fout in syntax tabel (%s: %s)X Gebruik: %sX %s: tijdelijke buffer niet vrijgegevenX Vlag offset voor regel 1X Vlag offset voorbij bestands eindeX bestand/scherm veranderd tijdens uitvoeren van @ in een blokX bestand/scherm veranderd tijdens uitvoeren van globaal/v commandoX Ex commando mislukt: rest van commando(s) genegeerdX Ex commando mislukt: gemappede toetsen genegeerdX Het tweede adres is kleiner 
dan het eersteX Geen merk naam opgegevenX \\ niet gevolgd door / of ?X Referentie aan een regel nummer kleiner dan 0X Het %s commando is onbekendX Adres waarde te grootX Adres waarde te kleinX Ongeldige adres combinatieX Ongeldig adres: slechts %lu regels in het bestand aanwezigX Ongeldig adres: het bestand is leegX Het %s commando staat het adres 0 niet toeX Geen afkortingen om weer te gevenX Afkortingen moeten eindigen met een "woord" letterX Afkortingen mogen geen tabulaties of spaties bevattenX Afkortingen mogen geen woord/niet-woord karakters mengen, behalve aan het eindeX "%s" is geen afkortingX Vi commando mislukt: gemappede toetsen genegeerdX Dit is het laatste bestandX Dit is het eerste bestandX Dit is het eerste bestandX lijst met bestanden is leegX Geen voorgaand commando om "!" te vervangenX Geen bestandsnaam voor %%X Geen bestandsnaam voor #X Fout: execl: %sX I/O fout: %sX Bestand gewijzigd sinds laatste schrijfactie; schrijf het weg of gebruik ! om het te forcerenX Kan uw home directory niet vindenX Nieuwe huidige directory: %sX Geen cut buffers aanwezigX Het %s commando kan niet gebruikt worden in een globaal of v commandoX %s/%s: niet gelezen: noch U noch root is de eigenaarX %s/%s: niet gelezen: U bent niet de eigenaarX %s/%s: niet gelezen: kan gewijzigd worden door andere gebruikersX %s: niet gelezen: noch U noch root is de eigenaar"X %s: niet gelezen: U bent niet de eigenaarX %s: niet gelezen: kan gewijzigd worden door andere gebruikersX Geen volgende regel om samen te voegenX Geen input map entriesX Geen command map entriesX Het %s karakter kan niet ge-remapped wordenX "%s" is niet gemappedX Merk naam moet een enkel karakter zijnX %s bestaat al, niet weggeschreven; gebruik ! om het te forcerenX Nieuw .exrc bestand: %s. 
X doel regel ligt in het blokX Het open commando vereist dat de open optie actief isX Het open commando is nog niet ondersteundX Kan dit bestand niet veilig stellenX Bestand veilig gesteldX %s resulteert in te veel bestandsnamenX Alleen echte bestanden en named pipes kunnen gelezen wordenX %s: lees beveiliging niet beschikbaarX Bezig met lezen...X %s: %lu regels, %lu karaktersX Geen achtergrond schermen aanwezigX Het script commando is alleen beschikbaar in vi modeX Geen comando om uit te voerenX shiftwidth optie op 0 gezetX Count te grootX Count te kleinX Reguliere expressie opgegeven; r vlag heeft geen betekenisX De #, l en p vlaggen kunnen niet gecombineerd worden met de c vlag in vi modeX Geen match gevondenX Geen voorafgaande tag aanwezigX Minder dan %s elementen op de tags stapel; gebruik :display t[ags]X Geen bestand genaamd %s op de tags stapel; gebruik :display t[ags]X Kies Enter om door te gaan: X %s: tag niet gevondenX %s: verminkte tag in %sX %s: Het regel nummer van deze tag is voorbij het einde van het bestandX De tags stapel is leegX %s: zoek patroon niet gevondenX %d andere bestanden te wijzigenX Buffer %s is leegX Bevestig wijziging? 
[n]X OnderbrokenX Geen voorafgaande buffer om uit te voerenX Geen vorige reguliere expressieX Het %s commando vereist dat er een bestand geladen isX Gebruik: %sX Het visual commando vereist dat de open optie actief isX Leeg bestandX Geen voorafgaand F, f, T of t zoek commandoX %s niet gevondenX Geen voorafgaand bestand te bewerkenX Cursor niet op een getalX Getal wordt te grootX Getal wordt te kleinX Geen overeenkomstig karakter op deze regelX Overeenkomstig karakter niet gevondenX Geen karakters te vervangenX Geen ander scherm aanwezigX Karakters achter het zoek patroon, de regel offset, en/of het z commandoX Geen voorafgaand zoek patroonX Zoekopdracht na omslag teruggekeerd op originele positieX Afkorting overschrijdt expansie limiet: karakters genegeerdX Ongeldig karakter; quote to enterX Reeds aan het begin van de invoerX Niet meer karakters te verwijderenX Verplaatsing voorbij het einde van het bestandX Verplaatsing voorbij het einde van de regelX Cursor niet verplaatstX Reeds aan het begin van het bestandX Verplaatsing voorbij het begin van het bestandX Reeds in de eerste kolomX Buffers moeten voor het commando opgegeven wordenX Reeds bij het einde van het bestandX Reeds bij het einde van de regelX %s is geen vi commandoX Gebruik: %sX Geen karakters te verwijderenX Het Q commando vereist de ex terminal interfaceX Geen commando om te herhalenX Het bestand is leegX %s mag niet gebruikt worden als een verplaatsings commandoX Al in commando modeX Cursor niet in een woordX Windows optie waarde is te groot, maximum is %uX ToevoegenX VeranderenX CommandoX InvoegenX VervangenX Verplaatsing voorbij het eind van het schermX Verplaatsing voorbij het begin van het schermX Scherm moet meer dan %d regels hebben om het te kunnen splitsenX Er zijn geen achtergrond schermenX Er is geen achtergrond scherm waarin U bestand %s aan het bewerken bentX U kan uw enige scherm niet in de achtergrond zettenX Het scherm kan slechts verkleind worden tot %d regelsX Het scherm kan niet 
kleinerX Het scherm kan niPX`hpxet groterX Dit scherm kan niet gesuspend wordenX Onderbroken: gemappede toetsen genegeerdX vi: tijdelijke buffer niet vrijgegevenX Deze terminal heeft geen %s toetsX Er kan slechts een buffer opgegeven wordenX Getal groter dan %luX OnderbrokenX Aanmaken van tijdelijk bestand is misluktX Waarschuwing: %s is geen regulier bestandX %s is al geopend, bestand is in deze sessie niet schrijfbaarX %s: verwijdering misluktX %s: sluiting misluktX %s: verwijdering misluktX %s: verwijdering misluktX Bestand niet schrijfbaar, niet weggeschreven; gebruik ! om het te forcerenX Bestand niet schrijfbaar, niet weggeschrevenX %s bestaat al, niet weggeschreven; gebruik ! om het te forcerenX %s bestaat al, niet weggeschrevenX Gebruik ! om een incompleet bestand weg te schrijvenX Bestand incompleet, niet weggeschrevenX %s: bestand op disk nieuwer dan deze versie; gebruik ! om het te forcerenX %s: bestand op disk nieuwer dan deze versieX %s: schrijf beveiliging niet beschikbaarX Bezig met schrijven...X %s: WAARSCHUWING: BESTAND INCOMPLEETX Reeds op de eerste tag van deze groepX %s: nieuw bestand: %lu regels, %lu karaktersX %s: %lu regels, %lu karaktersX %s resulteert in te veel bestandsnamenX %s: geen normaal bestandX %s: U bent niet de eigenaarX %s: kan gewijzigd worden door andere gebruikersX Bestand gewijzigd sinds laatste schrijfactie; schrijf het weg of gebruik ! om het te forcerenX Bestand gewijzigd sinds laatste schrijfactie; schrijf het weg of gebruik :edit! om het te forcerenX Bestand gewijzigd sinds laatste schrijfactie; schrijf het weg of gebruik ! om het te forcerenX Tijdelijk bestand; exit negeert wijzigingenX Bestand niet schrijfbaar, wijzigingen niet automatisch weggeschrevenX log opnieuw gestartX Bevestig? 
[ynq]X Druk op een toets om door te gaan: X Druk op een toets om door te gaan [: voor meer ex commandos]: X Druk op een toets om door te gaan [q om te stoppen]: X Deze vorm van %s vereist de ex terminal interfaceX Entering ex input mode.X Commando mislukt, nog geen bestand geladen.X doorgaan?X Onverwacht character eventX Onverwacht end-of-file eventX Geen match gevonden voor dit patroonX Onverwacht interrupt eventX Onverwacht quit eventX Onverwacht repaint eventX Reeds op de laatste tag van deze groepX Het %s command vereist de ex terminal interfaceX Deze vorm van %s is niet ondersteund als de secure edit optie actief isX Onverwacht string eventX Onverwacht timeout eventX Onverwacht write eventX Shell expansies zijn niet ondersteund als de secure edit optie actief isX Het %s commando is niet ondersteund als de secure edit optie actief isX set: %s mag niet uitgezet wordenX Scherm te klein.X toegevoegdX gewijzigdX verwijderdX samengevoegdX verplaatstX verschovenX gebufferdX regelX regelsX Vi was niet geladen met een Tcl interpreterX Bestand gewijzigd sinds het de laatste keer weg is geschreven.X Shell expansie misluktX Geen %s edit optie opgegevenX Vi was niet geladen met een Perl interpreterX Geen ex commando om uit te voerenX Kies om commando uit te voeren, :q om te stoppenX Gebruik "cscope help" voor uitlegX Nog geen cscope connectie aanwezigX %s: onbekend zoek type: gebruik een van %sX %d: onbekende cscope sessieX set: de %s optie mag nooit aangezet wordenX set: de %s optie mag nooit op 0 gezet wordenX %s: toegevoegd: %lu regels, %lu karaktersX Onverwacht resize eventX %d bestanden te wijzigenX -h package File::Temp; =head1 NAME File::Temp - return name and handle of a temporary file safely =begin __INTERNALS =head1 PORTABILITY This section is at the top in order to provide easier access to porters. It is not expected to be rendered by a standard pod formatting tool. 
Please skip straight to the SYNOPSIS section if you are not trying to port this module to a new platform. This module is designed to be portable across operating systems and it currently supports Unix, VMS, DOS, OS/2, Windows and Mac OS (Classic). When porting to a new OS there are generally three main issues that have to be solved: =over 4 =item * Can the OS unlink an open file? If it can not then the C<_can_unlink_opened_file> method should be modified. =item * Are the return values from C<stat> reliable? By default all the return values from C<stat> are compared when unlinking a temporary file using the filename and the handle. Operating systems other than unix do not always have valid entries in all fields. If C<unlink0> fails then the C<stat> comparison should be modified accordingly. =item * Security. Systems that can not support a test for the sticky bit on a directory can not use the MEDIUM and HIGH security tests. The C<_can_do_level> method should be modified accordingly. =back =end __INTERNALS =head1 SYNOPSIS use File::Temp qw/ tempfile tempdir /; $fh = tempfile(); ($fh, $filename) = tempfile(); ($fh, $filename) = tempfile( $template, DIR => $dir); ($fh, $filename) = tempfile( $template, SUFFIX => '.dat'); ($fh, $filename) = tempfile( $template, TMPDIR => 1 ); binmode( $fh, ":utf8" ); $dir = tempdir( CLEANUP => 1 ); ($fh, $filename) = tempfile( DIR => $dir ); Object interface: require File::Temp; use File::Temp (); use File::Temp qw/ :seekable /; $fh = File::Temp->new(); $fname = $fh->filename; $fh = File::Temp->new(TEMPLATE => $template); $fname = $fh->filename; $tmp = File::Temp->new( UNLINK => 0, SUFFIX => '.dat' ); print $tmp "Some data\n"; print "Filename is $tmp\n"; $tmp->seek( 0, SEEK_END ); The following interfaces are provided for compatibility with existing APIs. They should not be used in new code.
MkTemp family: use File::Temp qw/ :mktemp /; ($fh, $file) = mkstemp( "tmpfileXXXXX" ); ($fh, $file) = mkstemps( "tmpfileXXXXXX", $suffix); $tmpdir = mkdtemp( $template ); $unopened_file = mktemp( $template ); POSIX functions: use File::Temp qw/ :POSIX /; $file = tmpnam(); $fh = tmpfile(); ($fh, $file) = tmpnam(); Compatibility functions: $unopened_file = File::Temp::tempnam( $dir, $pfx ); =head1 DESCRIPTION C<File::Temp> can be used to create and open temporary files in a safe way. There is both a function interface and an object-oriented interface. The File::Temp constructor or the tempfile() function can be used to return the name and the open filehandle of a temporary file. The tempdir() function can be used to create a temporary directory. The security aspect of temporary file creation is emphasized such that a filehandle and filename are returned together. This helps guarantee that a race condition can not occur where the temporary file is created by another process between checking for the existence of the file and its opening. Additional security levels are provided to check, for example, that the sticky bit is set on world writable directories. See L<"safe_level"> for more information. For compatibility with popular C library functions, Perl implementations of the mkstemp() family of functions are provided. These are, mkstemp(), mkstemps(), mkdtemp() and mktemp(). Additionally, implementations of the standard L<POSIX> tmpnam() and tmpfile() functions are provided if required. Implementations of mktemp(), tmpnam(), and tempnam() are provided, but should be used with caution since they return only a filename that was valid when function was called, so cannot guarantee that the file will not exist by the time the caller opens the filename. Filehandles returned by these functions support the seekable methods.
=cut

# Module preamble: dependencies, export tables, tunable constants and the
# sysopen() flag sets that are computed once at load time.

# 5.6.0 gives us S_IWOTH, S_IWGRP, our and auto-vivifying filehandles
# People would like a version on 5.004 so give them what they want :-)
use 5.004;
use strict;
use Carp;
use File::Spec 0.8;
use File::Path qw/ rmtree /;
use Fcntl 1.03;
use IO::Seekable;               # For SEEK_*
use Errno;
require VMS::Stdio if $^O eq 'VMS';

# pre-emptively load Carp::Heavy. If we don't when we run out of file
# handles and attempt to call croak() we get an error message telling
# us that Carp::Heavy won't load rather than an error telling us we
# have run out of file handles. We either preload croak() or we
# switch the calls to croak from _gettemp() to use die.
# The eval makes the preload best-effort: a failure to load is ignored.
eval { require Carp::Heavy; };

# Need the Symbol package if we are running older perl
require Symbol if $] < 5.006;

### For the OO interface
use base qw/ IO::Handle IO::Seekable /;
# Objects stringify through the STRINGIFY method defined in this package
use overload '""' => "STRINGIFY", fallback => 1;

# use 'our' on v5.6.0
use vars qw($VERSION @EXPORT_OK %EXPORT_TAGS $DEBUG $KEEP_ALL);

# Both switches default to off; the rest of the module tests them as
# booleans ($DEBUG gates diagnostic prints, $KEEP_ALL suppresses removal).
$DEBUG = 0;
$KEEP_ALL = 0;

# We are exporting functions

use base qw/Exporter/;

# Export list - to allow fine tuning of export table

@EXPORT_OK = qw{
                 tempfile
                 tempdir
                 tmpnam
                 tmpfile
                 mktemp
                 mkstemp
                 mkstemps
                 mkdtemp
                 unlink0
                 cleanup
                 SEEK_SET
                 SEEK_CUR
                 SEEK_END
             };

# Groups of functions for export

%EXPORT_TAGS = (
                'POSIX' => [qw/ tmpnam tmpfile /],
                'mktemp' => [qw/ mktemp mkstemp mkstemps mkdtemp/],
                'seekable' => [qw/ SEEK_SET SEEK_CUR SEEK_END /],
               );

# add contents of these tags to @EXPORT
Exporter::export_tags('POSIX','mktemp','seekable');

# Version number

$VERSION = '0.22';

# This is a list of characters that can be used in random filenames

my @CHARS = (qw/ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
                 a b c d e f g h i j k l m n o p q r s t u v w x y z
                 0 1 2 3 4 5 6 7 8 9 _
               /);

# Maximum number of tries to make a temp file before failing

use constant MAX_TRIES => 1000;

# Minimum number of X characters that should be in a template
use constant MINX => 4;

# Default template when no template supplied

use constant TEMPXXX => 'X' x 10;

# Constants for the security level

use constant STANDARD => 0;
use constant MEDIUM   => 1;
use constant HIGH     => 2;

# OPENFLAGS. If we defined the flag to use with Sysopen here this gives
# us an optimisation when many temporary files are requested

my $OPENFLAGS = O_CREAT | O_EXCL | O_RDWR;
my $LOCKFLAG;

unless ($^O eq 'MacOS') {
  # Each optional flag is looked up by name; the eval yields true (and the
  # bit is OR-ed in) only when the Fcntl::O_* call succeeds.
  for my $oflag (qw/ NOFOLLOW BINARY LARGEFILE NOINHERIT /) {
    my ($bit, $func) = (0, "Fcntl::O_" . $oflag);
    no strict 'refs';
    $OPENFLAGS |= $bit if eval {
      # Make sure that redefined die handlers do not cause problems
      # e.g. CGI::Carp
      local $SIG{__DIE__} = sub {};
      local $SIG{__WARN__} = sub {};
      $bit = &$func();
      1;
    };
  }
  # Special case O_EXLOCK
  # Kept separate from $OPENFLAGS; stays undef when the call fails.
  $LOCKFLAG = eval {
    local $SIG{__DIE__} = sub {};
    local $SIG{__WARN__} = sub {};
    &Fcntl::O_EXLOCK();
  };
}

# On some systems the O_TEMPORARY flag can be used to tell the OS
# to automatically remove the file when it is closed. This is fine
# in most cases but not if tempfile is called with UNLINK=>0 and
# the filename is requested -- in the case where the filename is to
# be passed to another routine. This happens on windows. We overcome
# this by using a second open flags variable

my $OPENTEMPFLAGS = $OPENFLAGS;
unless ($^O eq 'MacOS') {
  for my $oflag (qw/ TEMPORARY /) {
    my ($bit, $func) = (0, "Fcntl::O_" . $oflag);
    local($@);
    no strict 'refs';
    $OPENTEMPFLAGS |= $bit if eval {
      # Make sure that redefined die handlers do not cause problems
      # e.g. CGI::Carp
      local $SIG{__DIE__} = sub {};
      local $SIG{__WARN__} = sub {};
      $bit = &$func();
      1;
    };
  }
}

# Private hash tracking which files have been created by each process id via the OO interface
# (keyed first by process id, then by file path)
my %FILES_CREATED_BY_OBJECT;

# INTERNAL ROUTINES - not to be used outside of package

# Generic routine for getting a temporary filename
# modelled on OpenBSD _gettemp() in mktemp.c

# The template must contain X's that are to be replaced
# with the random values

#  Arguments:

#  TEMPLATE   - string containing the XXXXX's that is converted
#           to a random filename and opened if required

# Optionally, a hash can also be supplied containing specific options
#   "open" => if true open the temp file, else just return the name
#             default is 0
#   "mkdir"=> if true, we are creating a temp directory rather than tempfile
#             default is 0
#   "suffixlen" => number of characters at end of PATH to be ignored.
#                  default is 0.
#   "unlink_on_close" => indicates that, if possible,  the OS should remove
#                        the file as soon as it is closed. Usually indicates
#                        use of the O_TEMPORARY flag to sysopen.
#                        Usually irrelevant on unix
#   "use_exlock" => Indicates that O_EXLOCK should be used. Default is true.

# Optionally a reference to a scalar can be passed into the function
# On error this will be used to store the reason for the error
#   "ErrStr"  => \$errstr

# "open" and "mkdir" can not both be true
# "unlink_on_close" is not used when "mkdir" is true.

# The default options are equivalent to mktemp().
# Returns:
#   filehandle - open file handle (if called with doopen=1, else undef)
#   temp name  - name of the temp file or directory
# An empty list is returned on any failure.

# For example:
#   ($fh, $name) = _gettemp($template, "open" => 1);

# for the current version, failures are stored in an error string
# (see the "ErrStr" option) and returned to give the reason whilst debugging
# This routine is not called by any external function
sub _gettemp {

  croak 'Usage: ($fh, $name) = _gettemp($template, OPTIONS);'
    unless scalar(@_) >= 1;

  # the internal error string - expect it to be overridden
  # Need this in case the caller decides not to supply us a value
  # need an anonymous scalar
  my $tempErrStr;

  # Default options
  my %options = (
                 "open" => 0,
                 "mkdir" => 0,
                 "suffixlen" => 0,
                 "unlink_on_close" => 0,
                 "use_exlock" => 1,
                 "ErrStr" => \$tempErrStr,
                );

  # Read the template
  my $template = shift;
  if (ref($template)) {
    # Use a warning here since we have not yet merged ErrStr
    carp "File::Temp::_gettemp: template must not be a reference";
    return ();
  }

  # Check that the number of entries on stack are even
  if (scalar(@_) % 2 != 0) {
    # Use a warning here since we have not yet merged ErrStr
    carp "File::Temp::_gettemp: Must have even number of options";
    return ();
  }

  # Read the options and merge with defaults
  %options = (%options, @_)  if @_;

  # Make sure the error string is set to undef
  ${$options{ErrStr}} = undef;

  # Can not open the file and make a directory in a single call
  if ($options{"open"} && $options{"mkdir"}) {
    ${$options{ErrStr}} = "doopen and domkdir can not both be true\n";
    return ();
  }

  # Find the start of the end of the Xs (position of last X)
  # Substr starts from 0
  my $start = length($template) - 1 - $options{"suffixlen"};

  # Check that we have at least MINX x X (e.g. 'XXXX") at the end of the string
  # (taking suffixlen into account). Any fewer is insecure.

  # Do it using substr - no reason to use a pattern match since
  # we know where we are looking and what we are looking for

  if (substr($template, $start - MINX + 1, MINX) ne 'X' x MINX) {
    ${$options{ErrStr}} = "The template must end with at least ".
      MINX . " 'X' characters\n";
    return ();
  }

  # Replace all the X at the end of the substring with a
  # random character or just all the XX at the end of a full string.
  # Do it as an if, since the suffix adjusts which section to replace
  # and suffixlen=0 returns nothing if used in the substr directly
  # and generate a full path from the template

  my $path = _replace_XX($template, $options{"suffixlen"});

  # Split the path into constituent parts - eventually we need to check
  # whether the directory exists
  # We need to know whether we are making a temp directory
  # or a tempfile

  my ($volume, $directories, $file);
  my $parent;                   # parent directory
  if ($options{"mkdir"}) {
    # There is no filename at the end
    ($volume, $directories, $file) = File::Spec->splitpath( $path, 1);

    # The parent is then $directories without the last directory
    # Split the directory and put it back together again
    my @dirs = File::Spec->splitdir($directories);

    # If @dirs only has one entry (i.e. the directory template) that means
    # we are in the current directory
    if ($#dirs == 0) {
      $parent = File::Spec->curdir;
    } else {

      if ($^O eq 'VMS') {     # need volume to avoid relative dir spec
        $parent = File::Spec->catdir($volume, @dirs[0..$#dirs-1]);
        $parent = 'sys$disk:[]' if $parent eq '';
      } else {

        # Put it back together without the last one
        $parent = File::Spec->catdir(@dirs[0..$#dirs-1]);

        # ...and attach the volume (no filename)
        $parent = File::Spec->catpath($volume, $parent, '');
      }

    }

  } else {

    # Get rid of the last filename (use File::Basename for this?)
    ($volume, $directories, $file) = File::Spec->splitpath( $path );

    # Join up without the file part
    $parent = File::Spec->catpath($volume,$directories,'');

    # If $parent is empty replace with curdir
    $parent = File::Spec->curdir
      unless $directories ne '';

  }

  # Check that the parent directories exist
  # Do this even for the case where we are simply returning a name
  # not a file -- no point returning a name that includes a directory
  # that does not exist or is not writable

  unless (-e $parent) {
    ${$options{ErrStr}} = "Parent directory ($parent) does not exist";
    return ();
  }
  unless (-d $parent) {
    ${$options{ErrStr}} = "Parent directory ($parent) is not a directory";
    return ();
  }

  # Check the stickiness of the directory and chown giveaway if required
  # If the directory is world writable the sticky bit
  # must be set
  # (no directory safety test is run at any other safe_level value)

  if (File::Temp->safe_level == MEDIUM) {
    my $safeerr;
    unless (_is_safe($parent,\$safeerr)) {
      ${$options{ErrStr}} = "Parent directory ($parent) is not safe ($safeerr)";
      return ();
    }
  } elsif (File::Temp->safe_level == HIGH) {
    my $safeerr;
    unless (_is_verysafe($parent, \$safeerr)) {
      ${$options{ErrStr}} = "Parent directory ($parent) is not safe ($safeerr)";
      return ();
    }
  }

  # Now try MAX_TRIES time to open the file
  for (my $i = 0; $i < MAX_TRIES; $i++) {

    # Try to open the file if requested
    if ($options{"open"}) {
      my $fh;

      # If we are running before perl5.6.0 we can not auto-vivify
      if ($] < 5.006) {
        $fh = &Symbol::gensym;
      }

      # Try to make sure this will be marked close-on-exec
      # XXX: Win32 doesn't respect this, nor the proper fcntl,
      #      but may have O_NOINHERIT. This may or may not be in Fcntl.
      local $^F = 2;

      # Attempt to open the file
      my $open_success = undef;
      if ( $^O eq 'VMS' and $options{"unlink_on_close"} && !$KEEP_ALL) {
        # make it auto delete on close by setting FAB$V_DLT bit
        $fh = VMS::Stdio::vmssysopen($path, $OPENFLAGS, 0600, 'fop=dlt');
        $open_success = $fh;
      } else {
        # $OPENTEMPFLAGS is only chosen when removal on close was requested
        # and $KEEP_ALL is not set
        my $flags = ( ($options{"unlink_on_close"} && !$KEEP_ALL) ?
                      $OPENTEMPFLAGS :
                      $OPENFLAGS );
        $flags |= $LOCKFLAG if (defined $LOCKFLAG && $options{use_exlock});
        $open_success = sysopen($fh, $path, $flags, 0600);
      }
      if ( $open_success ) {

        # in case of odd umask force rw
        chmod(0600, $path);

        # Opened successfully - return file handle and name
        return ($fh, $path);

      } else {

        # Error opening file - abort with error
        # if the reason was anything but EEXIST
        unless ($!{EEXIST}) {
          ${$options{ErrStr}} = "Could not create temp file $path: $!";
          return ();
        }

        # Loop round for another try

      }
    } elsif ($options{"mkdir"}) {

      # Open the temp directory
      if (mkdir( $path, 0700)) {
        # in case of odd umask
        chmod(0700, $path);

        return undef, $path;
      } else {

        # Abort with error if the reason for failure was anything
        # except EEXIST
        unless ($!{EEXIST}) {
          ${$options{ErrStr}} = "Could not create directory $path: $!";
          return ();
        }

        # Loop round for another try

      }

    } else {

      # Return true if the file can not be found
      # Directory has been checked previously

      return (undef, $path) unless -e $path;

      # Try again until MAX_TRIES

    }

    # Did not successfully open the tempfile/dir
    # so try again with a different set of random letters
    # No point in trying to increment unless we have only
    # 1 X say and the randomness could come up with the same
    # file MAX_TRIES in a row.

    # Store current attempt - in principle this implies that the
    # 3rd time around the open attempt that the first temp file
    # name could be generated again. Probably should store each
    # attempt and make sure that none are repeated

    my $original = $path;
    my $counter = 0;            # Stop infinite loop
    my $MAX_GUESS = 50;

    do {

      # Generate new name from original template
      $path = _replace_XX($template, $options{"suffixlen"});

      $counter++;

    } until ($path ne $original || $counter > $MAX_GUESS);

    # Check for out of control looping
    if ($counter > $MAX_GUESS) {
      ${$options{ErrStr}} = "Tried to get a new temp name different to the previous value $MAX_GUESS times.\nSomething wrong with template?? ($template)";
      return ();
    }

  }

  # If we get here, we have run out of tries
  ${ $options{ErrStr} } = "Have exceeded the maximum number of attempts ("
    . MAX_TRIES . ") to open temp file/dir";

  return ();

}

# Internal routine to replace the XXXX... with random characters
# This has to be done by _gettemp() every time it fails to
# open a temp file/dir

# Arguments:  $template (the template with XXX),
#             $ignore   (number of characters at end to ignore)

# Returns:    modified template

sub _replace_XX {

  croak 'Usage: _replace_XX($template, $ignore)'
    unless scalar(@_) == 2;

  my ($path, $ignore) = @_;

  # Do it as an if, since the suffix adjusts which section to replace
  # and suffixlen=0 returns nothing if used in the substr directly
  # Alternatively, could simply set $ignore to length($path)-1
  # Don't want to always use substr when not required though.
  # The end-of-string anchor is \z on 5.6.0 and later, \Z before that
  my $end = ( $] >= 5.006 ? "\\z" : "\\Z" );

  # The lookahead only matches an X that is followed by nothing but
  # more X characters up to the end of the (sub)string
  if ($ignore) {
    substr($path, 0, - $ignore) =~ s/X(?=X*$end)/$CHARS[ int( rand( @CHARS ) ) ]/ge;
  } else {
    $path =~ s/X(?=X*$end)/$CHARS[ int( rand( @CHARS ) ) ]/ge;
  }
  return $path;
}

# Internal routine to force a temp file to be writable after
# it is created so that we can unlink it. Windows seems to occasionally
# force a file to be readonly when written to certain temp locations
# Returns the result of chmod.
sub _force_writable {
  my $file = shift;
  chmod 0600, $file;
}

# internal routine to check to see if the directory is safe
# First checks to see if the directory is not owned by the
# current user or root. Then checks to see if anyone else
# can write to the directory and if so, checks to see if
# it has the sticky bit set

# Will not work on systems that do not support sticky bit

#Args:  directory path to check
#       Optionally: reference to scalar to contain error message
# Returns true if the path is safe and false otherwise.
# Returns 0 (false) as well if stat() can not even be run on the path
# The error reference is optional: it is only written to when it really
# is a reference.

# This routine based on version written by Tom Christiansen

# Presumably, by the time we actually attempt to create the
# file or directory in this directory, it may not be safe
# anymore... Have to run _is_safe directly after the open.

sub _is_safe {

  my $path = shift;
  my $err_ref = shift;

  # Stat path
  my @info = stat($path);
  unless (scalar(@info)) {
    # Guarded like every other error site below, so that a missing or
    # non-reference second argument is ignored rather than dereferenced
    $$err_ref = "stat(path) returned no values" if ref($err_ref);
    return 0;
  }

  return 1 if $^O eq 'VMS';     # owner delete control at file level

  # Check to see whether owner is neither superuser (or a system uid) nor me
  # Use the effective uid from the $> variable
  # UID is in [4]
  if ($info[4] > File::Temp->top_system_uid() && $info[4] != $>) {

    Carp::cluck(sprintf "uid=$info[4] topuid=%s euid=$> path='$path'",
                File::Temp->top_system_uid());

    $$err_ref = "Directory owned neither by root nor the current user"
      if ref($err_ref);
    return 0;
  }

  # check whether group or other can write file
  # use 066 to detect either reading or writing
  # use 022 to check writability
  # Do it with S_IWOTH and S_IWGRP for portability (maybe)
  # mode is in info[2]
  # The constants are called with explicit empty parentheses so that the
  # current @_ is not passed through to them
  if (($info[2] & Fcntl::S_IWGRP()) ||   # Is group writable?
      ($info[2] & Fcntl::S_IWOTH()) ) {  # Is world writable?
    # Must be a directory
    unless (-d $path) {
      $$err_ref = "Path ($path) is not a directory"
        if ref($err_ref);
      return 0;
    }
    # Must have sticky bit set
    unless (-k $path) {
      $$err_ref = "Sticky bit not set on $path when dir is group|world writable"
        if ref($err_ref);
      return 0;
    }
  }

  return 1;
}

# Internal routine to check whether a directory is safe
# for temp files. Safer than _is_safe since it checks for
# the possibility of chown giveaway and if that is a possibility
# checks each directory in the path to see if it is safe (with _is_safe)

# If _PC_CHOWN_RESTRICTED is not set, does the full test of each
# directory anyway.
# Takes optional second arg as scalar ref to error reason
# Returns true if the path is considered safe and false otherwise.

sub _is_verysafe {

  # Need POSIX - but only want to bother if really necessary due to overhead
  require POSIX;

  my $path = shift;
  print "_is_verysafe testing $path\n" if $DEBUG;
  return 1 if $^O eq 'VMS';     # owner delete control at file level

  my $err_ref = shift;

  # Get the _PC_CHOWN_RESTRICTED selector if this platform defines it.
  # The eval leaves $chown_restricted undefined when the call fails,
  # in which case we fall through to the extensive test.
  local($@);
  my $chown_restricted = eval { POSIX::_PC_CHOWN_RESTRICTED() };

  # If chown_restricted is set to some value we should test it
  if (defined $chown_restricted) {

    # _PC_CHOWN_RESTRICTED is a per-path (pathconf) setting, so it has to
    # be queried with pathconf() on the directory itself; handing it to
    # sysconf() would look up an unrelated system variable.
    # pathconf() returns undef when the option is not in effect or the
    # query fails, and a defined value (possibly "0 but true") otherwise.
    my $restricted = POSIX::pathconf( $path, $chown_restricted );

    # Return if the current directory is safe
    return _is_safe($path,$err_ref) if defined $restricted;

  }

  # To reach this point either, the _PC_CHOWN_RESTRICTED symbol
  # was not available or the symbol was there but chown giveaway
  # is allowed. Either way, we now have to test the entire tree for
  # safety.

  # Convert path to an absolute directory if required
  unless (File::Spec->file_name_is_absolute($path)) {
    $path = File::Spec->rel2abs($path);
  }

  # Split directory into components - assume no file
  my ($volume, $directories, undef) = File::Spec->splitpath( $path, 1);

  # Slightly less efficient than having a function in File::Spec
  # to chop off the end of a directory or even a function that
  # can handle ../ in a directory tree
  # Sometimes splitdir() returns a blank at the end
  # so we will probably check the bottom directory twice in some cases
  my @dirs = File::Spec->splitdir($directories);

  # Concatenate one less directory each time around
  foreach my $pos (0.. $#dirs) {
    # Get a directory name
    my $dir = File::Spec->catpath($volume,
                                  File::Spec->catdir(@dirs[0.. $#dirs - $pos]),
                                  ''
                                 );

    print "TESTING DIR $dir\n" if $DEBUG;

    # Check the directory
    return 0 unless _is_safe($dir,$err_ref);

  }

  return 1;
}

# internal routine to determine whether unlink works on this
# platform for files that are currently open.
# Returns true if we can, false otherwise.

# Currently WinNT, OS/2 and VMS can not unlink an opened file
# On VMS this is because the O_EXCL flag is used to open the
# temporary file. Currently I do not know enough about the issues
# on VMS to decide whether O_EXCL is a requirement.

sub _can_unlink_opened_file {

  # Platforms on which an open file can not be removed
  my $blocked = grep { $^O eq $_ } qw/ MSWin32 os2 VMS dos MacOS /;

  return $blocked ? 0 : 1;
}

# internal routine to decide which security levels are allowed
# see safe_level() for more information on this

# Controls whether the supplied security level is allowed

#   $cando = _can_do_level( $level )

sub _can_do_level {

  # Requested security level
  my ($level) = @_;

  # STANDARD is available everywhere
  return 1 if $level == STANDARD;

  # MEDIUM and HIGH are currently available on the same set of systems:
  # everything except the platforms listed here
  my $unsupported =
    grep { $^O eq $_ } qw/ MSWin32 os2 cygwin dos MacOS mpeix /;

  return $unsupported ? 0 : 1;
}

# Deferred removal of files and directories.
# _deferred_unlink() registers something for later removal. It is used:
#  - by unlink0 if an opened file can not be unlinked
#  - by tempfile() if files are to be removed on shutdown
#  - by tempdir() if directories are to be removed on shutdown

# Arguments:
#   _deferred_unlink( $fh, $fname, $isdir );
#
#   - filehandle (so that it can be explicitly closed if open)
#   - filename   (the thing we want to remove)
#   - isdir      (flag to indicate that we are being given a directory)
#                 [and hence no filehandle]

# No status is tracked: the removal itself happens in cleanup(), which
# the END block below calls.

{
  # Two lexical registries, visible only inside this block, so that one
  # END block can remove everything.
  # Both are keyed by process id so that a child process does not
  # inadvertently delete temp files that belong to its parent.
  # %files_to_unlink maps pid => array of [ filehandle, filename ] pairs;
  # %dirs_to_unlink maps pid => array of directory names.
  my (%files_to_unlink, %dirs_to_unlink);

  # Run the cleanup at program end without disturbing the global
  # status variables
  END {
    local($., $@, $!, $^E, $?);
    cleanup();
  }

  # Cleanup function. Always triggered on END but can be invoked
  # manually.
  sub cleanup {
    if (!$KEEP_ALL) {

      # Files registered by this process
      my $pending_files = exists $files_to_unlink{$$}
        ? $files_to_unlink{$$} : [];
      foreach my $entry (@$pending_files) {
        my ($handle, $name) = @$entry;

        # Close unconditionally: we want the handle closed and do not
        # care about the result if it was closed already
        close($handle);

        next unless -f $name;
        _force_writable( $name ); # for windows
        unlink $name or warn "Error removing ".$name;
      }

      # Directories registered by this process
      my $pending_dirs = exists $dirs_to_unlink{$$}
        ? $dirs_to_unlink{$$} : [];
      foreach my $target (@$pending_dirs) {
        next unless -d $target;

        # Some versions of rmtree will abort if you attempt to remove
        # the directory you are sitting in. That is turned into a
        # warning here since it happens during cleanup and so can not
        # be caught by the user.
        eval { rmtree($target, $DEBUG, 0); };
        warn $@ if ($@ && $^W);
      }

      # clear the arrays
      @{ $files_to_unlink{$$} } = ()
        if exists $files_to_unlink{$$};
      @{ $dirs_to_unlink{$$} } = ()
        if exists $dirs_to_unlink{$$};
    }
  }

  # Register a file or directory for deferred removal.
  # Some checking is done here rather than in the END block: the
  # argument count must be right and the thing to delete must exist.
  sub _deferred_unlink {

    croak 'Usage:  _deferred_unlink($fh, $fname, $isdir)'
      unless scalar(@_) == 3;

    my ($fh, $fname, $isdir) = @_;

    warn "Setting up deferred removal of $fname\n"
      if $DEBUG;

    if ($isdir) {

      # Only an existing directory is recorded
      if (-d $fname) {

        # first on VMS turn []foo into [.foo] for rmtree
        $fname = VMS::Filespec::vmspath($fname) if $^O eq 'VMS';

        $dirs_to_unlink{$$} ||= [];
        push (@{ $dirs_to_unlink{$$} }, $fname);

      } else {
        carp "Request to remove directory $fname could not be completed since it does not exist!\n" if $^W;
      }

    } else {

      # Only an existing plain file is recorded, with its handle
      if (-f $fname) {

        $files_to_unlink{$$} ||= [];
        push(@{ $files_to_unlink{$$} }, [$fh, $fname]);

      } else {
        carp "Request to remove file $fname could not be completed since it is not there!\n" if $^W;
      }

    }

  }

}

=head1 OBJECT-ORIENTED INTERFACE

This is the primary interface for interacting with
C<File::Temp>. Using the OO interface a temporary file can be created
when the object is constructed and the file can be removed when the
object is no longer required.

Note that there is no method to obtain the filehandle from the
C<File::Temp> object. The object itself acts as a filehandle. Also,
the object is configured such that it stringifies to the name of the
temporary file, and can be compared to a filename directly. The object
isa C<IO::Handle> and isa C<IO::Seekable> so all those methods are
available.

=over 4

=item B<new>

Create a temporary file object.

  my $tmp = File::Temp->new();

by default the object is constructed as if C<tempfile>
was called without options, but with the additional behaviour
that the temporary file is removed by the object destructor
if UNLINK is set to true (the default).

Supported arguments are the same as for C<tempfile>: UNLINK
(defaulting to true), DIR, EXLOCK and SUFFIX. Additionally, the filename
template is specified using the TEMPLATE option. The OPEN option
is not supported (the file is always opened).

$tmp = File::Temp->new( TEMPLATE => 'tempXXXXX', DIR => 'mydir', SUFFIX => '.dat'); Arguments are case insensitive. Can call croak() if an error occurs. =cut sub new { my $proto = shift; my $class = ref($proto) || $proto; # read arguments and convert keys to upper case my %args = @_; %args = map { uc($_), $args{$_} } keys %args; # see if they are unlinking (defaulting to yes) my $unlink = (exists $args{UNLINK} ? $args{UNLINK} : 1 ); delete $args{UNLINK}; # template (store it in an array so that it will # disappear from the arg list of tempfile) my @template = ( exists $args{TEMPLATE} ? $args{TEMPLATE} : () ); delete $args{TEMPLATE}; # Protect OPEN delete $args{OPEN}; # Open the file and retain file handle and file name my ($fh, $path) = tempfile( @template, %args ); print "Tmp: $fh - $path\n" if $DEBUG; # Store the filename in the scalar slot ${*$fh} = $path; # Cache the filename by pid so that the destructor can decide whether to remove it $FILES_CREATED_BY_OBJECT{$$}{$path} = 1; # Store unlink information in hash slot (plus other constructor info) %{*$fh} = %args; # create the object bless $fh, $class; # final method-based configuration $fh->unlink_on_destroy( $unlink ); return $fh; } =item B Create a temporary directory using an object oriented interface. $dir = File::Temp->newdir(); By default the directory is deleted when the object goes out of scope. Supports the same options as the C function. Note that directories created with this method default to CLEANUP => 1. $dir = File::Temp->newdir( $template, %options ); =cut sub newdir { my $self = shift; # need to handle args as in tempdir because we have to force CLEANUP # default without passing CLEANUP to tempdir my $template = (scalar(@_) % 2 == 1 ? shift(@_) : undef ); my %options = @_; my $cleanup = (exists $options{CLEANUP} ? 
$options{CLEANUP} : 1 ); delete $options{CLEANUP}; my $tempdir; if (defined $template) { $tempdir = tempdir( $template, %options ); } else { $tempdir = tempdir( %options ); } return bless { DIRNAME => $tempdir, CLEANUP => $cleanup, LAUNCHPID => $$, }, "File::Temp::Dir"; } =item B Return the name of the temporary file associated with this object (if the object was created using the "new" constructor). $filename = $tmp->filename; This method is called automatically when the object is used as a string. =cut sub filename { my $self = shift; return ${*$self}; } sub STRINGIFY { my $self = shift; return $self->filename; } =item B Return the name of the temporary directory associated with this object (if the object was created using the "newdir" constructor). $dirname = $tmpdir->dirname; This method is called automatically when the object is used in string context. =item B Control whether the file is unlinked when the object goes out of scope. The file is removed if this value is true and $KEEP_ALL is not. $fh->unlink_on_destroy( 1 ); Default is for the file to be removed. =cut sub unlink_on_destroy { my $self = shift; if (@_) { ${*$self}{UNLINK} = shift; } return ${*$self}{UNLINK}; } =item B When the object goes out of scope, the destructor is called. This destructor will attempt to unlink the file (using C) if the constructor was called with UNLINK set to 1 (the default state if UNLINK is not specified). No error is given if the unlink fails. If the object has been passed to a child process during a fork, the file will be deleted when the object goes out of scope in the parent. For a temporary directory object the directory will be removed unless the CLEANUP argument was used in the constructor (and set to false) or C was modified after creation. If the global variable $KEEP_ALL is true, the file or directory will not be removed. =cut sub DESTROY { local($., $@, $!, $^E, $?); my $self = shift; # Make sure we always remove the file from the global hash # on destruction. 
This prevents the hash from growing uncontrollably # and post-destruction there is no reason to know about the file. my $file = $self->filename; my $was_created_by_proc; if (exists $FILES_CREATED_BY_OBJECT{$$}{$file}) { $was_created_by_proc = 1; delete $FILES_CREATED_BY_OBJECT{$$}{$file}; } if (${*$self}{UNLINK} && !$KEEP_ALL) { print "# ---------> Unlinking $self\n" if $DEBUG; # only delete if this process created it return unless $was_created_by_proc; # The unlink1 may fail if the file has been closed # by the caller. This leaves us with the decision # of whether to refuse to remove the file or simply # do an unlink without test. Seems to be silly # to do this when we are trying to be careful # about security _force_writable( $file ); # for windows unlink1( $self, $file ) or unlink($file); } } =back =head1 FUNCTIONS This section describes the recommended interface for generating temporary files and directories. =over 4 =item B This is the basic function to generate temporary files. The behaviour of the file can be changed using various options: $fh = tempfile(); ($fh, $filename) = tempfile(); Create a temporary file in the directory specified for temporary files, as specified by the tmpdir() function in L. ($fh, $filename) = tempfile($template); Create a temporary file in the current directory using the supplied template. Trailing `X' characters are replaced with random letters to generate the filename. At least four `X' characters must be present at the end of the template. ($fh, $filename) = tempfile($template, SUFFIX => $suffix) Same as previously, except that a suffix is added to the template after the `X' translation. Useful for ensuring that a temporary filename has a particular extension when needed by other applications. But see the WARNING at the end. ($fh, $filename) = tempfile($template, DIR => $dir); Translates the template as before except that a directory name is specified. 
($fh, $filename) = tempfile($template, TMPDIR => 1); Equivalent to specifying a DIR of "File::Spec->tmpdir", writing the file into the same temporary directory as would be used if no template was specified at all. ($fh, $filename) = tempfile($template, UNLINK => 1); Return the filename and filehandle as before except that the file is automatically removed when the program exits (dependent on $KEEP_ALL). Default is for the file to be removed if a file handle is requested and to be kept if the filename is requested. In a scalar context (where no filename is returned) the file is always deleted either (depending on the operating system) on exit or when it is closed (unless $KEEP_ALL is true when the temp file is created). Use the object-oriented interface if fine-grained control of when a file is removed is required. If the template is not specified, a template is always automatically generated. This temporary file is placed in tmpdir() (L) unless a directory is specified explicitly with the DIR option. $fh = tempfile( DIR => $dir ); If called in scalar context, only the filehandle is returned and the file will automatically be deleted when closed on operating systems that support this (see the description of tmpfile() elsewhere in this document). This is the preferred mode of operation, as if you only have a filehandle, you can never create a race condition by fumbling with the filename. On systems that can not unlink an open file or can not mark a file as temporary when it is opened (for example, Windows NT uses the C flag) the file is marked for deletion when the program ends (equivalent to setting UNLINK to 1). The C flag is ignored if present. (undef, $filename) = tempfile($template, OPEN => 0); This will return the filename based on the template but will not open this file. Cannot be used in conjunction with UNLINK set to true. Default is to always open the file to protect from possible race conditions. A warning is issued if warnings are turned on. 
Consider using the tmpnam() and mktemp() functions described elsewhere
in this document if opening the file is not required.

If the operating system supports it (for example BSD derived systems),
the filehandle will be opened with O_EXLOCK (open with exclusive file
lock). This can sometimes cause problems if the intention is to pass
the filename to another system that expects to take an exclusive lock
itself (such as DBD::SQLite) whilst ensuring that the tempfile is not
reused. In this situation the "EXLOCK" option can be passed to
tempfile. By default EXLOCK will be true (this retains compatibility
with earlier releases).

  ($fh, $filename) = tempfile($template, EXLOCK => 0);

Options can be combined as required.

Will croak() if there is an error.

=cut

sub tempfile {
  # The argument count can not be validated: any number of key/value
  # pairs may follow the optional template.

  # An odd number of arguments means a leading template was supplied.
  my $template = (scalar(@_) % 2 == 1 ? shift(@_) : undef);

  # Defaults first, caller-supplied pairs last so that they win.
  my %options = (
                 "DIR"    => undef,  # Directory prefix
                 "SUFFIX" => '',     # Template suffix
                 "UNLINK" => 0,      # Do not unlink file on exit
                 "OPEN"   => 1,      # Open file
                 "TMPDIR" => 0,      # Place tempfile in tempdir if template specified
                 "EXLOCK" => 1,      # Open file with O_EXLOCK
                 @_,
                );

  # Handing back a name for a file that was never opened is open to
  # races, so say so when warnings are on.
  warn "tempfile(): temporary filename requested but not opened.\nPossibly unsafe, consider using tempfile() with OPEN set to true\n"
    if !$options{"OPEN"} && $^W;

  # on VMS turn []foo into [.foo] for concatenation
  $options{"DIR"} = VMS::Filespec::vmspath($options{"DIR"})
    if $options{"DIR"} and $^O eq 'VMS';

  # Build the full template that is handed to _gettemp():
  #   DIR given             -> DIR/<template or default>
  #   TMPDIR or no template -> tmpdir/<template or default>
  #   otherwise             -> caller's template untouched (current dir)
  my $have_template = defined $template;
  my $leaf = $have_template ? $template : TEMPXXX;
  if ($options{"DIR"}) {
    $template = File::Spec->catfile($options{"DIR"}, $leaf);
  } elsif ($options{TMPDIR} || !$have_template) {
    $template = File::Spec->catfile(File::Spec->tmpdir, $leaf);
  }

  # Now add a suffix
  $template .= $options{"SUFFIX"};

  # A true "vanishes on close" file is only wanted when the caller gets
  # nothing but the handle; a caller who asks for the name probably
  # wants the file to outlive the handle (e.g. for a child process).
  # The decision is therefore tied to the calling context on every OS.
  my $want_list = wantarray;
  my $unlink_on_close = ( $want_list ? 0 : 1);

  # Create the file; an empty return list signals failure.
  my $errstr;
  my @created = _gettemp($template,
                         "open" => $options{'OPEN'},
                         "mkdir"=> 0 ,
                         "unlink_on_close" => $unlink_on_close,
                         "suffixlen" => length($options{'SUFFIX'}),
                         "ErrStr" => \$errstr,
                         "use_exlock" => $options{EXLOCK},
                        );
  croak "Error in tempfile() using $template: $errstr"
    unless @created;
  my ($fh, $path) = @created;

  # Register for removal at exit when explicitly requested. The return
  # status is not checked since the work is done from END blocks.
  _deferred_unlink($fh, $path, 0) if $options{"UNLINK"};

  if ($want_list) {
    return ($fh, $path) if $options{'OPEN'};
    return (undef, $path);
  }

  # Handle-only callers: unlink0 decides whether the file goes away now
  # or is deferred until later.
  unlink0($fh, $path) or croak "Error unlinking file $path using unlink0";

  # Return just the filehandle.
  return $fh;
}

=item B<tempdir>

This is the recommended interface for creation of temporary
directories. By default the directory will not be removed on exit
(that is, it won't be temporary; this behaviour can not be changed
because of issues with backwards compatibility). To enable removal
either use the CLEANUP option which will trigger removal on program
exit, or consider using the "newdir" method in the object interface which
will allow the directory to be cleaned up when the object goes out of
scope.

The behaviour of the function depends on the arguments:

  $tempdir = tempdir();

Create a directory in tmpdir() (see L<File::Spec>).

  $tempdir = tempdir( $template );

Create a directory from the supplied template. This template is
similar to that described for tempfile(). `X' characters at the end
of the template are replaced with random letters to construct the
directory name. At least four `X' characters must be in the template.

  $tempdir = tempdir ( DIR => $dir );

Specifies the directory to use for the temporary directory.
The temporary directory name is derived from an internal template.

  $tempdir = tempdir ( $template, DIR => $dir );

Prepend the supplied directory name to the template. The template
should not include parent directory specifications itself. Any parent
directory specifications are removed from the template before
prepending the supplied directory.

  $tempdir = tempdir ( $template, TMPDIR => 1 );

Using the supplied template, create the temporary directory in
a standard location for temporary files.
Equivalent to doing

  $tempdir = tempdir ( $template, DIR => File::Spec->tmpdir);

but shorter. Parent directory specifications are stripped from the
template itself. The C<TMPDIR> option is ignored if C<DIR> is set
explicitly. Additionally, C<TMPDIR> is implied if neither a template
nor a directory are supplied.

  $tempdir = tempdir( $template, CLEANUP => 1);

Create a temporary directory using the supplied template, but
attempt to remove it (and all files inside it) when the program
exits. Note that an attempt will be made to remove all files from
the directory even if they were not created by this module (otherwise
why ask to clean it up?). The directory removal is made with
the rmtree() function from the L<File::Path> module.
Of course, if the template is not specified, the temporary directory
will be created in tmpdir() and will also be removed at program exit.

Will croak() if there is an error.

=cut

# '

sub tempdir {
  # Can not check for argument count since we can have any
  # number of args

  # Default options
  my %options = (
                 "CLEANUP" => 0,  # Remove directory on exit
                 "DIR"     => '', # Root directory
                 "TMPDIR"  => 0,  # Use tempdir with template
                );

  # An odd number of arguments means the first one is the template.
  my $template = (scalar(@_) % 2 == 1 ? shift(@_) : undef );

  # Read the options and merge with defaults
  %options = (%options, @_) if @_;

  # Modify or generate the template
  if (defined $template) {

    # With DIR or TMPDIR only the last path component of the caller's
    # template is kept; it is then re-rooted below.
    if ($options{'TMPDIR'} || $options{'DIR'}) {

      # There is no filename at the end, the whole thing is a directory
      $template = VMS::Filespec::vmspath($template) if $^O eq 'VMS';
      my (undef, $directories) = File::Spec->splitpath( $template, 1);

      # Last directory is then our template
      $template = (File::Spec->splitdir($directories))[-1];

      # DIR takes precedence over TMPDIR
      if ($options{"DIR"}) {
        $template = File::Spec->catdir($options{"DIR"}, $template);
      } elsif ($options{TMPDIR}) {
        $template = File::Spec->catdir(File::Spec->tmpdir, $template);
      }

    }

  } else {

    # No template: use the internal default under DIR or tmpdir
    if ($options{"DIR"}) {
      $template = File::Spec->catdir($options{"DIR"}, TEMPXXX);
    } else {
      $template = File::Spec->catdir(File::Spec->tmpdir, TEMPXXX);
    }

  }

  # Create the directory
  my $tempdir;
  my $suffixlen = 0;

  # On VMS dir names can end in delimiters which must not be treated as
  # part of the X's. Only trust $1 when the match actually succeeded:
  # after a failed match the capture variables still hold whatever an
  # earlier successful match (possibly in the caller) left behind.
  if ($^O eq 'VMS' && $template =~ m/([\.\]:>]+)$/) {
    $suffixlen = length($1);
  }
  if ( ($^O eq 'MacOS') && (substr($template, -1) eq ':') ) {
    # dir name has a trailing ':'
    ++$suffixlen;
  }

  my $errstr;
  croak "Error in tempdir() using $template: $errstr"
    unless ((undef, $tempdir) = _gettemp($template,
                                         "open" => 0,
                                         "mkdir"=> 1 ,
                                         "suffixlen" => $suffixlen,
                                         "ErrStr" => \$errstr,
                                        ) );

  # Install exit handler; must be dynamic to get lexical
  if ( $options{'CLEANUP'} && -d $tempdir) {
    _deferred_unlink(undef, $tempdir, 1);
  }

  # Return the dir name
  return $tempdir;
}

=back

=head1 MKTEMP FUNCTIONS

The following functions are Perl implementations of the
mktemp() family of temp file generation system calls.

=over 4

=item B<mkstemp>

Given a template, returns a filehandle to the temporary file and the name
of the file.

  ($fh, $name) = mkstemp( $template );

In scalar context, just the filehandle is returned.
The template may be any filename with some number of X's appended
to it, for example F</tmp/temp.XXXX>. The trailing X's are replaced
with unique alphanumeric combinations.

Will croak() if there is an error.

=cut

sub mkstemp {
  croak "Usage: mkstemp(template)" unless scalar(@_) == 1;

  my ($template) = @_;

  # _gettemp() hands back an empty list on failure and describes the
  # problem through the ErrStr reference.
  my $errstr;
  my @created = _gettemp($template,
                         "open" => 1,
                         "mkdir"=> 0 ,
                         "suffixlen" => 0,
                         "ErrStr" => \$errstr,
                        );
  croak "Error in mkstemp using $template: $errstr"
    unless @created;

  # Handle and name in list context, just the handle otherwise.
  my ($fh, $path) = @created;
  return wantarray() ? ($fh, $path) : $fh;
}

=item B<mkstemps>

Similar to mkstemp(), except that an extra argument can be supplied
with a suffix to be appended to the template.

  ($fh, $name) = mkstemps( $template, $suffix );

For example a template of C<testXXXXXX> and suffix of C<.dat>
would generate a file similar to F<testhGji_w.dat>.

Returns just the filehandle alone when called in scalar context.

Will croak() if there is an error.

=cut

sub mkstemps {
  croak "Usage: mkstemps(template, suffix)" unless scalar(@_) == 2;

  my ($template, $suffix) = @_;

  # The suffix follows the X's; its length tells _gettemp() how many
  # trailing characters to leave alone.
  $template .= $suffix;

  my $errstr;
  my @created = _gettemp($template,
                         "open" => 1,
                         "mkdir"=> 0 ,
                         "suffixlen" => length($suffix),
                         "ErrStr" => \$errstr,
                        );
  croak "Error in mkstemps using $template: $errstr"
    unless @created;

  my ($fh, $path) = @created;
  return wantarray() ? ($fh, $path) : $fh;
}

=item B<mkdtemp>

Create a directory from a template. The template must end in
X's that are replaced by the routine.

  $tmpdir_name = mkdtemp($template);

Returns the name of the temporary directory created.

Directory must be removed by the caller.

Will croak() if there is an error.
=cut

#' # for emacs

sub mkdtemp {

  croak "Usage: mkdtemp(template)"
    if scalar(@_) != 1;

  my $template = shift;
  my $suffixlen = 0;

  # On VMS dir names can end in delimiters which must not be counted
  # among the X's. Only read $1 when the match succeeded: after a
  # failed match the capture variables still hold the result of an
  # earlier successful match (possibly one made by the caller), which
  # would give a bogus suffix length.
  if ($^O eq 'VMS' && $template =~ m/([\.\]:>]+)$/) {
    $suffixlen = length($1);
  }
  if ( ($^O eq 'MacOS') && (substr($template, -1) eq ':') ) {
    # dir name has a trailing ':'
    ++$suffixlen;
  }

  # _gettemp() returns an empty list on failure and reports the reason
  # through the ErrStr reference.
  my ($junk, $tmpdir, $errstr);
  croak "Error creating temp directory from template $template\: $errstr"
    unless (($junk, $tmpdir) = _gettemp($template,
                                        "open" => 0,
                                        "mkdir"=> 1 ,
                                        "suffixlen" => $suffixlen,
                                        "ErrStr" => \$errstr,
                                       ) );

  return $tmpdir;

}

=item B<mktemp>

Returns a valid temporary filename but does not guarantee
that the file will not be opened by someone else.

  $unopened_file = mktemp($template);

Template is the same as that required by mkstemp().

Will croak() if there is an error.

=cut

sub mktemp {

  croak "Usage: mktemp(template)"
    if scalar(@_) != 1;

  my $template = shift;

  # Neither a file nor a directory is created: only the name comes back.
  my ($tmpname, $junk, $errstr);
  croak "Error getting name to temp file from template $template: $errstr"
    unless (($junk, $tmpname) = _gettemp($template,
                                         "open" => 0,
                                         "mkdir"=> 0 ,
                                         "suffixlen" => 0,
                                         "ErrStr" => \$errstr,
                                        ) );

  return $tmpname;
}

=back

=head1 POSIX FUNCTIONS

This section describes the re-implementation of the tmpnam()
and tmpfile() functions described in L<POSIX>
using the mkstemp() from this module.

Unlike the L<POSIX|POSIX> implementations, the directory used
for the temporary file is not specified in a system include
file (C<P_tmpdir>) but simply depends on the choice of tmpdir()
returned by L<File::Spec|File::Spec>. On some implementations this
location can be set using the C<TMPDIR> environment variable, which
may not be secure.
If this is a problem, simply use mkstemp() and specify a template.

=over 4

=item B<tmpnam>

When called in scalar context, returns the full name (including path)
of a temporary file (uses mktemp()). The only check is that the file does
not already exist, but there is no guarantee that that condition will
continue to apply.
$file = tmpnam();

When called in list context, a filehandle to the open file and
a filename are returned. This is achieved by calling mkstemp()
after constructing a suitable template.

  ($fh, $file) = tmpnam();

If possible, this form should be used to prevent possible
race conditions.

See L<File::Spec/tmpdir> for information on the choice of temporary
directory for a particular operating system.

Will croak() if there is an error.

=cut

sub tmpnam {

  # The temporary directory comes from File::Spec; an empty answer
  # means there is nowhere usable to put the file.
  my $tmpdir = File::Spec->tmpdir;
  croak "Error temporary directory is not writable"
    if $tmpdir eq '';

  # Append the module's default template to tmpdir
  my $template = File::Spec->catfile($tmpdir, TEMPXXX);

  # List context: open the file (mkstemp). Otherwise: name only (mktemp).
  return wantarray() ? mkstemp($template) : mktemp($template);
}

=item B<tmpfile>

Returns the filehandle of a temporary file.

  $fh = tmpfile();

The file is removed when the filehandle is closed or when the program
exits. No access to the filename is provided.

If the temporary file can not be created undef is returned.
Currently this command will probably not work when the temporary
directory is on an NFS file system.

Will croak() if there is an error.

=cut

sub tmpfile {

  # tmpnam() in list context opens the file and returns handle and name
  my ($fh, $file) = tmpnam();

  # Make sure the file goes away once the handle is closed; hand back
  # undef when that can not be arranged (this will fail on NFS).
  return unlink0($fh, $file) ? $fh : undef;
}

=back

=head1 ADDITIONAL FUNCTIONS

These functions are provided for backwards compatibility
with common tempfile generation C library functions.

They are not exported and must be addressed using the full package
name.

=over 4

=item B<tempnam>

Return the name of a temporary file in the specified directory
using a prefix. The file is guaranteed not to exist at the time
the function was called, but such guarantees are good for one
clock tick only. Always use the proper form of C<sysopen>
with C<O_CREAT | O_EXCL> if you must open such a filename.
$filename = File::Temp::tempnam( $dir, $prefix );

Equivalent to running mktemp() with $dir/$prefixXXXXXXXX
(using unix file convention as an example)

Because this function uses mktemp(), it can suffer from race conditions.

Will croak() if there is an error.

=cut

sub tempnam {

  croak 'Usage tempnam($dir, $prefix)' unless scalar(@_) == 2;

  my ($dir, $prefix) = @_;

  # Eight X's after the prefix form the part that gets randomised
  $prefix .= 'XXXXXXXX';

  # Join directory and file part, then let mktemp() pick the name
  return mktemp( File::Spec->catfile($dir, $prefix) );
}

=back

=head1 UTILITY FUNCTIONS

Useful functions for dealing with the filehandle and filename.

=over 4

=item B<unlink0>

Given an open filehandle and the associated filename, make a safe
unlink. This is achieved by first checking that the filename and
filehandle initially point to the same file and that the number of
links to the file is 1 (all fields returned by stat() are compared).
Then the filename is unlinked and the filehandle checked once again to
verify that the number of links on that file is now 0. This is the
closest you can come to making sure that the filename unlinked was the
same as the file whose descriptor you hold.

  unlink0($fh, $path)
     or die "Error unlinking file $path safely";

Returns false on error but croaks() if there is a security
anomaly. The filehandle is not closed since on some occasions this is
not required.

On some platforms, for example Windows NT, it is not possible to
unlink an open file (the file must be closed first). On those
platforms, the actual unlinking is deferred until the program ends and
good status is returned. A check is still performed to make sure that
the filehandle and filename are pointing to the same thing (but not at
the time the end block is executed since the deferred removal may not
have access to the filehandle).

Additionally, on Windows NT not all the fields returned by stat() can
be compared. For example, the C<dev> and C<rdev> fields seem to be
different.
Also, it seems that the size of the file returned by stat()
does not always agree, with C<stat(FH)> being more accurate than
C<stat(filename)>, presumably because of caching issues even when
using autoflush (this is usually overcome by waiting a while after
writing to the tempfile before attempting to C<unlink0> it).

Finally, on NFS file systems the link count of the file handle does
not always go to zero immediately after unlinking. Currently, this
command is expected to fail on NFS disks.

This function is disabled if the global variable $KEEP_ALL is true
and an unlink on open file is supported. If the unlink is to be deferred
to the END block, the file is still registered for removal.

This function should not be called if you are using the object oriented
interface since it will interfere with the object destructor deleting
the file.

=cut

sub unlink0 {

  croak 'Usage: unlink0(filehandle, filename)'
    unless scalar(@_) == 2;

  my ($fh, $path) = @_;

  # Handle and name must refer to the same file before anything is removed
  return 0 unless cmpstat($fh, $path);

  # Platforms that can not remove an open file: register the file for
  # removal later and report success.
  unless (_can_unlink_opened_file()) {
    _deferred_unlink($fh, $path, 0);
    return 1;
  }

  # return early (without unlink) if we have been instructed to retain files.
  return 1 if $KEEP_ALL;

  # XXX: do *not* call this on a directory; possible race
  #      resulting in recursive removal
  croak "unlink0: $path has become a directory!" if -d $path;
  return 0 unless unlink($path);

  # Stat the filehandle to see what became of the link count
  my @fh = stat $fh;

  print "Link count = $fh[3] \n" if $DEBUG;

  # Success means the link count dropped to zero.
  # - Cygwin provides deferred unlinking, however,
  #   on Win9x the link count remains 1
  # On NFS the link count may still be 1 but we cant know that
  # we are on NFS
  return 1 if $fh[3] == 0;
  return ( $^O eq 'cygwin' ? 1 : 0 );
}

=item B<cmpstat>

Compare C<stat> of filehandle with C<stat> of provided filename. This
can be used to check that the filename and filehandle initially point
to the same file and that the number of links to the file is 1 (all
fields returned by stat() are compared).
cmpstat($fh, $path) or die "Error comparing handle with file";

Returns false if the stat information differs, if the filehandle can
not be stat'ed, or if the file is already gone. A link count greater
than 1 on the filehandle only produces a warning (when warnings are
enabled). Dies with a stack trace (confess) if the path is no longer
a plain file.

On certain platforms, for example Windows, not all the fields returned
by stat() can be compared. For example, the C<dev> and C<rdev> fields
seem to be different in Windows. Also, it seems that the size of the
file returned by stat() does not always agree, with C<stat(FH)> being
more accurate than C<stat(filename)>, presumably because of caching
issues even when using autoflush (this is usually overcome by waiting
a while after writing to the tempfile before attempting to
C<unlink0> it).

Not exported by default.

=cut

sub cmpstat {

  croak 'Usage: cmpstat(filehandle, filename)'
    unless scalar(@_) == 2;

  my ($fh, $path) = @_;

  warn "Comparing stat\n"
    if $DEBUG;

  # The handle may already have been closed by the caller, in which
  # case stat would warn; $^W is switched off just for this call
  # (lexical warnings would need a 5.006 minimum requirement).
  my @fh = do { local ($^W) = 0; stat $fh };
  return unless @fh;

  carp "unlink0: fstat found too many links; SB=@fh"
    if $fh[3] > 1 && $^W;

  # Stat the path
  my @path = stat $path;
  unless (@path) {
    carp "unlink0: $path is gone already" if $^W;
    return;
  }

  # this is no longer a file, but may be a directory, or worse
  confess "panic: $path is no longer a file: SB=@fh"
    unless -f $path;

  # Not every stat field can be compared on every platform (on WinNT,
  # for example, dev and rdev differ between a name and a handle).
  # Platforms listed here compare only the given fields; everything
  # else compares all of them.
  my %comparable = (
                    MSWin32 => [1,2,3,4,5,7,8,9,10],
                    os2     => [0, 2..$#fh],
                    VMS     => [0, 1],     # device and file ID are sufficient
                    dos     => [0,2..7,11..$#fh],
                    mpeix   => [0..4,8..10],
                   );
  my @okstat = exists $comparable{$^O} ? @{ $comparable{$^O} } : (0..$#fh);

  # Now compare each entry explicitly by number
  for (@okstat) {
    print "Comparing: $_ : $fh[$_] and $path[$_]\n" if $DEBUG;

    # eq rather than ==: rdev, blksize and blocks (6, 11 and 12) are ''
    # on platforms that do not support them.
    next if $fh[$_] eq $path[$_];

    warn "Did not match $_ element of stat\n" if $DEBUG;
    return 0;
  }

  return 1;
}

=item B<unlink1>

Similar to C<unlink0> except after file comparison using cmpstat, the
filehandle is closed prior to attempting to unlink the file. This
allows the file to be removed without using an END block, but does
mean that the post-unlink comparison of the filehandle state provided
by C<unlink0> is not available.

  unlink1($fh, $path)
     or die "Error closing and unlinking file";

Usually called from the object destructor when using the OO interface.

Not exported by default.

This function is disabled if the global variable $KEEP_ALL is true.

Can call croak() if there is a security anomaly during the stat()
comparison.

=cut

sub unlink1 {
  croak 'Usage: unlink1(filehandle, filename)'
    unless scalar(@_) == 2;

  my ($fh, $path) = @_;

  # Handle and name must match, and the handle must close cleanly,
  # before the file is touched.
  return 0 unless cmpstat($fh, $path);
  return 0 unless close( $fh );

  # Make sure the file is writable (for windows)
  _force_writable( $path );

  # return early (without unlink) if we have been instructed to retain files.
  return 1 if $KEEP_ALL;

  # remove the file
  return unlink($path);
}

=item B<cleanup>

Calling this function will cause any temp files or temp directories
that are registered for removal to be removed.
This happens automatically when the process exits but can be triggered manually if the caller is sure that none of the temp files are required. This method can be registered as an Apache callback. On OSes where temp files are automatically removed when the temp file is closed, calling this function will have no effect other than to remove temporary directories (which may include temporary files). File::Temp::cleanup(); Not exported by default. =back =head1 PACKAGE VARIABLES These functions control the global state of the package. =over 4 =item B Controls the lengths to which the module will go to check the safety of the temporary file or directory before proceeding. Options are: =over 8 =item STANDARD Do the basic security measures to ensure the directory exists and is writable, that temporary files are opened only if they do not already exist, and that possible race conditions are avoided. Finally the L function is used to remove files safely. =item MEDIUM In addition to the STANDARD security, the output directory is checked to make sure that it is owned either by root or the user running the program. If the directory is writable by group or by other, it is then checked to make sure that the sticky bit is set. Will not work on platforms that do not support the C<-k> test for sticky bit. =item HIGH In addition to the MEDIUM security checks, also check for the possibility of ``chown() giveaway'' using the L sysconf() function. If this is a possibility, each directory in the path is checked in turn for safeness, recursively walking back to the root directory. For platforms that do not support the L C<_PC_CHOWN_RESTRICTED> symbol (for example, Windows NT) it is assumed that ``chown() giveaway'' is possible and the recursive test is performed. =back The level can be changed as follows: File::Temp->safe_level( File::Temp::HIGH ); The level constants are not exported by the module. 
Currently, you must be running at least perl v5.6.0 in order to
run with MEDIUM or HIGH security. This is simply because the
safety tests use functions from L<Fcntl|Fcntl> that are not
available in older versions of perl. The problem is that the version
number for Fcntl is the same in perl 5.6.0 and in 5.005_03 even though
they are different versions.

On systems that do not support the HIGH or MEDIUM safety levels
(for example Win NT or OS/2) any attempt to change the level will
be ignored. The decision to ignore rather than raise an exception
allows portable programs to be written with high security in mind
for the systems that can support this without those programs failing
on systems where the extra tests are irrelevant.

If you really need to see whether the change has been accepted
simply examine the return value of C<safe_level>.

  $newlevel = File::Temp->safe_level( File::Temp::HIGH );
  die "Could not change to high security"
      if $newlevel != File::Temp::HIGH;

=cut

{
  # The level lives in a closure so that it can only be changed
  # through safe_level().
  my $LEVEL = STANDARD;

  sub safe_level {
    my ($self, @args) = @_;

    # Pure query
    return $LEVEL unless @args;

    my $level = $args[0];

    # Anything other than the three known constants is ignored, with a
    # warning when warnings are enabled.
    unless (($level == STANDARD) || ($level == MEDIUM) || ($level == HIGH)) {
      carp "safe_level: Specified level ($level) not STANDARD, MEDIUM or HIGH - ignoring\n" if $^W;
      return $LEVEL;
    }

    # Dont allow MEDIUM or HIGH on perl 5.005 or earlier
    croak "Currently requires perl 5.006 or newer to do the safe checks"
      if $] < 5.006 && $level != STANDARD;

    # Silently keep the old level if this platform can not do the new one
    $LEVEL = $level if _can_do_level($level);

    return $LEVEL;
  }
}

=item TopSystemUID

This is the highest UID on the current system that refers to a root
UID. This is used to make sure that the temporary directory is
owned by a system UID (C<root>, C<bin>, C<sys> etc) rather than
simply by root.

This is required since on many unix systems C</tmp> is not owned
by root.

Default is to assume that any UID less than or equal to 10 is a root
UID.
File::Temp->top_system_uid(10);
my $topid = File::Temp->top_system_uid;

This value can be adjusted to reduce security checking if required.
The value is only relevant when C<safe_level> is set to MEDIUM or higher.

=cut

{
  # Kept in a closure so that it can only be changed through
  # top_system_uid().
  my $TopSystemUID = 10;
  $TopSystemUID = 197108 if $^O eq 'interix'; # "Administrator"

  # Get, or with an argument set and then get, the highest UID that is
  # treated as a system UID. Croaks unless the new value consists of
  # digits only.
  sub top_system_uid {
    my $self = shift;
    if (@_) {
      my $newuid = shift;

      # \A and \z anchor the whole string: a plain "$" would also let a
      # value with a trailing newline through. The defined() guard turns
      # undef into the normal croak instead of an "uninitialized" warning.
      croak "top_system_uid: UIDs should be numeric"
        unless defined $newuid && $newuid =~ /\A\d+\z/;
      $TopSystemUID = $newuid;
    }
    return $TopSystemUID;
  }
}

=item B<$KEEP_ALL>

Controls whether temporary files and directories should be retained
regardless of any instructions in the program to remove them
automatically. This is useful for debugging but should not be used in
production code.

  $File::Temp::KEEP_ALL = 1;

Default is for files to be removed as requested by the caller.

In some cases, files will only be retained if this variable is true
when the file is created. This means that you can not create a temporary
file, set this variable and expect the temp file to still be around
when the program exits.

=item B<$DEBUG>

Controls whether debugging messages should be enabled.

  $File::Temp::DEBUG = 1;

Default is for debugging mode to be disabled.

=back

=head1 WARNING

For maximum security, endeavour always to avoid ever looking at,
touching, or even imputing the existence of the filename. You do not
know that that filename is connected to the same file as the handle
you have, and attempts to check this can only trigger more race
conditions. It's far more secure to use the filehandle alone and
dispense with the filename altogether.

If you need to pass the handle to something that expects a filename
then, on a unix system, use C<"/dev/fd/" . fileno($fh)> for arbitrary
programs, or more generally C<< "+<=&" . fileno($fh) >> for Perl
programs. You will have to clear the close-on-exec bit on that file
descriptor before passing it to another process.
use Fcntl qw/F_SETFD F_GETFD/; fcntl($tmpfh, F_SETFD, 0) or die "Can't clear close-on-exec flag on temp fh: $!\n"; =head2 Temporary files and NFS Some problems are associated with using temporary files that reside on NFS file systems and it is recommended that a local filesystem is used whenever possible. Some of the security tests will most probably fail when the temp file is not local. Additionally, be aware that the performance of I/O operations over NFS will not be as good as for a local disk. =head2 Forking In some cases files created by File::Temp are removed from within an END block. Since END blocks are triggered when a child process exits (unless C is used by the child) File::Temp takes care to only remove those temp files created by a particular process ID. This means that a child will not attempt to remove temp files created by the parent process. If you are forking many processes in parallel that are all creating temporary files, you may need to reset the random number seed using srand(EXPR) in each child else all the children will attempt to walk through the same set of random file names and may well cause themselves to give up if they exceed the number of retry attempts. =head2 Directory removal Note that if you have chdir'ed into the temporary directory and it is subsequently cleaned up (either in the END block or as part of object destruction), then you will get a warning from File::Path::rmtree(). =head2 BINMODE The file returned by File::Temp will have been opened in binary mode if such a mode is available. If that is not correct, use the C function to change the mode of the filehandle. Note that you can modify the encoding of a file opened by File::Temp also by using C. =head1 HISTORY Originally began life in May 1999 as an XS interface to the system mkstemp() function. 
In March 2000, the OpenBSD mkstemp() code was translated to Perl for
total control of the code's security checking, to ensure the presence
of the function regardless of operating system and to help with
portability. The module was shipped as a standard part of perl from
v5.6.1.

=head1 SEE ALSO

L<POSIX/tmpnam>, L<POSIX/tmpfile>, L<File::Spec>, L<File::Path>

See L<IO::File> and L<File::MkTemp>, L<Apache::TempFile> for
different implementations of temporary file handling.

See L<File::Tempdir> for an alternative object-oriented wrapper for
the C<tempdir> function.

=head1 AUTHOR

Tim Jenness E<lt>tjenness@cpan.orgE<gt>

Copyright (C) 2007-2009 Tim Jenness.
Copyright (C) 1999-2007 Tim Jenness and the UK Particle Physics and
Astronomy Research Council. All Rights Reserved. This program is free
software; you can redistribute it and/or modify it under the same
terms as Perl itself.

Original Perl implementation loosely based on the OpenBSD C code for
mkstemp(). Thanks to Tom Christiansen for suggesting that this module
should be written and providing ideas for code improvements and
security enhancements.

=cut

package File::Temp::Dir;

use File::Path qw/ rmtree /;
use strict;
use overload '""' => "STRINGIFY", fallback => 1;

# private class specifically to support tempdir objects
# created by File::Temp->newdir

# ostensibly the same method interface as File::Temp but without
# inheriting all the IO::Seekable methods and other cruft

# Read-only accessor: the name of the temp directory.
sub dirname {
  my ($dir) = @_;
  return $dir->{DIRNAME};
}

# Stringification handler (see the overload declaration above): an
# object used as a string yields its directory name.
sub STRINGIFY {
  my ($dir) = @_;
  return $dir->dirname;
}

# Get, or set when an argument is passed, the flag that tells DESTROY
# whether to remove the directory.
sub unlink_on_destroy {
  my ($dir, @new) = @_;
  $dir->{CLEANUP} = $new[0] if @new;
  return $dir->{CLEANUP};
}

# Remove the directory tree when the object goes away, but only if
# cleanup was requested, we are the process recorded in LAUNCHPID, and
# $File::Temp::KEEP_ALL is not set.
sub DESTROY {
  my ($dir) = @_;

  # Keep the caller's view of these globals intact during destruction.
  local($., $@, $!, $^E, $?);

  return unless $dir->unlink_on_destroy;
  return unless $$ == $dir->{LAUNCHPID};
  return if $File::Temp::KEEP_ALL;
  return unless -d $dir->{DIRNAME};

  # Some versions of rmtree will abort if you attempt to remove
  # the directory you are sitting in. We protect that and turn it
  # into a warning. We do this because this occurs during object
  # destruction and so can not be caught by the user.
  eval { rmtree($dir->{DIRNAME}, $File::Temp::DEBUG, 0); };
  warn $@ if ($@ && $^W);
  return;
}

1;

# This document contains text in Perl "POD" format.
# Use a POD viewer like perldoc or perlman to render it.

=encoding utf-8

=head1 NAME

Locale::Maketext::Cookbook - recipes for using Locale::Maketext

=head1 INTRODUCTION

This is a work in progress. Not much progress by now :-)

=head1 ONESIDED LEXICONS

I

It may be common (for example at your main lexicon) that
the hash keys and values coincide. Like that

    q{Hello, tell me your name}
      => q{Hello, tell me your name}

It would be nice to just write:

    q{Hello, tell me your name} => ''

and have this magically inflated to the first form.
Among the advantages of such representation, that would
lead to smaller files, less prone to mistyping or mispasting,
and handy to someone translating it which can simply
copy the main lexicon and enter the translation
instead of having to remove the value first.

That can be achieved by overriding C<init>
in your class and working on the main lexicon
with code like that:

    package My::I18N;
    ...

    sub init {
        my $lh = shift; # a newborn handle
        $lh->SUPER::init();
        inflate_lexicon(\%My::I18N::en::Lexicon);
        return;
    }

    sub inflate_lexicon {
        my $lex = shift;
        while (my ($k, $v) = each %$lex) {
            # write through the hash: $v is only a copy of the value
            $lex->{$k} = $k if !defined $v || $v eq '';
        }
    }

Here we are assuming C<My::I18N::en> to own the
main lexicon.

There are some downsides here: the size economy
will not stand at runtime after this C<init>
runs. But it should not be that critical, since
if you don't have space for that, you won't have
space for any other language besides the main one
as well. You could do that too with ties,
expanding the value at lookup time which
should be more time expensive as an option.

=head1 DECIMAL PLACES IN NUMBER FORMATTING

I

The documentation of L<Locale::Maketext> advises that
the standard bracket method C<numf> is limited and that
you must override that for better results.
It even suggests the use of L<Number::Format>. One such defect of standard C<numf> is to not be able to use a certain decimal precision. For example, $lh->maketext('pi is [numf,_1]', 355/113); outputs pi is 3.14159292035398 Since pi ≈ 355/113 is only accurate to 6 decimal places, you would want to say: $lh->maketext('pi is [numf,_1,6]', 355/113); and get "pi is 3.141592". One solution for that could use C<Number::Format> like that: package Wuu; use base qw(Locale::Maketext); use Number::Format; # can be overridden according to language conventions sub _numf_params { return ( -thousands_sep => '.', -decimal_point => ',', -decimal_digits => 2, ); } # builds a Number::Format sub _numf_formatter { my ($lh, $scale) = @_; my @params = $lh->_numf_params; if ($scale) { # use explicit scale rather than default push @params, (-decimal_digits => $scale); } return Number::Format->new(@params); } sub numf { my ($lh, $n, $scale) = @_; # get the (cached) formatter my $nf = $lh->{__nf}{$scale} ||= $lh->_numf_formatter($scale); # format the number itself return $nf->format_number($n); } package Wuu::pt; use base qw(Wuu); and then my $lh = Wuu->get_handle('pt'); $lh->maketext('A [numf,_1,3] km de distância', 1550.2222); would return "A 1.550,222 km de distância". Notice that the standard utility methods of C<Locale::Maketext> are irremediably limited because they could not aim to do everything that could be expected from them in different languages, cultures and applications. So extending C<numf>, C<quant>, and C<sprintf> is natural as soon as your needs exceed what the standard ones do.
package File::stat;
use 5.006;

use strict;
use warnings;
use warnings::register;
use Carp;

# Local alias so the module can call warnif() unqualified.
BEGIN { *warnif = \&warnings::warnif }

our(@EXPORT, @EXPORT_OK, %EXPORT_TAGS);

our $VERSION = '1.05';

my @fields;
BEGIN {
    use Exporter   ();
    @EXPORT      = qw(stat lstat);
    @fields      = qw( $st_dev       $st_ino    $st_mode
                       $st_nlink     $st_uid    $st_gid
                       $st_rdev      $st_size
                       $st_atime     $st_mtime  $st_ctime
                       $st_blksize   $st_blocks
                    );
    @EXPORT_OK   = ( @fields, "stat_cando" );
    %EXPORT_TAGS = ( FIELDS => [ @fields, @EXPORT ] );
}
use vars @fields;

use Fcntl qw(S_IRUSR S_IWUSR S_IXUSR);

BEGIN {
    # These constants will croak on use if the platform doesn't define
    # them. It's important to avoid inflicting that on the user.
    no strict 'refs';

    # _suid / _sgid / _svtx: test the matching mode bit, or always
    # return "" when Fcntl does not provide the constant.
    for (qw(suid sgid svtx)) {
        my $val = eval { &{"Fcntl::S_I\U$_"} };
        *{"_$_"} = defined $val ? sub { $_[0] & $val ? 1 : "" } : sub { "" };
    }

    # S_ISxxx: use Fcntl's predicate when the S_IFxxx constant exists,
    # otherwise a stub that is always false.
    for (qw(SOCK CHR BLK REG DIR FIFO LNK)) {
        *{"S_IS$_"} = defined eval { &{"Fcntl::S_IF$_"} }
            ? \&{"Fcntl::S_IS$_"} : sub { "" };
    }
}

# from doio.c
#
# _ingroup($gid, $eff): true (1) when $gid is the process's effective
# ($eff true) or real ($eff false) gid, or one of the supplementary
# groups listed in $); "" otherwise.
sub _ingroup {
    my ($gid, $eff)   = @_;

    # I am assuming that since VMS doesn't have getgroups(2), $) will
    # always only contain a single entry.
    $^O eq "VMS"    and return $_[0] == $);

    my ($egid, @supp) = split " ", $);
    my ($rgid)        = split " ", $(;

    $gid == ($eff ? $egid : $rgid)  and return 1;
    grep $gid == $_, @supp          and return 1;

    return "";
}

# VMS uses the Unix version of the routine, even though this is very
# suboptimal. VMS has a permissions structure that doesn't really fit
# into struct stat, and unlike on Win32 the normal -X operators respect
# that, but unfortunately by the time we get here we've already lost the
# information we need. It looks to me as though if we were to preserve
# the st_devnam entry of vmsish.h's fake struct stat (which actually
# holds the filename) it might be possible to do this right, but both
# getting that value out of the struct (perl's stat doesn't return it)
# and interpreting it later would require this module to have an XS
# component (at which point we might as well just call Perl_cando and
# have done with it).

# cando($stat_arrayref, $mode, $eff): does the current process have the
# access described by $mode (one of the S_I?USR bits) to the file whose
# stat fields are in the arrayref? $eff selects effective (true) or
# real (false) ids. Returns 1 or "".
if (grep $^O eq $_, qw/os2 MSWin32 dos/) {

    # from doio.c
    *cando = sub { ($_[0][2] & $_[1]) ? 1 : "" };
}
else {

    # from doio.c
    *cando = sub {
        my ($s, $mode, $eff) = @_;
        my $uid = $eff ? $> : $<;

        # If we're root on unix and we are not testing for executable
        # status, then all file tests are true.
        $^O ne "VMS" and $uid == 0 and !($mode & 0111) and return 1;

        my ($stmode, $stuid, $stgid) = @$s[2,4,5];

        # This code basically assumes that the rwx bits of the mode are
        # the 0777 bits, but so does Perl_cando.
        if ($stuid == $uid) {
            $stmode & $mode         and return 1;
        }
        elsif (_ingroup($stgid, $eff)) {
            $stmode & ($mode >> 3)  and return 1;
        }
        else {
            $stmode & ($mode >> 6)  and return 1;
        }
        return "";
    };
}

# alias for those who don't like objects
*stat_cando = \&cando;

# Dispatch table for the -X overload: filetest letter => implementation
# working on the object's field array.
my %op = (
    r => sub { cando($_[0], S_IRUSR, 1) },
    w => sub { cando($_[0], S_IWUSR, 1) },
    x => sub { cando($_[0], S_IXUSR, 1) },
    o => sub { $_[0][4] == $>           },

    R => sub { cando($_[0], S_IRUSR, 0) },
    W => sub { cando($_[0], S_IWUSR, 0) },
    X => sub { cando($_[0], S_IXUSR, 0) },
    O => sub { $_[0][4] == $<           },

    e => sub { 1 },
    z => sub { $_[0][7] == 0    },
    s => sub { $_[0][7]         },

    f => sub { S_ISREG ($_[0][2]) },
    d => sub { S_ISDIR ($_[0][2]) },
    l => sub { S_ISLNK ($_[0][2]) },
    p => sub { S_ISFIFO($_[0][2]) },
    S => sub { S_ISSOCK($_[0][2]) },
    b => sub { S_ISBLK ($_[0][2]) },
    c => sub { S_ISCHR ($_[0][2]) },

    u => sub { _suid($_[0][2]) },
    g => sub { _sgid($_[0][2]) },
    k => sub { _svtx($_[0][2]) },

    M => sub { ($^T - $_[0][9] ) / 86400 },
    C => sub { ($^T - $_[0][10]) / 86400 },
    A => sub { ($^T - $_[0][8] ) / 86400 },
);

use constant HINT_FILETEST_ACCESS => 0x00400000;

# we need fallback=>1 or stringifying breaks
use overload
    fallback => 1,
    -X => sub {
        my ($s, $op) = @_;

        # Only the six access tests are affected by "use filetest
        # 'access'" and by VMS ACLs. index() returns -1 when $op is not
        # one of them and 0 for "r", so the result must be compared
        # against 0 explicitly rather than used as a boolean.
        if (index("rwxRWX", $op) >= 0) {
            (caller 0)[8] & HINT_FILETEST_ACCESS
                and warnif("File::stat ignores use filetest 'access'");

            $^O eq "VMS" and warnif("File::stat ignores VMS ACLs");

            # It would be nice to have a warning about using -l on a
            # non-lstat, but that would require an extra member in the
            # object.
        }

        if ($op{$op}) {
            return $op{$op}->($_[0]);
        }
        else {
            croak "-$op is not implemented on a File::stat object";
        }
    };

# Class::Struct forbids use of @ISA
sub import { goto &Exporter::import }

use Class::Struct qw(struct);
struct 'File::stat' => [
     map { $_ => '$' } qw{
         dev ino mode nlink uid gid rdev size
         atime mtime ctime blksize blocks
     }
];

# populate(@stat_fields): build a File::stat object from the list
# returned by CORE::stat/CORE::lstat, also refreshing the exportable
# $st_* package variables. Returns nothing when given an empty list
# (i.e. when the underlying stat failed).
sub populate (@) {
    return unless @_;
    my $stob = new();
    @$stob = (
        $st_dev, $st_ino, $st_mode, $st_nlink, $st_uid, $st_gid, $st_rdev,
        $st_size, $st_atime, $st_mtime, $st_ctime, $st_blksize, $st_blocks )
            = @_;
    return $stob;
}

# Object-returning replacement for CORE::lstat.
sub lstat ($)  { populate(CORE::lstat(shift)) }

# Object-returning replacement for CORE::stat. If stat'ing the argument
# directly fails, it is retried as the name of a filehandle in the
# caller's package.
sub stat ($) {
    my $arg = shift;
    my $st = populate(CORE::stat $arg);
    return $st if defined $st;
    my $fh;
    {
        # Keep the $! from the failed stat above.
        local $!;
        no strict 'refs';
        require Symbol;
        $fh = \*{ Symbol::qualify( $arg, caller() )};
        return unless defined fileno $fh;
    }
    return populate(CORE::stat $fh);
}

1;
__END__

=head1 NAME

File::stat - by-name interface to Perl's built-in stat() functions

=head1 SYNOPSIS

 use File::stat;
 $st = stat($file) or die "No $file: $!";
 if ( ($st->mode & 0111) && ($st->nlink > 1) ) {
     print "$file is executable with lotsa links\n";
 }

 if ( -x $st ) {
     print "$file is executable\n";
 }

 use Fcntl "S_IRUSR";
 if ( $st->cando(S_IRUSR, 1) ) {
     print "My effective uid can read $file\n";
 }

 use File::stat qw(:FIELDS);
 stat($file) or die "No $file: $!";
 if ( ($st_mode & 0111) && ($st_nlink > 1) ) {
     print "$file is executable with lotsa links\n";
 }

=head1 DESCRIPTION

This module's default exports override the core stat()
and lstat() functions,
replacing them with versions that return "File::stat" objects. This object has methods that return the similarly named structure field name from the stat(2) function; namely, dev, ino, mode, nlink, uid, gid, rdev, size, atime, mtime, ctime, blksize, and blocks. As of version 1.02 (provided with perl 5.12) the object provides C<"-X"> overloading, so you can call filetest operators (C<-f>, C<-x>, and so on) on it. It also provides a C<< ->cando >> method, called like $st->cando( ACCESS, EFFECTIVE ) where I<ACCESS> is one of C<S_IRUSR>, C<S_IWUSR> or C<S_IXUSR> from the L<Fcntl|Fcntl> module, and I<EFFECTIVE> indicates whether to use effective (true) or real (false) ids. The method interprets the C<mode>, C<uid> and C<gid> fields, and returns whether or not the current process would be allowed the specified access. If you don't want to use the objects, you may import the C<< ->cando >> method into your namespace as a regular function called C<stat_cando>. This takes an arrayref containing the return values of C<stat> or C<lstat> as its first argument, and interprets it for you. You may also import all the structure fields directly into your namespace as regular variables using the :FIELDS import tag. (Note that this still overrides your stat() and lstat() functions.) Access these fields as variables named with a preceding C<st_> in front of their method names. Thus, C<$stat_obj-E<gt>dev()> corresponds to $st_dev if you import the fields. To access this functionality without the core overrides, pass the C<use> an empty import list, and then access the functions with their fully qualified names.
On the other hand, the built-ins are still available
via the C<CORE::> pseudo-pac
package Filter::Simple;

use Text::Balanced ':ALL';

use vars qw{ $VERSION @EXPORT };

$VERSION = '0.86';

use Filter::Util::Call;
use Carp;

@EXPORT = qw( FILTER FILTER_ONLY );


# "use Filter::Simple sub {...}, $terminator" installs the filter
# directly; a plain "use Filter::Simple" exports FILTER and FILTER_ONLY
# into the caller.
sub import {
    if (@_>1) { shift; goto &FILTER }
    else { *{caller()."::$_"} = \&$_ foreach @EXPORT }
}

# Report a FILTER_ONLY usage error from the caller's perspective.
sub fail {
    croak "FILTER_ONLY: ", @_;
}

# Extractor that returns all the pieces of a quotelike as an array ref
# (or nothing when no quotelike starts here).
my $exql = sub {
    my @bits = extract_quotelike $_[0], qr//;
    return unless $bits[0];
    return \@bits;
};

# NOTE(review): in the copy under review everything from the middle of
# $comment up to the "quotelike" key of %extractor_for had been lost
# (the text read "qr/(? [ $ws, ..."). The definitions of $comment, $ws,
# $id, $EOP, $CUT and $pod_or_DATA below are a reconstruction - verify
# them against a pristine Filter::Simple 0.86 before relying on them.
my $ncws = qr/\s+/;
my $comment = qr/(?<![\$\@%])#.*/;
my $ws = qr/(?:$ncws|$comment)+/;
my $id = qr/\b(?!([ysm]|q[rqxw]?|tr)\b)\w+/;
my $EOP = qr/\n\n|\Z/;
my $CUT = qr/\n=cut.*$EOP/;
my $pod_or_DATA = qr/
              ^=(?:head[1-4]|item) .*? $CUT
            | ^=pod .*? $CUT
            | ^=for .*? $CUT
            | ^=begin .*? $CUT
            | ^__(DATA|END)__\r?\n.*
            /smx;

# Per-selector extractor lists handed to Text::Balanced::extract_multiple.
my %extractor_for = (
    quotelike  => [ $ws,  \&extract_variable, $id, { MATCH  => \&extract_quotelike } ],
    regex      => [ $ws,  $pod_or_DATA, $id, $exql           ],
    string     => [ $ws,  $pod_or_DATA, $id, $exql           ],
    code       => [ $ws, { DONT_MATCH => $pod_or_DATA },
                    \&extract_variable,
                    $id, { DONT_MATCH => \&extract_quotelike }   ],
    code_no_comments
               => [ { DONT_MATCH => $comment },
                    $ncws, { DONT_MATCH => $pod_or_DATA },
                    \&extract_variable,
                    $id, { DONT_MATCH => \&extract_quotelike }   ],
    executable => [ $ws, { DONT_MATCH => $pod_or_DATA }      ],
    executable_no_comments
               => [ { DONT_MATCH => $comment },
                    $ncws, { DONT_MATCH => $pod_or_DATA }      ],
    all        => [        { MATCH  => qr/(?s:.*)/         } ],
);

# Per-selector wrappers: given the user's transform $t, each returns a
# sub that is mapped over the extracted components (in $_) and yields
# the text to put back.
my %selector_for = (
    all      => sub { my ($t)=@_; sub{ $_=$$_; $t->(@_); $_} },
    executable=> sub { my ($t)=@_; sub{ref() ? $_=$$_ : $t->(@_); $_} },
    quotelike=> sub { my ($t)=@_; sub{ref() && do{$_=$$_; $t->(@_)}; $_} },
    regex    => sub { my ($t)=@_;
               sub{ref() or return $_;
                   my ($ql,undef,$pre,$op,$ld,$pat) = @$_;
                   return $_->[0] unless $op =~ /^(qr|m|s)/
                         || !$op && ($ld eq '/' || $ld eq '?');
                   $_ = $pat;
                   $t->(@_);
                   $ql =~ s/^(\s*\Q$op\E\s*\Q$ld\E)\Q$pat\E/$1$_/;
                   return "$pre$ql";
                  };
            },
    string   => sub { my ($t)=@_;
               sub{ref() or return $_;
                   local *args = \@_;
                   my ($pre,$op,$ld1,$str1,$rd1,$ld2,$str2,$rd2,$flg) = @{$_}[2..10];
                   return $_->[0] if $op =~ /^(qr|m)/
                         || !$op && ($ld1 eq '/' || $ld1 eq '?');
                   if (!$op || $op eq 'tr' || $op eq 'y') {
                       local *_ = \$str1;
                       $t->(@args);
                   }
                   if ($op =~ /^(tr|y|s)/) {
                       local *_ = \$str2;
                       $t->(@args);
                   }
                   my $result = "$pre$op$ld1$str1$rd1";
                   $result .= $ld2 if $ld1 =~ m/[[({<]/; #])}>
                   $result .= "$str2$rd2$flg";
                   return $result;
                  };
              },
);


# Build the filter for one FILTER_ONLY selector: the returned sub splits
# $_ into components, applies $transform to the selected ones and
# reassembles $_.
sub gen_std_filter_for {
    my ($type, $transform) = @_;
    return sub {
        my $instr;
        local @components;
        for (extract_multiple($_,$extractor_for{$type})) {
            if (ref())     { push @components, $_; $instr=0 }
            elsif ($instr) { $components[-1] .= $_ }
            else           { push @components, $_; $instr=1 }
        }
        if ($type =~ /^code/) {
            my $count = 0;
            # A placeholder is $; . pack('N', index) . $; - four packed
            # bytes between two delimiters. ".{4}" under /s is used to
            # match them because the older "\C{4}" form is a fatal
            # error on perl 5.24 and later.
            local $placeholder = qr/\Q$;\E(.{4})\Q$;\E/s;
            my $extractor      = qr/\Q$;\E(.{4})\Q$;\E/s;
            $_ = join "",
                  map { ref $_ ? $;.pack('N',$count++).$; : $_ }
                      @components;
            @components = grep { ref $_ } @components;
            $transform->(@_);
            s/$extractor/${$components[unpack('N',$1)]}/g;
        }
        else {
            my $selector = $selector_for{$type}->($transform);
            $_ = join "", map $selector->(@_), @components;
        }
    }
};

# FILTER { ... } [$terminator]: install import/unimport subs in the
# calling package that apply the block to the source following a "use".
sub FILTER (&;$) {
    my $caller = caller;
    my ($filter, $terminator) = @_;
    no warnings 'redefine';
    *{"${caller}::import"} = gen_filter_import($caller,$filter,$terminator);
    *{"${caller}::unimport"} = gen_filter_unimport($caller);
}

# FILTER_ONLY selector => sub {...}, ... [, $terminator]: like FILTER,
# but each sub is applied only to the components picked by its selector.
sub FILTER_ONLY {
    my $caller = caller;
    while (@_ > 1) {
        my ($what, $how) = splice(@_, 0, 2);
        fail "Unknown selector: $what"
            unless exists $extractor_for{$what};
        fail "Filter for $what is not a subroutine reference"
            unless ref $how eq 'CODE';
        # NOTE(review): @transforms is a package variable, so entries
        # appear to accumulate across FILTER_ONLY calls - confirm this
        # is intended.
        push @transforms, gen_std_filter_for($what,$how);
    }
    my $terminator = shift;

    my $multitransform = sub {
        foreach my $transform ( @transforms ) {
            $transform->(@_);
        }
    };
    no warnings 'redefine';
    *{"${caller}::import"} =
        gen_filter_import($caller,$multitransform,$terminator);
    *{"${caller}::unimport"} = gen_filter_unimport($caller);
}

# Optional horizontal whitespace and/or a trailing comment.
my $ows    = qr/(?:[ \t]+|#[^\n]*)*/;

# Build the import sub for $class: it registers a source filter that
# collects lines up to the terminator, runs $filter over them, and then
# hands over to any pre-existing import or to Exporter.
sub gen_filter_import {
    my ($class, $filter, $terminator) = @_;
    my %terminator;
    my $prev_import = *{$class."::import"}{CODE};
    return sub {
        my ($imported_class, @args) = @_;
        my $def_terminator =
            qr/^(?:\s*no\s+$imported_class\s*;$ows|__(?:END|DATA)__)\r?$/;
        # NOTE(review): when $terminator is a hash ref that already has
        # a 'terminator' key, none of these branches copies it (or
        # 'becomes') into %terminator - verify against the documented
        # hash form.
        if (!defined $terminator) {
            $terminator{terminator} = $def_terminator;
        }
        elsif (!ref $terminator || ref $terminator eq 'Regexp') {
            $terminator{terminator} = $terminator;
        }
        elsif (ref $terminator ne 'HASH') {
            croak "Terminator must be specified as scalar or hash ref"
        }
        elsif (!exists $terminator->{terminator}) {
            $terminator{terminator} = $def_terminator;
        }
        filter_add(
            sub {
                my ($status, $lastline);
                my $count = 0;
                my $data = "";
                while ($status = filter_read()) {
                    return $status if $status < 0;
                    if ($terminator{terminator} &&
                        m/$terminator{terminator}/) {
                        $lastline = $_;
                        last;
                    }
                    $data .= $_;
                    $count++;
                    $_ = "";
                }
                return $count if not $count;
                $_ = $data;
                $filter->($imported_class, @args) unless $status < 0;
                if (defined $lastline) {
                    if (defined $terminator{becomes})
                        { $_ .= $terminator{becomes} }
                    elsif ($lastline =~ $def_terminator)
                        { $_ .= $lastline }
                }
                return $count;
            }
        );
        if ($prev_import) {
            goto &$prev_import;
        }
        elsif ($class->isa('Exporter')) {
            $class->export_to_level(1,@_);
        }
    }
}

# Build the unimport sub for $class: it removes the source filter.
sub gen_filter_unimport {
    my ($class) = @_;
    return sub {
        filter_del();
        # NOTE(review): $prev_unimport is never assigned in the code
        # visible here, so this goto looks unreachable - confirm.
        goto &$prev_unimport if $prev_unimport;
    }
}

1;

__END__

=head1 NAME

Filter::Simple - Simplified source filtering

=head1 SYNOPSIS

 # in MyFilter.pm:

     package MyFilter;

     use Filter::Simple;

     FILTER { ... };

     # or just:
     #
     # use Filter::Simple sub { ... };

 # in user's code:

     use MyFilter;

     # this code is filtered

     no MyFilter;

     # this code is not

=head1 DESCRIPTION

=head2 The Problem

Source filtering is an immensely powerful feature of recent versions of Perl.
It allows one to extend the language itself (e.g. the Switch module), to
simplify the language (e.g. Language::Pythonesque), or to completely recast the
language (e.g. Lingua::Romana::Perligata). Effectively, it allows one to use
the full power of Perl as its own, recursively applied, macro language.

The excellent Filter::Util::Call module (by Paul Marquess) provides a
usable Perl interface to source filtering, but it is often too powerful
and not nearly as simple as it could be.

To use the module it is necessary to do the following:

=over 4

=item 1.

Download, build, and install the Filter::Util::Call module.
(If you have Perl 5.7.1 or later, this is already done for you.)

=item 2.

Set up a module that does a C<use Filter::Util::Call>.

=item 3.

Within that module, create an C<import> subroutine.

=item 4.

Within the C<import> subroutine do a call to C<filter_add>, passing
it either a subroutine reference.

=item 5.

Within the subroutine reference, call C<filter_read> or C<filter_read_exact>
to "prime" $_ with source code data from the source file that will
C<use> your module. Check the status value returned to see if any
source code was actually read in.

=item 6.
Process the contents of $_ to change the source code in the desired manner. =item 7. Return the status value. =item 8. If the act of unimporting your module (via a C<no>) should cause source code filtering to cease, create an C<unimport> subroutine, and have it call C<filter_del>. Make sure that the call to C<filter_read> or C<filter_read_exact> in step 5 will not accidentally read past the C<no>. Effectively this limits source code filters to line-by-line operation, unless the C<import> subroutine does some fancy pre-pre-parsing of the source code it's filtering. =back For example, here is a minimal source code filter in a module named BANG.pm. It simply converts every occurrence of the sequence C<BANG\s+BANG> to the sequence C<die 'BANG' if $BANG> in any piece of code following a C<use BANG;> statement (until the next C<no BANG;> statement, if any): package BANG; use Filter::Util::Call ; sub import { filter_add( sub { my $caller = caller; my ($status, $no_seen, $data); while ($status = filter_read()) { if (/^\s*no\s+$caller\s*;\s*?$/) { $no_seen=1; last; } $data .= $_; $_ = ""; } $_ = $data; s/BANG\s+BANG/die 'BANG' if \$BANG/g unless $status < 0; $_ .= "no $caller;\n" if $no_seen; return 1; }) } sub unimport { filter_del(); } 1 ; This level of sophistication puts filtering out of the reach of many programmers. =head2 A Solution The Filter::Simple module provides a simplified interface to Filter::Util::Call; one that is sufficient for most common cases. Instead of the above process, with Filter::Simple the task of setting up a source code filter is reduced to: =over 4 =item 1. Download and install the Filter::Simple module. (If you have Perl 5.7.1 or later, this is already done for you.) =item 2. Set up a module that does a C<use Filter::Simple> and then calls C<FILTER { ... }>. =item 3. Within the anonymous subroutine or block that is passed to C<FILTER>, process the contents of $_ to change the source code in the desired manner.
=back In other words, the previous example, would become: package BANG; use Filter::Simple; FILTER { s/BANG\s+BANG/die 'BANG' if \$BANG/g; }; 1 ; Note that the source code is passed as a single string, so any regex that uses C<^> or C<$> to detect line boundaries will need the C flag. =head2 Disabling or changing behaviour By default, the installed filter only filters up to a line consisting of one of the three standard source "terminators": no ModuleName; # optional comment or: __END__ or: __DATA__ but this can be altered by passing a second argument to C or C (just remember: there's I comma after the initial block when you use C). That second argument may be either a C'd regular expression (which is then used to match the terminator line), or a defined false value (which indicates that no terminator line should be looked for), or a reference to a hash (in which case the terminator is the value associated with the key C<'terminator'>. For example, to cause the previous filter to filter only up to a line of the form: GNAB esu; you would write: package BANG; use Filter::Simple; FILTER { s/BANG\s+BANG/die 'BANG' if \$BANG/g; } qr/^\s*GNAB\s+esu\s*;\s*?$/; or: FILTER { s/BANG\s+BANG/die 'BANG' if \$BANG/g; } { terminator => qr/^\s*GNAB\s+esu\s*;\s*?$/ }; and to prevent the filter's being turned off in any way: package BANG; use Filter::Simple; FILTER { s/BANG\s+BANG/die 'BANG' if \$BANG/g; } ""; # or: 0 or: FILTER { s/BANG\s+BANG/die 'BANG' if \$BANG/g; } { terminator => "" }; B be contained on a single source line.> =head2 All-in-one interface Separating the loading of Filter::Simple: use Filter::Simple; from the setting up of the filtering: FILTER { ... }; is useful because it allows other code (typically parser support code or caching variables) to be defined before the filter is invoked. However, there is often no need for such a separation. 
In those cases, it is easier to just append the filtering subroutine and any terminator specification directly to the C statement that loads Filter::Simple, like so: use Filter::Simple sub { s/BANG\s+BANG/die 'BANG' if \$BANG/g; }; This is exactly the same as: use Filter::Simple; BEGIN { Filter::Simple::FILTER { s/BANG\s+BANG/die 'BANG' if \$BANG/g; }; } except that the C subroutine is not exported by Filter::Simple. =head2 Filtering only specific components of source code One of the problems with a filter like: use Filter::Simple; FILTER { s/BANG\s+BANG/die 'BANG' if \$BANG/g }; is that it indiscriminately applies the specified transformation to the entire text of your source program. So something like: warn 'BANG BANG, YOU'RE DEAD'; BANG BANG; will become: warn 'die 'BANG' if $BANG, YOU'RE DEAD'; die 'BANG' if $BANG; It is very common when filtering source to only want to apply the filter to the non-character-string parts of the code, or alternatively to I the character strings. Filter::Simple supports this type of filtering by automatically exporting the C subroutine. C takes a sequence of specifiers that install separate (and possibly multiple) filters that act on only parts of the source code. For example: use Filter::Simple; FILTER_ONLY code => sub { s/BANG\s+BANG/die 'BANG' if \$BANG/g }, quotelike => sub { s/BANG\s+BANG/CHITTY CHITTY/g }; The C<"code"> subroutine will only be used to filter parts of the source code that are not quotelikes, POD, or C<__DATA__>. The C subroutine only filters Perl quotelikes (including here documents). The full list of alternatives is: =over =item C<"code"> Filters only those sections of the source code that are not quotelikes, POD, or C<__DATA__>. =item C<"code_no_comments"> Filters only those sections of the source code that are not quotelikes, POD, comments, or C<__DATA__>. =item C<"executable"> Filters only those sections of the source code that are not POD or C<__DATA__>. 
=item C<"executable_no_comments"> Filters only those sections of the source code that are not POD, comments, or C<__DATA__>. =item C<"quotelike"> Filters only Perl quotelikes (as interpreted by C<&Text::Balanced::extract_quotelike>). =item C<"string"> Filters only the string literal parts of a Perl quotelike (i.e. the contents of a string literal, either half of a C, the second half of an C). =item C<"regex"> Filters only the pattern literal parts of a Perl quotelike (i.e. the contents of a C or an C, the first half of an C). =item C<"all"> Filters everything. Identical in effect to C. =back Except for C<< FILTER_ONLY code => sub {...} >>, each of the component filters is called repeatedly, once for each component found in the source code. Note that you can also apply two or more of the same type of filter in a single C. For example, here's a simple macro-preprocessor that is only applied within regexes, with a final debugging pass that prints the resulting source code: use Regexp::Common; FILTER_ONLY regex => sub { s/!\[/[^/g }, regex => sub { s/%d/$RE{num}{int}/g }, regex => sub { s/%f/$RE{num}{real}/g }, all => sub { print if $::DEBUG }; =head2 Filtering only the code parts of source code Most source code ceases to be grammatically correct when it is broken up into the pieces between string literals and regexes. So the C<'code'> and C<'code_no_comments'> component filter behave slightly differently from the other partial filters described in the previous section. Rather than calling the specified processor on each individual piece of code (i.e. on the bits between quotelikes), the C<'code...'> partial filters operate on the entire source code, but with the quotelike bits (and, in the case of C<'code_no_comments'>, the comments) "blanked out". That is, a C<'code...'> filter I each quoted string, quotelike, regex, POD, and __DATA__ section with a placeholder. 
The delimiters of this placeholder are the contents of the C<$;> variable at the time the filter is applied (normally C<"\034">). The remaining four bytes are a unique identifier for the component being replaced. This approach makes it comparatively easy to write code preprocessors without worrying about the form or contents of strings, regexes, etc. For convenience, during a C<'code...'> filtering operation, Filter::Simple provides a package variable (C<$Filter::Simple::placeholder>) that contains a pre-compiled regex that matches any placeholder...and captures the identifier within the placeholder. Placeholders can be moved and re-ordered within the source code as needed. In addition, a second package variable (C<@Filter::Simple::components>) contains a list of the various pieces of C<$_>, as they were originally split up to allow placeholders to be inserted. Once the filtering has been applied, the original strings, regexes, POD, etc. are re-inserted into the code, by replacing each placeholder with the corresponding original component (from C<@components>). Note that this means that the C<@components> variable must be treated with extreme care within the filter. The C<@components> array stores the "back- translations" of each placeholder inserted into C<$_>, as well as the interstitial source code between placeholders. If the placeholder backtranslations are altered in C<@components>, they will be similarly changed when the placeholders are removed from C<$_> after the filter is complete. For example, the following filter detects concatenated pairs of strings/quotelikes and reverses the order in which they are concatenated: package DemoRevCat; use Filter::Simple; FILTER_ONLY code => sub { my $ph = $Filter::Simple::placeholder; s{ ($ph) \s* [.] \s* ($ph) }{ $2.$1 }gx }; Thus, the following code: use DemoRevCat; my $str = "abc" . 
q(def); print "$str\n"; would become: my $str = q(def)."abc"; print "$str\n"; and hence print: defabc =head2 Using Filter::Simple with an explicit C subroutine Filter::Simple generates a special C subroutine for your module (see L<"How it works">) which would normally replace any C subroutine you might have explicitly declared. However, Filter::Simple is smart enough to notice your existing C and Do The Right Thing with it. That is, if you explicitly define an C subroutine in a package that's using Filter::Simple, that C subroutine will still be invoked immediately after any filter you install. The only thing you have to remember is that the C subroutine I be declared I the filter is installed. If you use C to install the filter: package Filter::TurnItUpTo11; use Filter::Simple; FILTER { s/(\w+)/\U$1/ }; that will almost never be a problem, but if you install a filtering subroutine by passing it directly to the C statement: package Filter::TurnItUpTo11; use Filter::Simple sub{ s/(\w+)/\U$1/ }; then you must make sure that your C subroutine appears before that C statement. =head2 Using Filter::Simple and Exporter together Likewise, Filter::Simple is also smart enough to Do The Right Thing if you use Exporter: package Switch; use base Exporter; use Filter::Simple; @EXPORT = qw(switch case); @EXPORT_OK = qw(given when); FILTER { $_ = magic_Perl_filter($_) } Immediately after the filter has been applied to the source, Filter::Simple will pass control to Exporter, so it can do its magic too. Of course, here too, Filter::Simple has to know you're using Exporter before it applies the filter. That's almost never a problem, but if you're nervous about it, you can guarantee that things will work correctly by ensuring that your C always precedes your C. 
=head2 How it works The Filter::Simple module exports into the package that calls C (or Cs it directly) -- such as package "BANG" in the above example -- two automagically constructed subroutines -- C and C -- which take care of all the nasty details. In addition, the generated C subroutine passes its own argument list to the filtering subroutine, so the BANG.pm filter could easily be made parametric: package BANG; use Filter::Simple; FILTER { my ($die_msg, $var_name) = @_; s/BANG\s+BANG/die '$die_msg' if \${$var_name}/g; }; # and in some user code: use BANG "BOOM", "BAM"; # "BANG BANG" becomes: die 'BOOM' if $BAM The specified filtering subroutine is called every time a C is encountered, and passed all the source code following that call, up to either the next C (or whatever terminator you've set) or the end of the source file, whichever occurs first. By default, any C call must appear by itself on a separate line, or it is ignored. =head1 AUTHOR Damian Conway =head1 CONTACT Filter::Simple is now maintained by the Perl5-Porters. Please submit bug via the C tool that comes with your perl. For usage instructions, read C or possibly C. For mostly anything else, please contact Eperl5-porters@perl.orgE. Maintainer of the CPAN release is Steffen Mueller Esmueller@cpan.orgE. Contact him with technical difficulties with respect to the packaging of the CPAN module. Praise of the module, flowers, and presents still go to the author, Damian Conway Edamian@conway.orgE. =head1 COPYRIGHT AND LICENSE Copyright (c) 2000-2008, Damian Conway. All Rights Reserved. This module is free software. It may be used, redistributed and/or modified under the same terms as Perl itself.  ./3 ..Tiny.pm ./3 .. 
Collate.pm LangTags.pm LangTags # Time-stamp: "2004-01-11 18:35:34 AST" =head1 NAME Locale::Maketext - framework for localization =head1 SYNOPSIS package MyProgram; use strict; use MyProgram::L10N; # ...which inherits from Locale::Maketext my $lh = MyProgram::L10N->get_handle() || die "What language?"; ... # And then any messages your program emits, like: warn $lh->maketext( "Can't open file [_1]: [_2]\n", $f, $! ); ... =head1 DESCRIPTION It is a common feature of applications (whether run directly, or via the Web) for them to be "localized" -- i.e., for them to a present an English interface to an English-speaker, a German interface to a German-speaker, and so on for all languages it's programmed with. Locale::Maketext is a framework for software localization; it provides you with the tools for organizing and accessing the bits of text and text-processing code that you need for producing localized applications. In order to make sense of Maketext and how all its components fit together, you should probably go read L, and I read the following documentation. You may also want to read over the source for C and its constituent modules -- they are a complete (if small) example application that uses Maketext. =head1 QUICK OVERVIEW The basic design of Locale::Maketext is object-oriented, and Locale::Maketext is an abstract base class, from which you derive a "project class". The project class (with a name like "TkBocciBall::Localize", which you then use in your module) is in turn the base class for all the "language classes" for your project (with names "TkBocciBall::Localize::it", "TkBocciBall::Localize::en", "TkBocciBall::Localize::fr", etc.). A language class is a class containing a lexicon of phrases as class data, and possibly also some methods that are of use in interpreting phrases in the lexicon, or otherwise dealing with text in that language. An object belonging to a language class is called a "language handle"; it's typically a flyweight object. 
The normal course of action is to call: use TkBocciBall::Localize; # the localization project class $lh = TkBocciBall::Localize->get_handle(); # Depending on the user's locale, etc., this will # make a language handle from among the classes available, # and any defaults that you declare. die "Couldn't make a language handle??" unless $lh; From then on, you use the C function to access entries in whatever lexicon(s) belong to the language handle you got. So, this: print $lh->maketext("You won!"), "\n"; ...emits the right text for this language. If the object in C<$lh> belongs to class "TkBocciBall::Localize::fr" and %TkBocciBall::Localize::fr::Lexicon contains C<("You won!" =E "Tu as gagnE!")>, then the above code happily tells the user "Tu as gagnE!". =head1 METHODS Locale::Maketext offers a variety of methods, which fall into three categories: =over =item * Methods to do with constructing language handles. =item * C and other methods to do with accessing %Lexicon data for a given language handle. =item * Methods that you may find it handy to use, from routines of yours that you put in %Lexicon entries. =back These are covered in the following section. =head2 Construction Methods These are to do with constructing a language handle: =over =item * $lh = YourProjClass->get_handle( ...langtags... ) || die "lg-handle?"; This tries loading classes based on the language-tags you give (like C<("en-US", "sk", "kon", "es-MX", "ja", "i-klingon")>, and for the first class that succeeds, returns YourProjClass::I->new(). If it runs thru the entire given list of language-tags, and finds no classes for those exact terms, it then tries "superordinate" language classes. So if no "en-US" class (i.e., YourProjClass::en_us) was found, nor classes for anything else in that list, we then try its superordinate, "en" (i.e., YourProjClass::en), and so on thru the other language-tags in the given list: "es". (The other language-tags in our example list: happen to have no superordinates.) 
If none of those language-tags leads to loadable classes, we then try classes derived from YourProjClass->fallback_languages() and then if nothing comes of that, we use classes named by YourProjClass->fallback_language_classes(). Then in the (probably quite unlikely) event that that fails, we just return undef. =item * $lh = YourProjClass->get_handleB<()> || die "lg-handle?"; When C is called with an empty parameter list, magic happens: If C senses that it's running in program that was invoked as a CGI, then it tries to get language-tags out of the environment variable "HTTP_ACCEPT_LANGUAGE", and it pretends that those were the languages passed as parameters to C. Otherwise (i.e., if not a CGI), this tries various OS-specific ways to get the language-tags for the current locale/language, and then pretends that those were the value(s) passed to C. Currently this OS-specific stuff consists of looking in the environment variables "LANG" and "LANGUAGE"; and on MSWin machines (where those variables are typically unused), this also tries using the module Win32::Locale to get a language-tag for whatever language/locale is currently selected in the "Regional Settings" (or "International"?) Control Panel. I welcome further suggestions for making this do the Right Thing under other operating systems that support localization. If you're using localization in an application that keeps a configuration file, you might consider something like this in your project class: sub get_handle_via_config { my $class = $_[0]; my $chosen_language = $Config_settings{'language'}; my $lh; if($chosen_language) { $lh = $class->get_handle($chosen_language) || die "No language handle for \"$chosen_language\" or the like"; } else { # Config file missing, maybe? $lh = $class->get_handle() || die "Can't get a language handle"; } return $lh; } =item * $lh = YourProjClass::langname->new(); This constructs a language handle. 
You usually B<don't> call this directly, but instead let C<get_handle> find a language class to C<use> and to then call ->new on. =item * $lh->init(); This is called by ->new to initialize newly-constructed language handles. If you define an init method in your class, remember that it's usually considered a good idea to call $lh->SUPER::init in it (presumably at the beginning), so that all classes get a chance to initialize a new object however they see fit. =item * YourProjClass->fallback_languages() C<get_handle> appends the return value of this to the end of whatever list of languages you pass C<get_handle>. Unless you override this method, your project class will inherit Locale::Maketext's C<fallback_languages>, which currently returns C<('i-default', 'en', 'en-US')>. ("i-default" is defined in RFC 2277). This method (by having it return the name of a language-tag that has an existing language class) can be used for making sure that C<get_handle> will always manage to construct a language handle (assuming your language classes are in an appropriate @INC directory). Or you can use the next method: =item * YourProjClass->fallback_language_classes() C<get_handle> appends the return value of this to the end of the list of classes it will try using. Unless you override this method, your project class will inherit Locale::Maketext's C<fallback_language_classes>, which currently returns an empty list, C<()>. By setting this to some value (namely, the name of a loadable language class), you can be sure that C<get_handle> will always manage to construct a language handle. =back =head2 The "maketext" Method This is the most important method in Locale::Maketext: $text = $lh->maketext(I<key>, ...parameters for this phrase...); This looks in the %Lexicon of the language handle $lh and all its superclasses, looking for an entry whose key is the string I<key>. Assuming such an entry is found, various things then happen, depending on the value found: If the value is a scalarref, the scalar is dereferenced and returned (and any parameters are ignored).
If the value is a coderef, we return &$value($lh, ...parameters...). If the value is a string that I<doesn't> look like it's in Bracket Notation, we return it (after replacing it with a scalarref, in its %Lexicon). If the value I<does> look like it's in Bracket Notation, then we compile it into a sub, replace the string in the %Lexicon with the new coderef, and then we return &$new_sub($lh, ...parameters...). Bracket Notation is discussed in a later section. Note that trying to compile a string into Bracket Notation can throw an exception if the string is not syntactically valid (say, by not balancing brackets right.) Also, calling &$coderef($lh, ...parameters...) can throw any sort of exception (if, say, code in that sub tries to divide by zero). But a very common exception occurs when you have Bracket Notation text that says to call a method "foo", but there is no such method. (E.g., "You have [quaB,_1,ball]." will throw an exception on trying to call $lh->quaB($_[1],'ball') -- you presumably meant "quant".) C<maketext> catches these exceptions, but only to make the error message more readable, at which point it rethrows the exception. An exception I<may> be thrown if I<key> is not found in any of $lh's %Lexicon hashes. What happens if a key is not found, is discussed in a later section, "Controlling Lookup Failure". Note that you might find it useful in some cases to override the C<maketext> method with an "after method", if you want to translate encodings, or even scripts: package YrProj::zh_cn; # Chinese with PRC-style glyphs use base ('YrProj::zh_tw'); # Taiwan-style sub maketext { my $self = shift(@_); my $value = $self->SUPER::maketext(@_); return Chineeze::taiwan2mainland($value); } Or you may want to override it with something that traps any exceptions, if that's critical to your program: sub maketext { my($lh, @stuff) = @_; my $out; eval { $out = $lh->SUPER::maketext(@stuff) }; return $out unless $@; ...otherwise deal with the exception...
} Other than those two situations, I don't imagine that it's useful to override the C method. (If you run into a situation where it is useful, I'd be interested in hearing about it.) =over =item $lh->fail_with I $lh->fail_with(I) =item $lh->failure_handler_auto These two methods are discussed in the section "Controlling Lookup Failure". =back =head2 Utility Methods These are methods that you may find it handy to use, generally from %Lexicon routines of yours (whether expressed as Bracket Notation or not). =over =item $language->quant($number, $singular) =item $language->quant($number, $singular, $plural) =item $language->quant($number, $singular, $plural, $negative) This is generally meant to be called from inside Bracket Notation (which is discussed later), as in "Your search matched [quant,_1,document]!" It's for I a noun (i.e., saying how much of it there is, while giving the correct form of it). The behavior of this method is handy for English and a few other Western European languages, and you should override it for languages where it's not suitable. You can feel free to read the source, but the current implementation is basically as this pseudocode describes: if $number is 0 and there's a $negative, return $negative; elsif $number is 1, return "1 $singular"; elsif there's a $plural, return "$number $plural"; else return "$number " . $singular . "s"; # # ...except that we actually call numf to # stringify $number before returning it. So for English (with Bracket Notation) C<"...[quant,_1,file]..."> is fine (for 0 it returns "0 files", for 1 it returns "1 file", and for more it returns "2 files", etc.) But for "directory", you'd want C<"[quant,_1,directory,directories]"> so that our elementary C method doesn't think that the plural of "directory" is "directorys". 
And you might find that the output may sound better if you specify a negative form, as in: "[quant,_1,file,files,No files] matched your query.\n" Remember to keep in mind verb agreement (or adjectives too, in other languages), as in: "[quant,_1,document] were matched.\n" Because if _1 is one, you get "1 document B matched". An acceptable hack here is to do something like this: "[quant,_1,document was, documents were] matched.\n" =item $language->numf($number) This returns the given number formatted nicely according to this language's conventions. Maketext's default method is mostly to just take the normal string form of the number (applying sprintf "%G" for only very large numbers), and then to add commas as necessary. (Except that we apply C if $language->{'numf_comma'} is true; that's a bit of a hack that's useful for languages that express two million as "2.000.000" and not as "2,000,000"). If you want anything fancier, consider overriding this with something that uses L, or does something else entirely. Note that numf is called by quant for stringifying all quantifying numbers. =item $language->sprintf($format, @items) This is just a wrapper around Perl's normal C function. It's provided so that you can use "sprintf" in Bracket Notation: "Couldn't access datanode [sprintf,%10x=~[%s~],_1,_2]!\n" returning... Couldn't access datanode Stuff=[thangamabob]! =item $language->language_tag() Currently this just takes the last bit of C, turns underscores to dashes, and returns it. So if $language is an object of class Hee::HOO::Haw::en_us, $language->language_tag() returns "en-us". (Yes, the usual representation for that language tag is "en-US", but case is I considered meaningful in language-tag comparison.) You may override this as you like; Maketext doesn't use it for anything. 
=item $language->encoding() Currently this isn't used for anything, but it's provided (with default value of C<(ref($language) && $language-E<gt>{'encoding'})) or "iso-8859-1"> ) as a sort of suggestion that it may be useful/necessary to associate encodings with your language handles (whether on a per-class or even per-handle basis.) =back =head2 Language Handle Attributes and Internals A language handle is a flyweight object -- i.e., it doesn't (necessarily) carry any data of interest, other than just being a member of whatever class it belongs to. A language handle is implemented as a blessed hash. Subclasses of yours can store whatever data you want in the hash. Currently the only hash entry used by any crucial Maketext method is "fail", so feel free to use anything else as you like. B<Remember: Don't be afraid to read the Maketext source if there's any point on which this documentation is unclear.> This documentation is vastly longer than the module source itself. =over =back =head1 LANGUAGE CLASS HIERARCHIES These are Locale::Maketext's assumptions about the class hierarchy formed by all your language classes: =over =item * You must have a project base class, which you load, and which you then use as the first argument in the call to YourProjClass->get_handle(...). It should derive (whether directly or indirectly) from Locale::Maketext. It B<doesn't matter> how you name this class, although assuming this is the localization component of your Super Mega Program, good names for your project class might be SuperMegaProgram::Localization, SuperMegaProgram::L10N, SuperMegaProgram::I18N, SuperMegaProgram::International, or even SuperMegaProgram::Languages or SuperMegaProgram::Messages. =item * Language classes are what YourProjClass->get_handle will try to load. It will look for them by taking each language-tag (B<skipping> it if it doesn't look like a language-tag or locale-tag!), turning it to all lowercase, turning dashes to underscores, and appending it to YourProjClass . "::".
So this: $lh = YourProjClass->get_handle( 'en-US', 'fr', 'kon', 'i-klingon', 'i-klingon-romanized' ); will try loading the classes YourProjClass::en_us (note lowercase!), YourProjClass::fr, YourProjClass::kon, YourProjClass::i_klingon and YourProjClass::i_klingon_romanized. (And it'll stop at the first one that actually loads.) =item * I assume that each language class derives (directly or indirectly) from your project class, and also defines its @ISA, its %Lexicon, or both. But I anticipate no dire consequences if these assumptions do not hold. =item * Language classes may derive from other language classes (although they should have "use I<Thatclassname>" or "use base qw(I<...classes...>)"). They may derive from the project class. They may derive from some other class altogether. Or via multiple inheritance, it may derive from any mixture of these. =item * I foresee no problems with having multiple inheritance in your hierarchy of language classes. (As usual, however, Perl will complain bitterly if you have a cycle in the hierarchy: i.e., if any class is its own ancestor.) =back =head1 ENTRIES IN EACH LEXICON A typical %Lexicon entry is meant to signify a phrase, taking some number (0 or more) of parameters. An entry is meant to be accessed via a string I<key> in $lh->maketext(I<key>, ...parameters...), which should return a string that is generally meant to be used for "output" to the user -- regardless of whether this actually means printing to STDOUT, writing to a file, or putting into a GUI widget. While the key must be a string value (since that's a basic restriction that Perl places on hash keys), the value in the lexicon can currently be of several types: a defined scalar, scalarref, or coderef. The use of these is explained above, in the section 'The "maketext" Method', and Bracket Notation for strings is discussed in the next section.
While you can use arbitrary unique IDs for lexicon keys (like "_min_larger_max_error"), it is often useful if an entry's key is itself a valid value, like this example error message: "Minimum ([_1]) is larger than maximum ([_2])!\n", Compare this code that uses an arbitrary ID... die $lh->maketext( "_min_larger_max_error", $min, $max ) if $min > $max; ...to this code that uses a key-as-value: die $lh->maketext( "Minimum ([_1]) is larger than maximum ([_2])!\n", $min, $max ) if $min > $max; The second is, in short, more readable. In particular, it's obvious that the number of parameters you're feeding to that phrase (two) is the number of parameters that it I<wants> to be fed. (Since you see _1 and a _2 being used in the key there.) Also, once a project is otherwise complete and you start to localize it, you can scrape together all the various keys you use, and pass it to a translator; and then the translator's work will go faster if what he's presented is this: "Minimum ([_1]) is larger than maximum ([_2])!\n", => "", # fill in something here, Jacques! rather than this more cryptic mess: "_min_larger_max_error" => "", # fill in something here, Jacques I think that keys as lexicon values makes the completed lexicon entries more readable: "Minimum ([_1]) is larger than maximum ([_2])!\n", => "Le minimum ([_1]) est plus grand que le maximum ([_2])!\n", Also, having valid values as keys becomes very useful if you set up an _AUTO lexicon. _AUTO lexicons are discussed in a later section. I almost always use keys that are themselves valid lexicon values. One notable exception is when the value is quite long. For example, to get the screenful of data that a command-line program might return when given an unknown switch, I often just use a brief, self-explanatory key such as "_USAGE_MESSAGE".
At that point I then go and immediately define that lexicon entry in the ProjectClass::L10N::en lexicon (since English is always my "project language"): '_USAGE_MESSAGE' => <<'EOSTUFF', ...long long message... EOSTUFF and then I can use it as: getopt('oDI', \%opts) or die $lh->maketext('_USAGE_MESSAGE'); Incidentally, note that each class's C<%Lexicon> inherits-and-extends the lexicons in its superclasses. This is not because these are special hashes I<per se>, but because you access them via the C<maketext> method, which looks for entries across all the C<%Lexicon> hashes in a language class I<and> all its ancestor classes. (This is because the idea of "class data" isn't directly implemented in Perl, but is instead left to individual class-systems to implement as they see fit.) Note that you may have things stored in a lexicon besides just phrases for output: for example, if your program takes input from the keyboard, asking a "(Y/N)" question, you probably need to know what the equivalent of "Y[es]/N[o]" is in whatever language. You probably also need to know what the equivalents of the answers "y" and "n" are. You can store that information in the lexicon (say, under the keys "~answer_y" and "~answer_n", and the long forms as "~answer_yes" and "~answer_no", where "~" is just an ad-hoc character meant to indicate to programmers/translators that these are not phrases for output). Or instead of storing this in the language class's lexicon, you can (and, in some cases, really should) represent the same bit of knowledge as code in a method in the language class. (That leaves a tidy distinction between the lexicon as the things we know how to I<say>, and the rest of the things in the lexicon class as things that we know how to I<do>.)
Consider this example of a processor for responses to French "oui/non" questions: sub y_or_n { return undef unless defined $_[1] and length $_[1]; my $answer = lc $_[1]; # smash case return 1 if $answer eq 'o' or $answer eq 'oui'; return 0 if $answer eq 'n' or $answer eq 'non'; return undef; } ...which you'd then call in a construct like this: my $response; until(defined $response) { print $lh->maketext("Open the pod bay door (y/n)? "); $response = $lh->y_or_n( get_input_from_keyboard_somehow() ); } if($response) { $pod_bay_door->open() } else { $pod_bay_door->leave_closed() } Other data worth storing in a lexicon might be things like filenames for language-targeted resources: ... "_main_splash_png" => "/styles/en_us/main_splash.png", "_main_splash_imagemap" => "/styles/en_us/main_splash.incl", "_general_graphics_path" => "/styles/en_us/", "_alert_sound" => "/styles/en_us/hey_there.wav", "_forward_icon" => "left_arrow.png", "_backward_icon" => "right_arrow.png", # In some other languages, left equals # BACKwards, and right is FOREwards. ... You might want to do the same thing for expressing key bindings or the like (since hardwiring "q" as the binding for the function that quits a screen/menu/program is useful only if your language happens to associate "q" with "quit"!) =head1 BRACKET NOTATION Bracket Notation is a crucial feature of Locale::Maketext. I mean Bracket Notation to provide a replacement for the use of sprintf formatting. Everything you do with Bracket Notation could be done with a sub block, but bracket notation is meant to be much more concise. Bracket Notation is like a miniature "template" system (in the sense of L<Text::Template>, not in the sense of C++ templates), where normal text is passed thru basically as is, but text in special regions is specially interpreted. In Bracket Notation, you use square brackets ("[...]"), not curly braces ("{...}") to note sections that are specially interpreted.
For example, here all the areas that are taken literally are underlined with a "^", and all the in-bracket special regions are underlined with an X: "Minimum ([_1]) is larger than maximum ([_2])!\n", ^^^^^^^^^ XX ^^^^^^^^^^^^^^^^^^^^^^^^^^ XX ^^^^ When that string is compiled from bracket notation into a real Perl sub, it's basically turned into: sub { my $lh = $_[0]; my @params = @_; return join '', "Minimum (", ...some code here... ") is larger than maximum (", ...some code here... ")!\n", } # to be called by $lh->maketext(KEY, params...) In other words, text outside bracket groups is turned into string literals. Text in brackets is rather more complex, and currently follows these rules: =over =item * Bracket groups that are empty, or which consist only of whitespace, are ignored. (Examples: "[]", "[ ]", or a [ and a ] with returns and/or tabs and/or spaces between them.) Otherwise, each group is taken to be a comma-separated group of items, and each item is interpreted as follows: =item * An item that is "_I<digits>" or "_-I<digits>" is interpreted as $_[I<value>]. I.e., "_1" becomes $_[1], and "_-3" is interpreted as $_[-3] (in which case @_ should have at least three elements in it). Note that $_[0] is the language handle, and is typically not named directly. =item * An item "_*" is interpreted to mean "all of @_ except $_[0]". I.e., C<@_[1..$#_]>. Note that this is an empty list in the case of calls like $lh->maketext(I<key>) where there are no parameters (except $_[0], the language handle). =item * Otherwise, each item is interpreted as a string literal. =back The group as a whole is interpreted as follows: =over =item * If the first item in a bracket group looks like a method name, then that group is interpreted like this: $lh->that_method_name( ...rest of items in this group... ), =item * If the first item in a bracket group is "*", it's taken as shorthand for the so commonly called "quant" method.
Similarly, if the first item in a bracket group is "#", it's taken to be shorthand for "numf". =item * If the first item in a bracket group is the empty-string, or "_*" or "_I<digits>" or "_-I<digits>", then that group is interpreted as just the interpolation of all its items: join('', ...rest of items in this group... ), Examples: "[_1]" and "[,_1]", which are synonymous; and "C<[,ID-(,_4,-,_2,)]>", which compiles as C<join "", "ID-(", $_[4], "-", $_[2], ")">. =item * Otherwise this bracket group is invalid. For example, in the group "[!@#,whatever]", the first item C<"!@#"> is neither the empty-string, "_I<digits>", "_-I<digits>", "_*", nor a valid method name; and so Locale::Maketext will throw an exception if you try compiling an expression containing this bracket group. =back Note, incidentally, that items in each group are comma-separated, not C</\s*,\s*/>-separated. That is, you might expect that this bracket group: "Hoohah [foo, _1 , bar ,baz]!" would compile to this: sub { my $lh = $_[0]; return join '', "Hoohah ", $lh->foo( $_[1], "bar", "baz"), "!", } But it actually compiles as this: sub { my $lh = $_[0]; return join '', "Hoohah ", $lh->foo(" _1 ", " bar ", "baz"), # note the <space> in " bar " "!", } In the notation discussed so far, the characters "[" and "]" are given special meaning, for opening and closing bracket groups, and "," has a special meaning inside bracket groups, where it separates items in the group. This begs the question of how you'd express a literal "[" or "]" in a Bracket Notation string, and how you'd express a literal comma inside a bracket group. For this purpose I've adopted "~" (tilde) as an escape character: "~[" means a literal '[' character anywhere in Bracket Notation (i.e., regardless of whether you're in a bracket group or not), and ditto for "~]" meaning a literal ']', and "~," meaning a literal comma. (Altho "," means a literal comma outside of bracket groups -- it's only inside bracket groups that commas are special.)
And on the off chance you need a literal tilde in a bracket expression, you get it with "~~". Currently, an unescaped "~" before a character other than a bracket or a comma is taken to mean just a "~" and that character. I.e., "~X" means the same as "~~X" -- i.e., one literal tilde, and then one literal "X". However, by using "~X", you are assuming that no future version of Maketext will use "~X" as a magic escape sequence. In practice this is not a great problem, since first off you can just write "~~X" and not worry about it; second off, I doubt I'll add lots of new magic characters to bracket notation; and third off, you aren't likely to want literal "~" characters in your messages anyway, since it's not a character with wide use in natural language text. Brackets must be balanced -- every openbracket must have one matching closebracket, and vice versa. So these are all B<invalid>: "I ate [quant,_1,rhubarb pie." "I ate [quant,_1,rhubarb pie[." "I ate quant,_1,rhubarb pie]." "I ate quant,_1,rhubarb pie[." Currently, bracket groups do not nest. That is, you B<cannot> say: "Foo [bar,baz,[quux,quuux]]\n"; If you need a notation that's that powerful, use normal Perl: %Lexicon = ( ... "some_key" => sub { my $lh = $_[0]; join '', "Foo ", $lh->bar('baz', $lh->quux('quuux')), "\n", }, ... ); Or write the "bar" method so you don't need to pass it the output from calling quux. I do not anticipate that you will need (or particularly want) to nest bracket groups, but you are welcome to email me with convincing (real-life) arguments to the contrary. =head1 AUTO LEXICONS If maketext goes to look in an individual %Lexicon for an entry for I<key> (where I<key> does not start with an underscore), and sees none, B<but does see> an entry of "_AUTO" => I<some_true_value>, then we actually define $Lexicon{I<key>} = I<key> right then and there, and then use that value as if it had been there all along. This happens before we even look in any superclass %Lexicons!
(This is meant to be somewhat like the AUTOLOAD mechanism in Perl's function call system -- or, looked at another way, like the L module.) I can picture all sorts of circumstances where you just do not want lookup to be able to fail (since failing normally means that maketext throws a C, although see the next section for greater control over that). But here's one circumstance where _AUTO lexicons are meant to be I useful: As you're writing an application, you decide as you go what messages you need to emit. Normally you'd go to write this: if(-e $filename) { go_process_file($filename) } else { print qq{Couldn't find file "$filename"!\n}; } but since you anticipate localizing this, you write: use ThisProject::I18N; my $lh = ThisProject::I18N->get_handle(); # For the moment, assume that things are set up so # that we load class ThisProject::I18N::en # and that that's the class that $lh belongs to. ... if(-e $filename) { go_process_file($filename) } else { print $lh->maketext( qq{Couldn't find file "[_1]"!\n}, $filename ); } Now, right after you've just written the above lines, you'd normally have to go open the file ThisProject/I18N/en.pm, and immediately add an entry: "Couldn't find file \"[_1]\"!\n" => "Couldn't find file \"[_1]\"!\n", But I consider that somewhat of a distraction from the work of getting the main code working -- to say nothing of the fact that I often have to play with the program a few times before I can decide exactly what wording I want in the messages (which in this case would require me to go changing three lines of code: the call to maketext with that key, and then the two lines in ThisProject/I18N/en.pm). 
However, if you set "_AUTO => 1" in the %Lexicon in ThisProject/I18N/en.pm (assuming that English (en) is the language that all your programmers will be using for this project's internal message keys), then you don't ever have to go adding lines like this "Couldn't find file \"[_1]\"!\n" => "Couldn't find file \"[_1]\"!\n", to ThisProject/I18N/en.pm, because if _AUTO is true there, then just looking for an entry with the key "Couldn't find file \"[_1]\"!\n" in that lexicon will cause it to be added, with that value! Note that the reason that keys that start with "_" are immune to _AUTO isn't anything generally magical about the underscore character -- I just wanted a way to have most lexicon keys be autoable, except for possibly a few, and I arbitrarily decided to use a leading underscore as a signal to distinguish those few. =head1 READONLY LEXICONS If your lexicon is a tied hash the simple act of caching the compiled value can be fatal. For example a L<GDBM_File> GDBM_READER tied hash will die with something like: gdbm store returned -1, errno 2, key "..." at ... All you need to do is turn on caching outside of the lexicon hash itself like so: sub init { my ($lh) = @_; ... $lh->{'use_external_lex_cache'} = 1; ... } And then instead of storing the compiled value in the lexicon hash it will store it in $lh->{'_external_lex_cache'}. =head1 CONTROLLING LOOKUP FAILURE If you call $lh->maketext(I<key>, ...parameters...), and there's no entry I<key> in $lh's class's %Lexicon, nor in the superclass %Lexicon hash, I<and> if we can't auto-make I<key> (because either it starts with a "_", or because none of its lexicons have C<_AUTO =E<gt> 1,>), then we have failed to find a normal way to maketext I<key>. What then happens in these failure conditions, depends on the $lh object's "fail" attribute. If the language handle has no "fail" attribute, maketext will simply throw an exception (i.e., it calls C<die>, mentioning the I<key> whose lookup failed, and naming the line number where the calling $lh->maketext(I<key>,...) was).
If the language handle has a "fail" attribute whose value is a coderef, then $lh->maketext(I<key>,...params...) gives up and calls: return $that_subref->($lh, $key, @params); Otherwise, the "fail" attribute's value should be a string denoting a method name, so that $lh->maketext(I<key>,...params...) can give up with: return $lh->$that_method_name($phrase, @params); The "fail" attribute can be accessed with the C<fail_with> method: # Set to a coderef: $lh->fail_with( \&failure_handler ); # Set to a method name: $lh->fail_with( 'failure_method' ); # Set to nothing (i.e., so failure throws a plain exception) $lh->fail_with( undef ); # Get the current value $handler = $lh->fail_with(); Now, as to what you may want to do with these handlers: Maybe you'd want to log what key failed for what class, and then die. Maybe you don't like C<die> and instead you want to send the error message to STDOUT (or wherever) and then merely C<exit()>. Or maybe you don't want to C<die> at all! Maybe you could use a handler like this: # Make all lookups fall back onto an English value, # but only after we log it for later fingerpointing. my $lh_backup = ThisProject->get_handle('en'); open(LEX_FAIL_LOG, ">>wherever/lex.log") || die "GNAARGH $!"; sub lex_fail { my($failing_lh, $key, @params) = @_; print LEX_FAIL_LOG scalar(localtime), "\t", ref($failing_lh), "\t", $key, "\n"; return $lh_backup->maketext($key,@params); } Some users have expressed that they think this whole mechanism of having a "fail" attribute at all, seems a rather pointless complication. But I want Locale::Maketext to be usable for software projects of I<any> scale and type; and different software projects have different ideas of what the right thing is to do in failure conditions. I could simply say that failure always throws an exception, and that if you want to be careful, you'll just have to wrap every call to $lh->maketext in an S<eval { }>.
However, I want programmers to reserve the right (via the "fail" attribute) to treat lookup failure as something other than an exception of the same level of severity as a config file being unreadable, or some essential resource being inaccessible. One possibly useful value for the "fail" attribute is the method name "failure_handler_auto". This is a method defined in the class Locale::Maketext itself. You set it with: $lh->fail_with('failure_handler_auto'); Then when you call $lh->maketext(I<key>, ...parameters...) and there's no I<key> in any of those lexicons, maketext gives up with return $lh->failure_handler_auto($key, @params); But failure_handler_auto, instead of dying or anything, compiles $key, caching it in $lh->{'failure_lex'}{$key} = $compiled and then calls the compiled value, and returns that. (I.e., if $key looks like bracket notation, $compiled is a sub, and we return &{$compiled}(@params); but if $key is just a plain string, we just return that.) The effect of using "failure_handler_auto" is like an _AUTO lexicon, except that it 1) compiles $key even if it starts with "_", and 2) you have a record in the new hashref $lh->{'failure_lex'} of all the keys that have failed for this object. This should avoid your program dying -- as long as your keys aren't actually invalid as bracket code, and as long as they don't try calling methods that don't exist. "failure_handler_auto" may not be exactly what you want, but I hope it at least shows you that maketext failure can be mitigated in any number of very flexible ways. If you can formalize exactly what you want, you should be able to express that as a failure handler. You can even make it default for every object of a given class, by setting it in that class's init: sub init { my $lh = $_[0]; # a newborn handle $lh->SUPER::init(); $lh->fail_with('my_clever_failure_handler'); return; } sub my_clever_failure_handler { ...your clever things here...
} =head1 HOW TO USE MAKETEXT Here is a brief checklist on how to use Maketext to localize applications: =over =item * Decide what system you'll use for lexicon keys. If you insist, you can use opaque IDs (if you're nostalgic for C), but I have better suggestions in the section "Entries in Each Lexicon", above. Assuming you opt for meaningful keys that double as values (like "Minimum ([_1]) is larger than maximum ([_2])!\n"), you'll have to settle on what language those should be in. For the sake of argument, I'll call this English, specifically American English, "en-US". =item * Create a class for your localization project. This is the name of the class that you'll use in the idiom: use Projname::L10N; my $lh = Projname::L10N->get_handle(...) || die "Language?"; Assuming you call your class Projname::L10N, create a class consisting minimally of: package Projname::L10N; use base qw(Locale::Maketext); ...any methods you might want all your languages to share... # And, assuming you want the base class to be an _AUTO lexicon, # as is discussed a few sections up: 1; =item * Create a class for the language your internal keys are in. Name the class after the language-tag for that language, in lowercase, with dashes changed to underscores. Assuming your project's first language is US English, you should call this Projname::L10N::en_us. It should consist minimally of: package Projname::L10N::en_us; use base qw(Projname::L10N); %Lexicon = ( '_AUTO' => 1, ); 1; (For the rest of this section, I'll assume that this "first language class" of Projname::L10N::en_us has _AUTO lexicon.) =item * Go and write your program. 
Everywhere in your program where you would say: print "Foobar $thing stuff\n"; instead do it thru maketext, using no variable interpolation in the key: print $lh->maketext("Foobar [_1] stuff\n", $thing); If you get tired of constantly saying C<print $lh-E<gt>maketext>, consider making a functional wrapper for it, like so: use Projname::L10N; use vars qw($lh); $lh = Projname::L10N->get_handle(...) || die "Language?"; sub pmt (@) { print( $lh->maketext(@_)) } # "pmt" is short for "Print MakeText" $Carp::Verbose = 1; # so if maketext fails, we see what made the call to pmt Besides whole phrases meant for output, anything language-dependent should be put into the class Projname::L10N::en_us, whether as methods, or as lexicon entries -- this is discussed in the section "Entries in Each Lexicon", above. =item * Once the program is otherwise done, and once its localization for the first language works right (via the data and methods in Projname::L10N::en_us), you can get together the data for translation. If your first language lexicon isn't an _AUTO lexicon, then you already have all the messages explicitly in the lexicon (or else you'd be getting exceptions thrown when you call $lh->maketext to get messages that aren't in there). But if you were (advisedly) lazy and are using an _AUTO lexicon, then you've got to make a list of all the phrases that you've so far been letting _AUTO generate for you. There are very many ways to assemble such a list. The most straightforward is to simply grep the source for every occurrence of "maketext" (or calls to wrappers around it, like the above C<pmt> function), and to log the following phrase. =item * You may at this point want to consider whether your base class (Projname::L10N), from which all lexicons inherit (Projname::L10N::en, Projname::L10N::es, etc.), should be an _AUTO lexicon.
It may be true that in theory, all needed messages will be in each language class; but in the presumably unlikely or "impossible" case of lookup failure, you should consider whether your program should throw an exception, emit text in English (or whatever your project's first language is), or some more complex solution as described in the section "Controlling Lookup Failure", above. =item * Submit all messages/phrases/etc. to translators. (You may, in fact, want to start with localizing to I other language at first, if you're not sure that you've properly abstracted the language-dependent parts of your code.) Translators may request clarification of the situation in which a particular phrase is found. For example, in English we are entirely happy saying "I files found", regardless of whether we mean "I looked for files, and found I of them" or the rather distinct situation of "I looked for something else (like lines in files), and along the way I saw I files." This may involve rethinking things that you thought quite clear: should "Edit" on a toolbar be a noun ("editing") or a verb ("to edit")? Is there already a conventionalized way to express that menu option, separate from the target language's normal word for "to edit"? In all cases where the very common phenomenon of quantification (saying "I files", for B value of N) is involved, each translator should make clear what dependencies the number causes in the sentence. In many cases, dependency is limited to words adjacent to the number, in places where you might expect them ("I found the-?PLURAL I empty-?PLURAL directory-?PLURAL"), but in some cases there are unexpected dependencies ("I found-?PLURAL ..."!) as well as long-distance dependencies "The I directory-?PLURAL could not be deleted-?PLURAL"!). 
Remind the translators to consider the case where N is 0: "0 files found" isn't exactly natural-sounding in any language, but it may be unacceptable in many -- or it may condition special kinds of agreement (similar to English "I didN'T find ANY files"). Remember to ask your translators about numeral formatting in their language, so that you can override the C method as appropriate. Typical variables in number formatting are: what to use as a decimal point (comma? period?); what to use as a thousands separator (space? nonbreaking space? comma? period? small middot? prime? apostrophe?); and even whether the so-called "thousands separator" is actually for every third digit -- I've heard reports of two hundred thousand being expressible as "2,00,000" for some Indian (Subcontinental) languages, besides the less surprising "S<200 000>", "200.000", "200,000", and "200'000". Also, using a set of numeral glyphs other than the usual ASCII "0"-"9" might be appreciated, as via C for getting digits in Devanagari script (for Hindi, Konkani, others). The basic C method that Locale::Maketext provides should be good for many languages. For some languages, it might be useful to modify it (or its constituent C method) to take a plural form in the two-argument call to C (as in "[quant,_1,files]") if it's all-around easier to infer the singular form from the plural, than to infer the plural form from the singular. But for other languages (as is discussed at length in L), simple C/C is not enough. For the particularly problematic Slavic languages, what you may need is a method which you provide with the number, the citation form of the noun to quantify, and the case and gender that the sentence's syntax projects onto that noun slot. 
The method would then be responsible for determining what grammatical number that numeral projects onto its noun phrase, and what case and gender it may override the normal case and gender with; and then it would look up the noun in a lexicon providing all needed inflected forms. =item * You may also wish to discuss with the translators the question of how to relate different subforms of the same language tag, considering how this reacts with C's treatment of these. For example, if a user accepts interfaces in "en, fr", and you have interfaces available in "en-US" and "fr", what should they get? You may wish to resolve this by establishing that "en" and "en-US" are effectively synonymous, by having one class zero-derive from the other. For some languages this issue may never come up (Danish is rarely expressed as "da-DK", but instead is just "da"). And for other languages, the whole concept of a "generic" form may verge on being uselessly vague, particularly for interfaces involving voice media in forms of Arabic or Chinese. =item * Once you've localized your program/site/etc. for all desired languages, be sure to show the result (whether live, or via screenshots) to the translators. Once they approve, make every effort to have it then checked by at least one other speaker of that language. This holds true even when (or especially when) the translation is done by one of your own programmers. Some kinds of systems may be harder to find testers for than others, depending on the amount of domain-specific jargon and concepts involved -- it's easier to find people who can tell you whether they approve of your translation for "delete this message" in an email-via-Web interface, than to find people who can give you an informed opinion on your translation for "attribute value" in an XML query tool's interface. =back =head1 SEE ALSO I recommend reading all of these: L -- my I article about Maketext. 
It explains many important concepts underlying Locale::Maketext's design, and some insight into why Maketext is better than the plain old approach of having message catalogs that are just databases of sprintf formats. L is a sample application/module that uses Locale::Maketext to localize its messages. For a larger internationalized system, see also L. L. L. RFC 3066, I, as at http://sunsite.dk/RFC/rfc/rfc3066.html RFC 2277, I is at http://sunsite.dk/RFC/rfc/rfc2277.html -- much of it is just things of interest to protocol designers, but it explains some basic concepts, like the distinction between locales and language-tags. The manual for GNU C. The gettext dist is available in C -- get a recent gettext tarball and look in its "doc/" directory, there's an easily browsable HTML version in there. The gettext documentation asks lots of questions worth thinking about, even if some of their answers are sometimes wonky, particularly where they start talking about pluralization. The Locale/Maketext.pm source. Obverse that the module is much shorter than its documentation! =head1 COPYRIGHT AND DISCLAIMER Copyright (c) 1999-2004 Sean M. Burke. All rights reserved. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. This program is distributed in the hope that it will be useful, but without any warranty; without even the implied warranty of merchantability or fitness for a particular purpose. =head1 AUTHOR Sean M. Burke C =cut # Getopt::Long.pm -- Universal options parsing package Getopt::Long; # RCS Status : $Id: Long.pm,v 2.76 2009/03/30 20:54:30 jv Exp $ # Author : Johan Vromans # Created On : Tue Sep 11 15:00:12 1990 # Last Modified By: Johan Vromans # Last Modified On: Mon Mar 30 22:51:17 2009 # Update Count : 1601 # Status : Released ################ Module Preamble ################ use 5.004; use strict; use vars qw($VERSION); $VERSION = 2.38; # For testing versions only. 
#use vars qw($VERSION_STRING);
#$VERSION_STRING = "2.38";

use Exporter;
use vars qw(@ISA @EXPORT @EXPORT_OK);
@ISA = qw(Exporter);

# Exported subroutines.
sub GetOptions(@);              # always
sub GetOptionsFromArray(@);     # on demand
sub GetOptionsFromString(@);    # on demand
sub Configure(@);               # on demand
sub HelpMessage(@);             # on demand
sub VersionMessage(@);          # on demand

BEGIN {
    # Init immediately so their contents can be used in the 'use vars' below.
    @EXPORT    = qw(&GetOptions $REQUIRE_ORDER $PERMUTE $RETURN_IN_ORDER);
    @EXPORT_OK = qw(&HelpMessage &VersionMessage &Configure
                    &GetOptionsFromArray &GetOptionsFromString);
}

# User visible variables.
use vars @EXPORT, @EXPORT_OK;
use vars qw($error $debug $major_version $minor_version);
# Deprecated visible variables.
use vars qw($autoabbrev $getopt_compat $ignorecase $bundling $order
            $passthrough);
# Official invisible variables.
use vars qw($genprefix $caller $gnu_compat $auto_help $auto_version $longprefix);

# Public subroutines.
sub config(@);                  # deprecated name

# Private subroutines.
sub ConfigDefaults();
sub ParseOptionSpec($$);
sub OptCtl($);
sub FindOption($$$$$);
sub ValidValue ($$$$$);

################ Local Variables ################

# $requested_version holds the version that was mentioned in the 'use'
# or 'require', if any. It can be used to enable or disable specific
# features.
my $requested_version = 0;

################ Resident subroutines ################

# ConfigDefaults()
#
# Reset every package-level configuration variable to its default.
# When the environment variable POSIXLY_CORRECT is defined, '+' is not
# an option starter, abbreviation is off and option processing stops
# at the first non-option ($REQUIRE_ORDER); otherwise '+' is allowed,
# abbreviation is on and non-options may be permuted ($PERMUTE).
# Takes no arguments.
sub ConfigDefaults() {
    # Handle POSIX compliancy.
    if ( defined $ENV{"POSIXLY_CORRECT"} ) {
        $genprefix = "(--|-)";
        $autoabbrev = 0;                # no automatic abbrev of options
        $bundling = 0;                  # no bundling of single letter switches
        $getopt_compat = 0;             # disallow '+' to start options
        $order = $REQUIRE_ORDER;
    }
    else {
        $genprefix = "(--|-|\\+)";
        $autoabbrev = 1;                # automatic abbrev of options
        $bundling = 0;                  # bundling off by default
        $getopt_compat = 1;             # allow '+' to start options
        $order = $PERMUTE;
    }
    # Other configurable settings.
    $debug = 0;                         # for debugging
    $error = 0;                         # error tally
    $ignorecase = 1;                    # ignore case when matching options
    $passthrough = 0;                   # leave unrecognized options alone
    $gnu_compat = 0;                    # require --opt=val if value is optional
    $longprefix = "(--)";               # what does a long prefix look like
}

# Override import.
#
# The import list is split at the literal ':config': everything before
# it is a symbol to export, everything after it is handed to
# Configure().  If any symbols were requested explicitly, &GetOptions
# is appended to them; with an empty list Exporter falls back to
# @EXPORT.
sub import {
    my $pkg = shift;            # package
    my @syms = ();              # symbols to import
    my @config = ();            # configuration
    my $dest = \@syms;          # symbols first
    for ( @_ ) {
        if ( $_ eq ':config' ) {
            $dest = \@config;   # config next
            next;
        }
        push(@$dest, $_);       # push
    }
    # Hide one level and call super.
    local $Exporter::ExportLevel = 1;
    push(@syms, qw(&GetOptions)) if @syms; # always export GetOptions
    $pkg->SUPER::import(@syms);
    # And configure.
    Configure(@config) if @config;
}

################ Initialization ################

# Values for $order. See GNU getopt.c for details.
($REQUIRE_ORDER, $PERMUTE, $RETURN_IN_ORDER) = (0..2);
# Version major/minor numbers.
($major_version, $minor_version) = $VERSION =~ /^(\d+)\.(\d+)/;

ConfigDefaults();

################ OO Interface ################

package Getopt::Long::Parser;

# Store a copy of the default configuration. Since ConfigDefaults has
# just been called, what we get from Configure is the default.
my $default_config = do {
    Getopt::Long::Configure ()
};

# new( [ config => \@settings ] )
#
# Construct a parser object.  The caller's package is recorded in
# {caller_pkg}.  An optional 'config' attribute (array ref) is applied
# on top of the default configuration and the result is stored in
# {settings}; the global configuration is restored afterwards.
# Dies if any attribute other than 'config' is passed.
sub new {
    my $that = shift;
    my $class = ref($that) || $that;
    my %atts = @_;

    # Register the callers package.
    my $self = { caller_pkg => (caller)[0] };

    bless ($self, $class);

    # Process config attributes.
    if ( defined $atts{config} ) {
        my $save = Getopt::Long::Configure ($default_config, @{$atts{config}});
        $self->{settings} = Getopt::Long::Configure ($save);
        delete ($atts{config});
    }
    # Else use default config.
    else {
        $self->{settings} = $default_config;
    }

    if ( %atts ) {              # Oops
        die(__PACKAGE__.": unhandled attributes: ".
            join(" ", sort(keys(%atts)))."\n");
    }

    $self;
}

# configure( @settings )
#
# Merge @settings into this parser's stored settings.  The global
# configuration is put back as it was before the call.
sub configure {
    my ($self) = shift;

    # Restore settings, merge new settings in.
    my $save = Getopt::Long::Configure ($self->{settings}, @_);

    # Restore orig config and save the new config.
    $self->{settings} = Getopt::Long::Configure ($save);
}

# getoptions( @optionlist )
#
# Run Getopt::Long::GetOptions with this parser's settings and with
# $Getopt::Long::caller set to the package that created the parser.
# The previous global configuration is restored before returning.
# Returns GetOptions' result; re-throws anything GetOptions died with.
sub getoptions {
    my ($self) = shift;

    # Restore config settings.
    my $save = Getopt::Long::Configure ($self->{settings});

    # Call main routine.
    my $ret = 0;
    $Getopt::Long::caller = $self->{caller_pkg};

    eval {
        # Locally set exception handler to default, otherwise it will
        # be called implicitly here, and again explicitly when we try
        # to deliver the messages.
        local ($SIG{__DIE__}) = 'DEFAULT';
        $ret = Getopt::Long::GetOptions (@_);
    };

    # Restore saved settings.
    Getopt::Long::Configure ($save);

    # Handle errors and return value.
    die ($@) if $@;
    return $ret;
}

package Getopt::Long;

################ Back to Normal ################

# Indices in option control info.
# Note that ParseOptions uses the fields directly. Search for 'hard-wired'.
use constant CTL_TYPE    => 0;
#use constant   CTL_TYPE_FLAG   => '';
#use constant   CTL_TYPE_NEG    => '!';
#use constant   CTL_TYPE_INCR   => '+';
#use constant   CTL_TYPE_INT    => 'i';
#use constant   CTL_TYPE_INTINC => 'I';
#use constant   CTL_TYPE_XINT   => 'o';
#use constant   CTL_TYPE_FLOAT  => 'f';
#use constant   CTL_TYPE_STRING => 's';

use constant CTL_CNAME   => 1;

use constant CTL_DEFAULT => 2;

use constant CTL_DEST    => 3;
 use constant   CTL_DEST_SCALAR => 0;
 use constant   CTL_DEST_ARRAY  => 1;
 use constant   CTL_DEST_HASH   => 2;
 use constant   CTL_DEST_CODE   => 3;

use constant CTL_AMIN    => 4;
use constant CTL_AMAX    => 5;

# FFU.
#use constant CTL_RANGE   => ;
#use constant CTL_REPEAT  => ;

# Rather liberal patterns to match numbers.
use constant PAT_INT   => "[-+]?_*[0-9][0-9_]*";
use constant PAT_XINT  =>
  "(?:".
          "[-+]?_*[1-9][0-9_]*".
  "|".
          "0x_*[0-9a-f][0-9a-f_]*".
  "|".
          "0b_*[01][01_]*".
  "|".
          "0[0-7_]*".
  ")";
# These are double-quoted strings, so the decimal point must be written
# "\\." to reach the regex engine as "\.".  A single backslash is
# dropped by string interpolation, leaving a bare "." that matches any
# character (which made values such as "1x5" pass as floats).
use constant PAT_FLOAT => "[-+]?[0-9._]+(\\.[0-9_]+)?([eE][-+]?[0-9_]+)?";

# GetOptions( @optionlist )
#
# Parse @ARGV.  Prepends a reference to @ARGV to the argument list and
# tail-calls GetOptionsFromArray, so that routine sees the original
# caller.
sub GetOptions(@) {
    # Shift in default array.
    unshift(@_, \@ARGV);
    # Try to keep caller() and Carp consistent.
    goto &GetOptionsFromArray;
}

# GetOptionsFromString( $string, @optionlist )
#
# Split $string into words with Text::ParseWords::shellwords and parse
# them with GetOptionsFromArray.
# In list context returns ($ret, $args), where $args is an array ref
# holding the words left over after parsing.  In scalar context
# left-over words are an error: a warning is issued and 0 is returned;
# otherwise the result of GetOptionsFromArray is returned.
sub GetOptionsFromString(@) {
    my ($string) = shift;
    require Text::ParseWords;
    my $args = [ Text::ParseWords::shellwords($string) ];
    $caller ||= (caller)[0];    # current context
    my $ret = GetOptionsFromArray($args, @_);
    return ( $ret, $args ) if wantarray;
    if ( @$args ) {
        $ret = 0;
        warn("GetOptionsFromString: Excess data \"@$args\" in string \"$string\"\n");
    }
    $ret;
}

# GetOptionsFromArray( \@argv, @optionlist )
#
# Main entry point.  @optionlist may start with a hash reference (user
# linkage: destination for option values) and/or a string of
# non-word characters that replaces the default option prefixes.  The
# remainder is a list of option specifications, each optionally
# followed by a reference (SCALAR/REF, ARRAY, HASH or CODE) that
# receives the value.  A spec without linkage and without user linkage
# is linked to the global $opt_XXX / @opt_XXX / %opt_XXX in the
# calling package.
#
# Options are shifted off @$argv as they are processed.  With
# $order == $PERMUTE, non-option arguments are either passed to the
# '<>' callback or collected and put back on @$argv at the end;
# otherwise processing stops at the first non-option.
#
# Dies with the accumulated messages when the option specifications
# themselves are invalid.  Returns true when no errors were counted
# while processing the arguments, false otherwise.
sub GetOptionsFromArray(@) {

    my ($argv, @optionlist) = @_;       # local copy of the option descriptions
    my $argend = '--';          # option list terminator
    my %opctl = ();             # table of option specs
    my $pkg = $caller || (caller)[0];   # current context
                                        # Needed if linkage is omitted.
    my @ret = ();               # accum for non-options
    my %linkage;                # linkage
    my $userlinkage;            # user supplied HASH
    my $opt;                    # current option
    my $prefix = $genprefix;    # current prefix

    $error = '';

    if ( $debug ) {
        # Avoid some warnings if debugging.
        local ($^W) = 0;
        print STDERR
          ("Getopt::Long $Getopt::Long::VERSION (",
           '$Revision: 2.76 $', ") ",
           "called from package \"$pkg\".",
           "\n ",
           "argv: (@$argv)",
           "\n ",
           "autoabbrev=$autoabbrev,".
           "bundling=$bundling,",
           "getopt_compat=$getopt_compat,",
           "gnu_compat=$gnu_compat,",
           "order=$order,",
           "\n ",
           "ignorecase=$ignorecase,",
           "requested_version=$requested_version,",
           "passthrough=$passthrough,",
           "genprefix=\"$genprefix\",",
           "longprefix=\"$longprefix\".",
           "\n");
    }

    # Check for ref HASH as first argument.
    # First argument may be an object. It's OK to use this as long
    # as it is really a hash underneath.
    $userlinkage = undef;
    if ( @optionlist && ref($optionlist[0]) and
         UNIVERSAL::isa($optionlist[0],'HASH') ) {
        $userlinkage = shift (@optionlist);
        print STDERR ("=> user linkage: $userlinkage\n") if $debug;
    }

    # See if the first element of the optionlist contains option
    # starter characters.
    # Be careful not to interpret '<>' as option starters.
    if ( @optionlist && $optionlist[0] =~ /^\W+$/
         && !($optionlist[0] eq '<>'
              && @optionlist > 0
              && ref($optionlist[1])) ) {
        $prefix = shift (@optionlist);
        # Turn into regexp. Needs to be parenthesized!
        $prefix =~ s/(\W)/\\$1/g;
        $prefix = "([" . $prefix . "])";
        print STDERR ("=> prefix=\"$prefix\"\n") if $debug;
    }

    # Verify correctness of optionlist.
    %opctl = ();
    while ( @optionlist ) {
        my $opt = shift (@optionlist);

        unless ( defined($opt) ) {
            $error .= "Undefined argument in option spec\n";
            next;
        }

        # Strip leading prefix so people can specify "--foo=i" if they like.
        $opt = $+ if $opt =~ /^$prefix+(.*)$/s;

        if ( $opt eq '<>' ) {
            if ( (defined $userlinkage)
                && !(@optionlist > 0 && ref($optionlist[0]))
                && (exists $userlinkage->{$opt})
                && ref($userlinkage->{$opt}) ) {
                unshift (@optionlist, $userlinkage->{$opt});
            }
            unless ( @optionlist > 0
                    && ref($optionlist[0]) && ref($optionlist[0]) eq 'CODE' ) {
                $error .= "Option spec <> requires a reference to a subroutine\n";
                # Kill the linkage (to avoid another error).
                shift (@optionlist)
                  if @optionlist && ref($optionlist[0]);
                next;
            }
            $linkage{'<>'} = shift (@optionlist);
            next;
        }

        # Parse option spec.
        my ($name, $orig) = ParseOptionSpec ($opt, \%opctl);
        unless ( defined $name ) {
            # Failed. $orig contains the error message. Sorry for the abuse.
            $error .= $orig;
            # Kill the linkage (to avoid another error).
            shift (@optionlist)
              if @optionlist && ref($optionlist[0]);
            next;
        }

        # If no linkage is supplied in the @optionlist, copy it from
        # the userlinkage if available.
        if ( defined $userlinkage ) {
            unless ( @optionlist > 0 && ref($optionlist[0]) ) {
                if ( exists $userlinkage->{$orig} &&
                     ref($userlinkage->{$orig}) ) {
                    print STDERR ("=> found userlinkage for \"$orig\": ",
                                  "$userlinkage->{$orig}\n")
                        if $debug;
                    unshift (@optionlist, $userlinkage->{$orig});
                }
                else {
                    # Do nothing. Being undefined will be handled later.
                    next;
                }
            }
        }

        # Copy the linkage. If omitted, link to global variable.
        if ( @optionlist > 0 && ref($optionlist[0]) ) {
            print STDERR ("=> link \"$orig\" to $optionlist[0]\n")
                if $debug;
            my $rl = ref($linkage{$orig} = shift (@optionlist));

            if ( $rl eq "ARRAY" ) {
                $opctl{$name}[CTL_DEST] = CTL_DEST_ARRAY;
            }
            elsif ( $rl eq "HASH" ) {
                $opctl{$name}[CTL_DEST] = CTL_DEST_HASH;
            }
            elsif ( $rl eq "SCALAR" || $rl eq "REF" ) {
#               if ( $opctl{$name}[CTL_DEST] == CTL_DEST_ARRAY ) {
#                   my $t = $linkage{$orig};
#                   $$t = $linkage{$orig} = [];
#               }
#               elsif ( $opctl{$name}[CTL_DEST] == CTL_DEST_HASH ) {
#               }
#               else {
                    # Ok.
#               }
            }
            elsif ( $rl eq "CODE" ) {
                # Ok.
            }
            else {
                $error .= "Invalid option linkage for \"$opt\"\n";
            }
        }
        else {
            # Link to global $opt_XXX variable.
            # Make sure a valid perl identifier results.
            my $ov = $orig;
            $ov =~ s/\W/_/g;
            if ( $opctl{$name}[CTL_DEST] == CTL_DEST_ARRAY ) {
                print STDERR ("=> link \"$orig\" to \@$pkg","::opt_$ov\n")
                    if $debug;
                eval ("\$linkage{\$orig} = \\\@".$pkg."::opt_$ov;");
            }
            elsif ( $opctl{$name}[CTL_DEST] == CTL_DEST_HASH ) {
                print STDERR ("=> link \"$orig\" to \%$pkg","::opt_$ov\n")
                    if $debug;
                eval ("\$linkage{\$orig} = \\\%".$pkg."::opt_$ov;");
            }
            else {
                print STDERR ("=> link \"$orig\" to \$$pkg","::opt_$ov\n")
                    if $debug;
                eval ("\$linkage{\$orig} = \\\$".$pkg."::opt_$ov;");
            }
        }

        if ( $opctl{$name}[CTL_TYPE] eq 'I'
             && ( $opctl{$name}[CTL_DEST] == CTL_DEST_ARRAY
                  || $opctl{$name}[CTL_DEST] == CTL_DEST_HASH )
           ) {
            $error .= "Invalid option linkage for \"$opt\"\n";
        }

    }

    # Bail out if errors found.
    die ($error) if $error;
    $error = 0;

    # Supply --version and --help support, if needed and allowed.
    if ( defined($auto_version) ? $auto_version : ($requested_version >= 2.3203) ) {
        if ( !defined($opctl{version}) ) {
            $opctl{version} = ['','version',0,CTL_DEST_CODE,undef];
            $linkage{version} = \&VersionMessage;
        }
        $auto_version = 1;
    }
    if ( defined($auto_help) ? $auto_help : ($requested_version >= 2.3203) ) {
        if ( !defined($opctl{help}) && !defined($opctl{'?'}) ) {
            $opctl{help} = $opctl{'?'} = ['','help',0,CTL_DEST_CODE,undef];
            $linkage{help} = \&HelpMessage;
        }
        $auto_help = 1;
    }

    # Show the options tables if debugging.
    if ( $debug ) {
        my ($arrow, $k, $v);
        $arrow = "=> ";
        while ( ($k,$v) = each(%opctl) ) {
            print STDERR ($arrow, "\$opctl{$k} = $v ", OptCtl($v), "\n");
            $arrow = " ";
        }
    }

    # Process argument list
    my $goon = 1;
    while ( $goon && @$argv > 0 ) {

        # Get next argument.
        $opt = shift (@$argv);
        print STDERR ("=> arg \"", $opt, "\"\n") if $debug;

        # Double dash is option list terminator.
        if ( $opt eq $argend ) {
            push (@ret, $argend) if $passthrough;
            last;
        }

        # Look it up.
        my $tryopt = $opt;
        my $found;              # success status
        my $key;                # key (if hash type)
        my $arg;                # option argument
        my $ctl;                # the opctl entry

        ($found, $opt, $ctl, $arg, $key) =
          FindOption ($argv, $prefix, $argend, $opt, \%opctl);

        if ( $found ) {

            # FindOption undefines $opt in case of errors.
            next unless defined $opt;

            my $argcnt = 0;
            while ( defined $arg ) {

                # Get the canonical name.
                print STDERR ("=> cname for \"$opt\" is ") if $debug;
                $opt = $ctl->[CTL_CNAME];
                print STDERR ("\"$ctl->[CTL_CNAME]\"\n") if $debug;

                if ( defined $linkage{$opt} ) {
                    print STDERR ("=> ref(\$L{$opt}) -> ",
                                  ref($linkage{$opt}), "\n") if $debug;

                    if ( ref($linkage{$opt}) eq 'SCALAR'
                         || ref($linkage{$opt}) eq 'REF' ) {
                        if ( $ctl->[CTL_TYPE] eq '+' ) {
                            print STDERR ("=> \$\$L{$opt} += \"$arg\"\n")
                              if $debug;
                            if ( defined ${$linkage{$opt}} ) {
                                ${$linkage{$opt}} += $arg;
                            }
                            else {
                                ${$linkage{$opt}} = $arg;
                            }
                        }
                        elsif ( $ctl->[CTL_DEST] == CTL_DEST_ARRAY ) {
                            print STDERR ("=> ref(\$L{$opt}) auto-vivified",
                                          " to ARRAY\n")
                              if $debug;
                            my $t = $linkage{$opt};
                            $$t = $linkage{$opt} = [];
                            print STDERR ("=> push(\@{\$L{$opt}, \"$arg\")\n")
                              if $debug;
                            push (@{$linkage{$opt}}, $arg);
                        }
                        elsif ( $ctl->[CTL_DEST] == CTL_DEST_HASH ) {
                            print STDERR ("=> ref(\$L{$opt}) auto-vivified",
                                          " to HASH\n")
                              if $debug;
                            my $t = $linkage{$opt};
                            $$t = $linkage{$opt} = {};
                            print STDERR ("=> \$\$L{$opt}->{$key} = \"$arg\"\n")
                              if $debug;
                            $linkage{$opt}->{$key} = $arg;
                        }
                        else {
                            print STDERR ("=> \$\$L{$opt} = \"$arg\"\n")
                              if $debug;
                            ${$linkage{$opt}} = $arg;
                        }
                    }
                    elsif ( ref($linkage{$opt}) eq 'ARRAY' ) {
                        print STDERR ("=> push(\@{\$L{$opt}, \"$arg\")\n")
                            if $debug;
                        push (@{$linkage{$opt}}, $arg);
                    }
                    elsif ( ref($linkage{$opt}) eq 'HASH' ) {
                        print STDERR ("=> \$\$L{$opt}->{$key} = \"$arg\"\n")
                            if $debug;
                        $linkage{$opt}->{$key} = $arg;
                    }
                    elsif ( ref($linkage{$opt}) eq 'CODE' ) {
                        print STDERR ("=> &L{$opt}(\"$opt\"",
                                      $ctl->[CTL_DEST] == CTL_DEST_HASH ? ", \"$key\"" : "",
                                      ", \"$arg\")\n")
                            if $debug;
                        # Run the callback with $@ and the die handler
                        # localized; whatever it died with ends up in
                        # $eval_error ('' on success).
                        my $eval_error = do {
                            local $@;
                            local $SIG{__DIE__} = 'DEFAULT';
                            eval {
                                &{$linkage{$opt}}
                                  (Getopt::Long::CallBack->new
                                   (name => $opt,
                                    ctl => $ctl,
                                    opctl => \%opctl,
                                    linkage => \%linkage,
                                    prefix => $prefix,
                                   ),
                                   $ctl->[CTL_DEST] == CTL_DEST_HASH ? ($key) : (),
                                   $arg);
                            };
                            $@;
                        };
                        print STDERR ("=> die($eval_error)\n")
                          if $debug && $eval_error ne '';
                        # A message starting with '!' is a control
                        # request, not an error: '!FINISH' stops option
                        # processing, any other '!...' is ignored.
                        if ( $eval_error =~ /^!/ ) {
                            if ( $eval_error =~ /^!FINISH\b/ ) {
                                $goon = 0;
                            }
                        }
                        elsif ( $eval_error ne '' ) {
                            warn ($eval_error);
                            $error++;
                        }
                    }
                    else {
                        print STDERR ("Invalid REF type \"", ref($linkage{$opt}),
                                      "\" in linkage\n");
                        die("Getopt::Long -- internal error!\n");
                    }
                }
                # No entry in linkage means entry in userlinkage.
                elsif ( $ctl->[CTL_DEST] == CTL_DEST_ARRAY ) {
                    if ( defined $userlinkage->{$opt} ) {
                        print STDERR ("=> push(\@{\$L{$opt}}, \"$arg\")\n")
                            if $debug;
                        push (@{$userlinkage->{$opt}}, $arg);
                    }
                    else {
                        print STDERR ("=>\$L{$opt} = [\"$arg\"]\n")
                            if $debug;
                        $userlinkage->{$opt} = [$arg];
                    }
                }
                elsif ( $ctl->[CTL_DEST] == CTL_DEST_HASH ) {
                    if ( defined $userlinkage->{$opt} ) {
                        print STDERR ("=> \$L{$opt}->{$key} = \"$arg\"\n")
                            if $debug;
                        $userlinkage->{$opt}->{$key} = $arg;
                    }
                    else {
                        print STDERR ("=>\$L{$opt} = {$key => \"$arg\"}\n")
                            if $debug;
                        $userlinkage->{$opt} = {$key => $arg};
                    }
                }
                else {
                    if ( $ctl->[CTL_TYPE] eq '+' ) {
                        print STDERR ("=> \$L{$opt} += \"$arg\"\n")
                          if $debug;
                        if ( defined $userlinkage->{$opt} ) {
                            $userlinkage->{$opt} += $arg;
                        }
                        else {
                            $userlinkage->{$opt} = $arg;
                        }
                    }
                    else {
                        print STDERR ("=>\$L{$opt} = \"$arg\"\n") if $debug;
                        $userlinkage->{$opt} = $arg;
                    }
                }

                $argcnt++;
                last if $argcnt >= $ctl->[CTL_AMAX] && $ctl->[CTL_AMAX] != -1;
                undef($arg);

                # Need more args?
                if ( $argcnt < $ctl->[CTL_AMIN] ) {
                    if ( @$argv ) {
                        if ( ValidValue($ctl, $argv->[0], 1, $argend, $prefix) ) {
                            $arg = shift(@$argv);
                            $arg =~ tr/_//d if $ctl->[CTL_TYPE] =~ /^[iIo]$/;
                            ($key,$arg) = $arg =~ /^([^=]+)=(.*)/
                              if $ctl->[CTL_DEST] == CTL_DEST_HASH;
                            next;
                        }
                        warn("Value \"$$argv[0]\" invalid for option $opt\n");
                        $error++;
                    }
                    else {
                        warn("Insufficient arguments for option $opt\n");
                        $error++;
                    }
                }

                # Any more args?
                if ( @$argv && ValidValue($ctl, $argv->[0], 0, $argend, $prefix) ) {
                    $arg = shift(@$argv);
                    $arg =~ tr/_//d if $ctl->[CTL_TYPE] =~ /^[iIo]$/;
                    ($key,$arg) = $arg =~ /^([^=]+)=(.*)/
                      if $ctl->[CTL_DEST] == CTL_DEST_HASH;
                    next;
                }
            }
        }

        # Not an option. Save it if we $PERMUTE and don't have a <>.
        elsif ( $order == $PERMUTE ) {
            # Try non-options call-back.
            my $cb;
            if ( (defined ($cb = $linkage{'<>'})) ) {
                print STDERR ("=> &L{$tryopt}(\"$tryopt\")\n")
                  if $debug;
                my $eval_error = do {
                    local $@;
                    local $SIG{__DIE__} = 'DEFAULT';
                    eval {
                        &$cb
                          (Getopt::Long::CallBack->new
                           (name => $tryopt,
                            ctl => $ctl,
                            opctl => \%opctl,
                            linkage => \%linkage,
                            prefix => $prefix,
                           ));
                    };
                    $@;
                };
                print STDERR ("=> die($eval_error)\n")
                  if $debug && $eval_error ne '';
                if ( $eval_error =~ /^!/ ) {
                    if ( $eval_error =~ /^!FINISH\b/ ) {
                        $goon = 0;
                    }
                }
                elsif ( $eval_error ne '' ) {
                    warn ($eval_error);
                    $error++;
                }
            }
            else {
                print STDERR ("=> saving \"$tryopt\" ",
                              "(not an option, may permute)\n") if $debug;
                push (@ret, $tryopt);
            }
            next;
        }

        # ...otherwise, terminate.
        else {
            # Push this one back and exit.
            unshift (@$argv, $tryopt);
            return ($error == 0);
        }

    }

    # Finish.
    if ( @ret && $order == $PERMUTE ) {
        # Push back accumulated arguments
        print STDERR ("=> restoring \"", join('" "', @ret), "\"\n")
            if $debug;
        unshift (@$argv, @ret);
    }

    return ($error == 0);
}

# A readable representation of what's in an optbl.
#
# Takes one option control entry (array ref indexed by the CTL_*
# constants) and returns it as a bracketed, comma-separated string;
# undefined fields are shown as empty strings and the destination is
# shown as its sigil ($, @, % or &).
sub OptCtl ($) {
    my ($v) = @_;
    my @v = map { defined($_) ? ($_) : ("") } @$v;
    "[".
      join(",",
           "\"$v[CTL_TYPE]\"",
           "\"$v[CTL_CNAME]\"",
           "\"$v[CTL_DEFAULT]\"",
           ("\$","\@","\%","\&")[$v[CTL_DEST] || 0],
           $v[CTL_AMIN] || '',
           $v[CTL_AMAX] || '',
#          $v[CTL_RANGE] || '',
#          $v[CTL_REPEAT] || '',
          ). "]";
}

# Parse an option specification and fill the tables.
#
# Arguments: the specification string (e.g. "foo|f=s@") and a
# reference to the option control table, which receives one entry per
# name/alias (plus "no<name>" and "no-<name>" entries for negatable
# '!' options).
# Returns ($name, $orig) on success: $name is the first name as stored
# in the table (lowercased when case is being ignored), $orig is the
# first name exactly as the user wrote it.  On failure returns
# (undef, $message).
# Duplicate names are reported with warn(), but only when $^W is set.
sub ParseOptionSpec ($$) {
    my ($opt, $opctl) = @_;

    # Match option spec.
    if ( $opt !~ m;^
                   (
                     # Option name
                     (?: \w+[-\w]* )
                     # Alias names, or "?"
                     (?: \| (?: \? | \w[-\w]* ) )*
                   )?
                   (
                     # Either modifiers ...
                     [!+]
                     |
                     # ... or a value/dest/repeat specification
                     [=:] [ionfs] [@%]? (?: \{\d*,?\d*\} )?
                     |
                     # ... or an optional-with-default spec
                     : (?: -?\d+ | \+ ) [@%]?
                   )?
                   $;x ) {
        return (undef, "Error in option spec: \"$opt\"\n");
    }

    my ($names, $spec) = ($1, $2);
    $spec = '' unless defined $spec;

    # $orig keeps track of the primary name the user specified.
    # This name will be used for the internal or external linkage.
    # In other words, if the user specifies "FoO|BaR", it will
    # match any case combinations of 'foo' and 'bar', but if a global
    # variable needs to be set, it will be $opt_FoO in the exact case
    # as specified.
    my $orig;

    my @names;
    if ( defined $names ) {
        @names = split (/\|/, $names);
        $orig = $names[0];
    }
    else {
        @names = ('');
        $orig = '';
    }

    # Construct the opctl entries.
    my $entry;
    if ( $spec eq '' || $spec eq '+' || $spec eq '!' ) {
        # Fields are hard-wired here.
        $entry = [$spec,$orig,undef,CTL_DEST_SCALAR,0,0];
    }
    elsif ( $spec =~ /^:(-?\d+|\+)([@%])?$/ ) {
        my $def = $1;
        my $dest = $2;
        my $type = $def eq '+' ? 'I' : 'i';
        $dest ||= '$';
        $dest = $dest eq '@' ? CTL_DEST_ARRAY
          : $dest eq '%' ? CTL_DEST_HASH : CTL_DEST_SCALAR;
        # Fields are hard-wired here.
        $entry = [$type,$orig,$def eq '+' ? undef : $def,
                  $dest,0,1];
    }
    else {
        my ($mand, $type, $dest) =
          $spec =~ /^([=:])([ionfs])([@%])?(\{(\d+)?(,)?(\d+)?\})?$/;
        return (undef, "Cannot repeat while bundling: \"$opt\"\n")
          if $bundling && defined($4);
        my ($mi, $cm, $ma) = ($5, $6, $7);
        return (undef, "{0} is useless in option spec: \"$opt\"\n")
          if defined($mi) && !$mi && !defined($ma) && !defined($cm);

        $type = 'i' if $type eq 'n';
        $dest ||= '$';
        $dest = $dest eq '@' ? CTL_DEST_ARRAY
          : $dest eq '%' ? CTL_DEST_HASH : CTL_DEST_SCALAR;
        # Default minargs to 1/0 depending on mand status.
        $mi = $mand eq '=' ? 1 : 0 unless defined $mi;
        # Adjust mand status according to minargs.
        $mand = $mi ? '=' : ':';
        # Adjust maxargs.
        $ma = $mi ? $mi : 1 unless defined $ma || defined $cm;
        return (undef, "Max must be greater than zero in option spec: \"$opt\"\n")
          if defined($ma) && !$ma;
        return (undef, "Max less than min in option spec: \"$opt\"\n")
          if defined($ma) && $ma < $mi;

        # Fields are hard-wired here.
        $entry = [$type,$orig,undef,$dest,$mi,$ma||-1];
    }

    # Process all names. First is canonical, the rest are aliases.
    my $dups = '';
    foreach ( @names ) {

        # $_ aliases the element of @names, so this also lowercases
        # the name that is returned below.
        $_ = lc ($_)
          if $ignorecase > (($bundling && length($_) == 1) ? 1 : 0);

        if ( exists $opctl->{$_} ) {
            $dups .= "Duplicate specification \"$opt\" for option \"$_\"\n";
        }

        if ( $spec eq '!' ) {
            $opctl->{"no$_"} = $entry;
            $opctl->{"no-$_"} = $entry;
            $opctl->{$_} = [@$entry];
            $opctl->{$_}->[CTL_TYPE] = '';
        }
        else {
            $opctl->{$_} = $entry;
        }
    }

    if ( $dups && $^W ) {
        foreach ( split(/\n+/, $dups) ) {
            warn($_."\n");
        }
    }
    ($names[0], $orig);
}

# Option lookup.
sub FindOption ($$$$$) {

    # returns (1, $opt, $ctl, $arg, $key) if okay,
    # returns (1, undef) if option in error,
    # returns (0) otherwise.

    my ($argv, $prefix, $argend, $opt, $opctl) = @_;

    print STDERR ("=> find \"$opt\"\n") if $debug;

    return (0) unless $opt =~ /^$prefix(.*)$/s;
    return (0) if $opt eq "-" && !defined $opctl->{''};

    $opt = $+;
    my $starter = $1;

    print STDERR ("=> split \"$starter\"+\"$opt\"\n") if $debug;

    my $optarg;                 # value supplied with --opt=value
    my $rest;                   # remainder from unbundling

    # If it is a long option, it may include the value.
    # With getopt_compat, only if not bundling.
    if ( ($starter=~/^$longprefix$/
          || ($getopt_compat && ($bundling == 0 || $bundling == 2)))
          && $opt =~ /^([^=]+)=(.*)$/s ) {
        $opt = $1;
        $optarg = $2;
        print STDERR ("=> option \"", $opt,
                      "\", optarg = \"$optarg\"\n") if $debug;
    }

    #### Look it up ###

    my $tryopt = $opt;          # option to try

    if ( $bundling && $starter eq '-' ) {

        # To try overrides, obey case ignore.
        $tryopt = $ignorecase ? lc($opt) : $opt;

        # If bundling == 2, long options can override bundles.
if ( $bundling == 2 && length($tryopt) > 1 && defined ($opctl->{$tryopt}) ) { print STDERR ("=> $starter$tryopt overrides unbundling\n") if $debug; } else { $tryopt = $opt; # Unbundle single letter option. $rest = length ($tryopt) > 0 ? substr ($tryopt, 1) : ''; $tryopt = substr ($tryopt, 0, 1); $tryopt = lc ($tryopt) if $ignorecase > 1; print STDERR ("=> $starter$tryopt unbundled from ", "$starter$tryopt$rest\n") if $debug; $rest = undef unless $rest ne ''; } } # Try auto-abbreviation. elsif ( $autoabbrev && $opt ne "" ) { # Sort the possible long option names. my @names = sort(keys (%$opctl)); # Downcase if allowed. $opt = lc ($opt) if $ignorecase; $tryopt = $opt; # Turn option name into pattern. my $pat = quotemeta ($opt); # Look up in option names. my @hits = grep (/^$pat/, @names); print STDERR ("=> ", scalar(@hits), " hits (@hits) with \"$pat\" ", "out of ", scalar(@names), "\n") if $debug; # Check for ambiguous results. unless ( (@hits <= 1) || (grep ($_ eq $opt, @hits) == 1) ) { # See if all matches are for the same option. my %hit; foreach ( @hits ) { my $hit = $_; $hit = $opctl->{$hit}->[CTL_CNAME] if defined $opctl->{$hit}->[CTL_CNAME]; $hit{$hit} = 1; } # Remove auto-supplied options (version, help). if ( keys(%hit) == 2 ) { if ( $auto_version && exists($hit{version}) ) { delete $hit{version}; } elsif ( $auto_help && exists($hit{help}) ) { delete $hit{help}; } } # Now see if it really is ambiguous. unless ( keys(%hit) == 1 ) { return (0) if $passthrough; warn ("Option ", $opt, " is ambiguous (", join(", ", @hits), ")\n"); $error++; return (1, undef); } @hits = keys(%hit); } # Complete the option name, if appropriate. if ( @hits == 1 && $hits[0] ne $opt ) { $tryopt = $hits[0]; $tryopt = lc ($tryopt) if $ignorecase; print STDERR ("=> option \"$opt\" -> \"$tryopt\"\n") if $debug; } } # Map to all lowercase if ignoring case. elsif ( $ignorecase ) { $tryopt = lc ($opt); } # Check validity by fetching the info. 
my $ctl = $opctl->{$tryopt}; unless ( defined $ctl ) { return (0) if $passthrough; # Pretend one char when bundling. if ( $bundling == 1 && length($starter) == 1 ) { $opt = substr($opt,0,1); unshift (@$argv, $starter.$rest) if defined $rest; } if ( $opt eq "" ) { warn ("Missing option after ", $starter, "\n"); } else { warn ("Unknown option: ", $opt, "\n"); } $error++; return (1, undef); } # Apparently valid. $opt = $tryopt; print STDERR ("=> found ", OptCtl($ctl), " for \"", $opt, "\"\n") if $debug; #### Determine argument status #### # If it is an option w/o argument, we're almost finished with it. my $type = $ctl->[CTL_TYPE]; my $arg; if ( $type eq '' || $type eq '!' || $type eq '+' ) { if ( defined $optarg ) { return (0) if $passthrough; warn ("Option ", $opt, " does not take an argument\n"); $error++; undef $opt; } elsif ( $type eq '' || $type eq '+' ) { # Supply explicit value. $arg = 1; } else { $opt =~ s/^no-?//i; # strip NO prefix $arg = 0; # supply explicit value } unshift (@$argv, $starter.$rest) if defined $rest; return (1, $opt, $ctl, $arg); } # Get mandatory status and type info. my $mand = $ctl->[CTL_AMIN]; # Check if there is an option argument available. if ( $gnu_compat && defined $optarg && $optarg eq '' ) { return (1, $opt, $ctl, $type eq 's' ? '' : 0) ;#unless $mand; $optarg = 0 unless $type eq 's'; } # Check if there is an option argument available. if ( defined $optarg ? ($optarg eq '') : !(defined $rest || @$argv > 0) ) { # Complain if this option needs an argument. # if ( $mand && !($type eq 's' ? defined($optarg) : 0) ) { if ( $mand ) { return (0) if $passthrough; warn ("Option ", $opt, " requires an argument\n"); $error++; return (1, undef); } if ( $type eq 'I' ) { # Fake incremental type. my @c = @$ctl; $c[CTL_TYPE] = '+'; return (1, $opt, \@c, 1); } return (1, $opt, $ctl, defined($ctl->[CTL_DEFAULT]) ? $ctl->[CTL_DEFAULT] : $type eq 's' ? '' : 0); } # Get (possibly optional) argument. $arg = (defined $rest ? $rest : (defined $optarg ? 
$optarg : shift (@$argv))); # Get key if this is a "name=value" pair for a hash option. my $key; if ($ctl->[CTL_DEST] == CTL_DEST_HASH && defined $arg) { ($key, $arg) = ($arg =~ /^([^=]*)=(.*)$/s) ? ($1, $2) : ($arg, defined($ctl->[CTL_DEFAULT]) ? $ctl->[CTL_DEFAULT] : ($mand ? undef : ($type eq 's' ? "" : 1))); if (! defined $arg) { warn ("Option $opt, key \"$key\", requires a value\n"); $error++; # Push back. unshift (@$argv, $starter.$rest) if defined $rest; return (1, undef); } } #### Check if the argument is valid for this option #### my $key_valid = $ctl->[CTL_DEST] == CTL_DEST_HASH ? "[^=]+=" : ""; if ( $type eq 's' ) { # string # A mandatory string takes anything. return (1, $opt, $ctl, $arg, $key) if $mand; # Same for optional string as a hash value return (1, $opt, $ctl, $arg, $key) if $ctl->[CTL_DEST] == CTL_DEST_HASH; # An optional string takes almost anything. return (1, $opt, $ctl, $arg, $key) if defined $optarg || defined $rest; return (1, $opt, $ctl, $arg, $key) if $arg eq "-"; # ?? # Check for option or option list terminator. if ($arg eq $argend || $arg =~ /^$prefix.+/) { # Push back. unshift (@$argv, $arg); # Supply empty value. $arg = ''; } } elsif ( $type eq 'i' # numeric/integer || $type eq 'I' # numeric/integer w/ incr default || $type eq 'o' ) { # dec/oct/hex/bin value my $o_valid = $type eq 'o' ? PAT_XINT : PAT_INT; if ( $bundling && defined $rest && $rest =~ /^($key_valid)($o_valid)(.*)$/si ) { ($key, $arg, $rest) = ($1, $2, $+); chop($key) if $key; $arg = ($type eq 'o' && $arg =~ /^0/) ? oct($arg) : 0+$arg; unshift (@$argv, $starter.$rest) if defined $rest && $rest ne ''; } elsif ( $arg =~ /^$o_valid$/si ) { $arg =~ tr/_//d; $arg = ($type eq 'o' && $arg =~ /^0/) ? oct($arg) : 0+$arg; } else { if ( defined $optarg || $mand ) { if ( $passthrough ) { unshift (@$argv, defined $rest ? $starter.$rest : $arg) unless defined $optarg; return (0); } warn ("Value \"", $arg, "\" invalid for option ", $opt, " (", $type eq 'o' ? 
"extended " : '', "number expected)\n"); $error++; # Push back. unshift (@$argv, $starter.$rest) if defined $rest; return (1, undef); } else { # Push back. unshift (@$argv, defined $rest ? $starter.$rest : $arg); if ( $type eq 'I' ) { # Fake incremental type. my @c = @$ctl; $c[CTL_TYPE] = '+'; return (1, $opt, \@c, 1); } # Supply default value. $arg = defined($ctl->[CTL_DEFAULT]) ? $ctl->[CTL_DEFAULT] : 0; } } } elsif ( $type eq 'f' ) { # real number, int is also ok # We require at least one digit before a point or 'e', # and at least one digit following the point and 'e'. # [-]NN[.NN][eNN] my $o_valid = PAT_FLOAT; if ( $bundling && defined $rest && $rest =~ /^($key_valid)($o_valid)(.*)$/s ) { $arg =~ tr/_//d; ($key, $arg, $rest) = ($1, $2, $+); chop($key) if $key; unshift (@$argv, $starter.$rest) if defined $rest && $rest ne ''; } elsif ( $arg =~ /^$o_valid$/ ) { $arg =~ tr/_//d; } else { if ( defined $optarg || $mand ) { if ( $passthrough ) { unshift (@$argv, defined $rest ? $starter.$rest : $arg) unless defined $optarg; return (0); } warn ("Value \"", $arg, "\" invalid for option ", $opt, " (real number expected)\n"); $error++; # Push back. unshift (@$argv, $starter.$rest) if defined $rest; return (1, undef); } else { # Push back. unshift (@$argv, defined $rest ? $starter.$rest : $arg); # Supply default value. $arg = 0.0; } } } else { die("Getopt::Long internal error (Can't happen)\n"); } return (1, $opt, $ctl, $arg, $key); } sub ValidValue ($$$$$) { my ($ctl, $arg, $mand, $argend, $prefix) = @_; if ( $ctl->[CTL_DEST] == CTL_DEST_HASH ) { return 0 unless $arg =~ /[^=]+=(.*)/; $arg = $1; } my $type = $ctl->[CTL_TYPE]; if ( $type eq 's' ) { # string # A mandatory string takes anything. return (1) if $mand; return (1) if $arg eq "-"; # Check for option or option list terminator. 
return 0 if $arg eq $argend || $arg =~ /^$prefix.+/; return 1; } elsif ( $type eq 'i' # numeric/integer || $type eq 'I' # numeric/integer w/ incr default || $type eq 'o' ) { # dec/oct/hex/bin value my $o_valid = $type eq 'o' ? PAT_XINT : PAT_INT; return $arg =~ /^$o_valid$/si; } elsif ( $type eq 'f' ) { # real number, int is also ok # We require at least one digit before a point or 'e', # and at least one digit following the point and 'e'. # [-]NN[.NN][eNN] my $o_valid = PAT_FLOAT; return $arg =~ /^$o_valid$/; } die("ValidValue: Cannot happen\n"); } # Getopt::Long Configuration. sub Configure (@) { my (@options) = @_; my $prevconfig = [ $error, $debug, $major_version, $minor_version, $autoabbrev, $getopt_compat, $ignorecase, $bundling, $order, $gnu_compat, $passthrough, $genprefix, $auto_version, $auto_help, $longprefix ]; if ( ref($options[0]) eq 'ARRAY' ) { ( $error, $debug, $major_version, $minor_version, $autoabbrev, $getopt_compat, $ignorecase, $bundling, $order, $gnu_compat, $passthrough, $genprefix, $auto_version, $auto_help, $longprefix ) = @{shift(@options)}; } my $opt; foreach $opt ( @options ) { my $try = lc ($opt); my $action = 1; if ( $try =~ /^no_?(.*)$/s ) { $action = 0; $try = $+; } if ( ($try eq 'default' or $try eq 'defaults') && $action ) { ConfigDefaults (); } elsif ( ($try eq 'posix_default' or $try eq 'posix_defaults') ) { local $ENV{POSIXLY_CORRECT}; $ENV{POSIXLY_CORRECT} = 1 if $action; ConfigDefaults (); } elsif ( $try eq 'auto_abbrev' or $try eq 'autoabbrev' ) { $autoabbrev = $action; } elsif ( $try eq 'getopt_compat' ) { $getopt_compat = $action; $genprefix = $action ? 
"(--|-|\\+)" : "(--|-)"; } elsif ( $try eq 'gnu_getopt' ) { if ( $action ) { $gnu_compat = 1; $bundling = 1; $getopt_compat = 0; $genprefix = "(--|-)"; $order = $PERMUTE; } } elsif ( $try eq 'gnu_compat' ) { $gnu_compat = $action; } elsif ( $try =~ /^(auto_?)?version$/ ) { $auto_version = $action; } elsif ( $try =~ /^(auto_?)?help$/ ) { $auto_help = $action; } elsif ( $try eq 'ignorecase' or $try eq 'ignore_case' ) { $ignorecase = $action; } elsif ( $try eq 'ignorecase_always' or $try eq 'ignore_case_always' ) { $ignorecase = $action ? 2 : 0; } elsif ( $try eq 'bundling' ) { $bundling = $action; } elsif ( $try eq 'bundling_override' ) { $bundling = $action ? 2 : 0; } elsif ( $try eq 'require_order' ) { $order = $action ? $REQUIRE_ORDER : $PERMUTE; } elsif ( $try eq 'permute' ) { $order = $action ? $PERMUTE : $REQUIRE_ORDER; } elsif ( $try eq 'pass_through' or $try eq 'passthrough' ) { $passthrough = $action; } elsif ( $try =~ /^prefix=(.+)$/ && $action ) { $genprefix = $1; # Turn into regexp. Needs to be parenthesized! $genprefix = "(" . quotemeta($genprefix) . ")"; eval { '' =~ /$genprefix/; }; die("Getopt::Long: invalid pattern \"$genprefix\"") if $@; } elsif ( $try =~ /^prefix_pattern=(.+)$/ && $action ) { $genprefix = $1; # Parenthesize if needed. $genprefix = "(" . $genprefix . ")" unless $genprefix =~ /^\(.*\)$/; eval { '' =~ m"$genprefix"; }; die("Getopt::Long: invalid pattern \"$genprefix\"") if $@; } elsif ( $try =~ /^long_prefix_pattern=(.+)$/ && $action ) { $longprefix = $1; # Parenthesize if needed. $longprefix = "(" . $longprefix . ")" unless $longprefix =~ /^\(.*\)$/; eval { '' =~ m"$longprefix"; }; die("Getopt::Long: invalid long prefix pattern \"$longprefix\"") if $@; } elsif ( $try eq 'debug' ) { $debug = $action; } else { die("Getopt::Long: unknown config parameter \"$opt\"") } } $prevconfig; } # Deprecated name. sub config (@) { Configure (@_); } # Issue a standard message for --version. 
#
# The arguments are mostly the same as for Pod::Usage::pod2usage:
#
#  - a number (exit value)
#  - a string (lead in message)
#  - a hash with options. See Pod::Usage for details.
#
# Prints "<program> version <$main::VERSION>" followed by the
# Getopt::Long and Perl versions, then exits with -exitval unless
# -exitval is "NOEXIT".
#
sub VersionMessage(@) {
    # Massage args.
    my $pa = setup_pa_args("version", @_);

    my $v = $main::VERSION;

    # An explicit -output handle wins. Otherwise exit values below 2
    # (and NOEXIT) report on STDOUT, anything else on STDERR.
    # The parentheses around the conditional are essential: '||' binds
    # tighter than '?:', so without them -output would only select
    # between STDOUT and STDERR and never be used itself.
    # NOTE(review): -output is used directly as a file handle here;
    # a file name string is not opened -- confirm callers pass handles.
    my $fh = $pa->{-output} ||
      ( ($pa->{-exitval} eq "NOEXIT" || $pa->{-exitval} < 2)
        ? \*STDOUT : \*STDERR );

    print $fh (defined($pa->{-message}) ? $pa->{-message} : (),
               $0, defined $v ? " version $v" : (),
               "\n",
               "(", __PACKAGE__, "::", "GetOptions",
               " version ",
               defined($Getopt::Long::VERSION_STRING)
                 ? $Getopt::Long::VERSION_STRING : $VERSION, ";",
               " Perl version ",
               $] >= 5.006 ? sprintf("%vd", $^V) : $],
               ")\n");
    exit($pa->{-exitval}) unless $pa->{-exitval} eq "NOEXIT";
}

# Issue a standard message for --help.
#
# The arguments are the same as for Pod::Usage::pod2usage:
#
#  - a number (exit value)
#  - a string (lead in message)
#  - a hash with options. See Pod::Usage for details.
#
# Dies if Pod::Usage cannot be loaded.
#
sub HelpMessage(@) {
    eval {
        require Pod::Usage;
        import Pod::Usage;
        1;
    } || die("Cannot provide help: cannot load Pod::Usage\n");

    # Note that pod2usage will issue a warning if -exitval => NOEXIT.
    pod2usage(setup_pa_args("help", @_));
}

# Helper routine to set up a normalized hash ref to be used as
# argument to pod2usage.
#
# First argument is the caller's tag ("version" or "help"); the
# remaining arguments are either a single number (exit value), a single
# string (message), a hash ref, or a key/value list.
# Returns a hash ref that always has -verbose and -exitval entries.
#
sub setup_pa_args($@) {
    my $tag = shift;            # who's calling

    # If called by direct binding to an option, it will get the option
    # name and value as arguments. Remove these, if so.
    @_ = () if @_ == 2 && $_[0] eq $tag;

    my $pa;
    if ( @_ > 1 ) {
        $pa = { @_ };
    }
    else {
        $pa = shift || {};
    }

    # At this point, $pa can be a number (exit value), string
    # (message) or hash with options.

    if ( UNIVERSAL::isa($pa, 'HASH') ) {
        # Get rid of -msg vs. -message ambiguity.
        # Only fall back to -msg when no -message was given, so an
        # explicit -message is never overwritten with undef.
        if ( !defined $pa->{-message} ) {
            $pa->{-message} = $pa->{-msg};
        }
        delete($pa->{-msg});
    }
    elsif ( $pa =~ /^-?\d+$/ ) {
        $pa = { -exitval => $pa };
    }
    else {
        $pa = { -message => $pa };
    }

    # These are _our_ defaults.
    $pa->{-verbose} = 0 unless exists($pa->{-verbose});
    $pa->{-exitval} = 0 unless exists($pa->{-exitval});
    $pa;
}

# Sneak way to know what version the user requested.
# Records the requested version, then defers to the inherited VERSION.
sub VERSION {
    $requested_version = $_[1];
    shift->SUPER::VERSION(@_);
}

package Getopt::Long::CallBack;

# Construct a callback object from a list of attribute/value pairs.
sub new {
    my ($pkg, %atts) = @_;
    bless { %atts }, $pkg;
}

# Return the option name as a plain string.
sub name {
    my $self = shift;
    ''.$self->{name};
}

use overload
  # Treat this object as an ordinary string for legacy API.
  '""'     => \&name,
  fallback => 1;

1;

################ Documentation ################

=head1 NAME

Getopt::Long - Extended processing of command line options

=head1 SYNOPSIS

  use Getopt::Long;
  my $data   = "file.dat";
  my $length = 24;
  my $verbose;
  $result = GetOptions ("length=i" => \$length,    # numeric
                        "file=s"   => \$data,      # string
                        "verbose"  => \$verbose);  # flag

=head1 DESCRIPTION

The Getopt::Long module implements an extended getopt function called
GetOptions(). This function adheres to the POSIX syntax for command
line options, with GNU extensions. In general, this means that options
have long names instead of single letters, and are introduced with a
double dash "--". Support for bundling of command line options, as was
the case with the more traditional single-letter approach, is provided
but not enabled by default.

=head1 Command Line Options, an Introduction

Command line operated programs traditionally take their arguments from
the command line, for example filenames or other information that the
program needs to know. Besides arguments, these programs often take
command line I<options> as well. Options are not necessary for the
program to work, hence the name 'option', but are used to modify its
default behaviour. For example, a program could do its job quietly,
but with a suitable option it could provide verbose information about
what it did.

Command line options come in several flavours. Historically, they are
preceded by a single dash C<->, and consist of a single letter.
-l -a -c Usually, these single-character options can be bundled: -lac Options can have values, the value is placed after the option character. Sometimes with whitespace in between, sometimes not: -s 24 -s24 Due to the very cryptic nature of these options, another style was developed that used long names. So instead of a cryptic C<-l> one could use the more descriptive C<--long>. To distinguish between a bundle of single-character options and a long one, two dashes are used to precede the option name. Early implementations of long options used a plus C<+> instead. Also, option values could be specified either like --size=24 or --size 24 The C<+> form is now obsolete and strongly deprecated. =head1 Getting Started with Getopt::Long Getopt::Long is the Perl5 successor of C<newgetopt.pl>. This was the first Perl module that provided support for handling the new style of command line options, hence the name Getopt::Long. This module also supports single-character options and bundling. Single character options may be any alphabetic character, a question mark, and a dash. Long options may consist of a series of letters, digits, and dashes. Although this is currently not enforced by Getopt::Long, multiple consecutive dashes are not allowed, and the option name must not end with a dash. To use Getopt::Long from a Perl program, you must include the following line in your Perl program: use Getopt::Long; This will load the core of the Getopt::Long module and prepare your program for using it. Most of the actual Getopt::Long code is not loaded until you really call one of its functions. In the default configuration, option names may be abbreviated to uniqueness, case does not matter, and a single dash is sufficient, even for long option names. Also, options may be placed between non-option arguments. See L<Configuring Getopt::Long> for more details on how to configure Getopt::Long. =head2 Simple options The most simple options are the ones that take no values.
Their mere presence on the command line enables the option. Popular examples are: --all --verbose --quiet --debug Handling simple options is straightforward: my $verbose = ''; # option variable with default value (false) my $all = ''; # option variable with default value (false) GetOptions ('verbose' => \$verbose, 'all' => \$all); The call to GetOptions() parses the command line arguments that are present in C<@ARGV> and sets the option variable to the value C<1> if the option did occur on the command line. Otherwise, the option variable is not touched. Setting the option value to true is often called I<enabling> the option. The option name as specified to the GetOptions() function is called the option I<specification>. Later we'll see that this specification can contain more than just the option name. The reference to the variable is called the option I<destination>. GetOptions() will return a true value if the command line could be processed successfully. Otherwise, it will write error messages to STDERR, and return a false result. =head2 A little bit less simple options Getopt::Long supports two useful variants of simple options: I<negatable> options and I<incremental> options. A negatable option is specified with an exclamation mark C<!> after the option name: my $verbose = ''; # option variable with default value (false) GetOptions ('verbose!' => \$verbose); Now, using C<--verbose> on the command line will enable C<$verbose>, as expected. But it is also allowed to use C<--noverbose>, which will disable C<$verbose> by setting its value to C<0>. Using a suitable default value, the program can find out whether C<$verbose> is false by default, or disabled by using C<--noverbose>. An incremental option is specified with a plus C<+> after the option name: my $verbose = ''; # option variable with default value (false) GetOptions ('verbose+' => \$verbose); Using C<--verbose> on the command line will increment the value of C<$verbose>. This way the program can keep track of how many times the option occurred on the command line.
For example, each occurrence of C<--verbose> could increase the verbosity level of the program. =head2 Mixing command line option with other arguments Usually programs take command line options as well as other arguments, for example, file names. It is good practice to always specify the options first, and the other arguments last. Getopt::Long will, however, allow the options and arguments to be mixed and 'filter out' all the options before passing the rest of the arguments to the program. To stop Getopt::Long from processing further arguments, insert a double dash C<--> on the command line: --size 24 -- --all In this example, C<--all> will I<not> be treated as an option, but passed to the program unharmed, in C<@ARGV>. =head2 Options with values For options that take values it must be specified whether the option value is required or not, and what kind of value the option expects. Three kinds of values are supported: integer numbers, floating point numbers, and strings. If the option value is required, Getopt::Long will take the command line argument that follows the option and assign this to the option variable. If, however, the option value is specified as optional, this will only be done if that value does not look like a valid command line option itself. my $tag = ''; # option variable with default value GetOptions ('tag=s' => \$tag); In the option specification, the option name is followed by an equals sign C<=> and the letter C<s>. The equals sign indicates that this option requires a value. The letter C<s> indicates that this value is an arbitrary string. Other possible value types are C<i> for integer values, and C<f> for floating point values. Using a colon C<:> instead of the equals sign indicates that the option value is optional. In this case, if no suitable value is supplied, string valued options get an empty string C<''> assigned, while numeric options are set to C<0>. =head2 Options with multiple values Options sometimes take several values.
For example, a program could use multiple directories to search for library files: --library lib/stdlib --library lib/extlib To accomplish this behaviour, simply specify an array reference as the destination for the option: GetOptions ("library=s" => \@libfiles); Alternatively, you can specify that the option can have multiple values by adding a "@", and pass a scalar reference as the destination: GetOptions ("library=s@" => \$libfiles); Used with the example above, C<@libfiles> (or C<@$libfiles>) would contain two strings upon completion: C<"lib/stdlib"> and C<"lib/extlib">, in that order. It is also possible to specify that only integer or floating point numbers are acceptable values. Often it is useful to allow comma-separated lists of values as well as multiple occurrences of the options. This is easy using Perl's split() and join() operators: GetOptions ("library=s" => \@libfiles); @libfiles = split(/,/,join(',',@libfiles)); Of course, it is important to choose the right separator string for each purpose. Warning: What follows is an experimental feature. Options can take multiple values at once, for example --coordinates 52.2 16.4 --rgbcolor 255 255 149 This can be accomplished by adding a repeat specifier to the option specification. Repeat specifiers are very similar to the C<{...}> repeat specifiers that can be used with regular expression patterns. For example, the above command line would be handled as follows: GetOptions('coordinates=f{2}' => \@coor, 'rgbcolor=i{3}' => \@color); The destination for the option must be an array or array reference. It is also possible to specify the minimal and maximal number of arguments an option takes. C<foo=s{2,4}> indicates an option that takes at least two and at most 4 arguments. C<foo=s{1,}> indicates one or more values; C<foo:s{,}> indicates zero or more option values. =head2 Options with hash values If the option destination is a reference to a hash, the option will take, as value, strings of the form I<key>C<=>I<value>.
The value will be stored with the specified key in the hash. GetOptions ("define=s" => \%defines); Alternatively you can use: GetOptions ("define=s%" => \$defines); When used with command line options: --define os=linux --define vendor=redhat the hash C<%defines> (or C<%$defines>) will contain two keys, C<"os"> with value C<"linux"> and C<"vendor"> with value C<"redhat">. It is also possible to specify that only integer or floating point numbers are acceptable values. The keys are always taken to be strings. =head2 User-defined subroutines to handle options Ultimate control over what should be done when (actually: each time) an option is encountered on the command line can be achieved by designating a reference to a subroutine (or an anonymous subroutine) as the option destination. When GetOptions() encounters the option, it will call the subroutine with two or three arguments. The first argument is the name of the option. (Actually, it is an object that stringifies to the name of the option.) For a scalar or array destination, the second argument is the value to be stored. For a hash destination, the second argument is the key to the hash, and the third argument the value to be stored. It is up to the subroutine to store the value, or do whatever it thinks is appropriate. A trivial application of this mechanism is to implement options that are related to each other. For example: my $verbose = ''; # option variable with default value (false) GetOptions ('verbose' => \$verbose, 'quiet' => sub { $verbose = 0 }); Here C<--verbose> and C<--quiet> control the same variable C<$verbose>, but with opposite values. If the subroutine needs to signal an error, it should call die() with the desired error message as its argument. GetOptions() will catch the die(), issue the error message, and record that an error result must be returned upon completion. If the text of the error message starts with an exclamation mark C<!> it is interpreted specially by GetOptions().
There is currently one special command implemented: C<die("!FINISH")> will cause GetOptions() to stop processing options, as if it encountered a double dash C<-->. In version 2.37 the first argument to the callback function was changed from string to object. This was done to make room for extensions and more detailed control. The object stringifies to the option name so this change should not introduce compatibility problems. =head2 Options with multiple names Often it is user friendly to supply alternate mnemonic names for options. For example C<--height> could be an alternate name for C<--length>. Alternate names can be included in the option specification, separated by vertical bar C<|> characters. To implement the above example: GetOptions ('length|height=f' => \$length); The first name is called the I<primary> name, the other names are called I<aliases>. When using a hash to store options, the key will always be the primary name. Multiple alternate names are possible. =head2 Case and abbreviations Without additional configuration, GetOptions() will ignore the case of option names, and allow the options to be abbreviated to uniqueness. GetOptions ('length|height=f' => \$length, "head" => \$head); This call will allow C<--l> and C<--L> for the length option, but requires at least C<--hea> and C<--hei> for the head and height options. =head2 Summary of Option Specifications Each option specifier consists of two parts: the name specification and the argument specification. The name specification contains the name of the option, optionally followed by a list of alternative names separated by vertical bar characters. length option name is "length" length|size|l name is "length", aliases are "size" and "l" The argument specification is optional. If omitted, the option is considered boolean, a value of 1 will be assigned when the option is used on the command line. The argument specification can be =over 4 =item !
The option does not take an argument and may be negated by prefixing it with "no" or "no-". E.g. C<"foo!"> will allow C<--foo> (a value of 1 will be assigned) as well as C<--nofoo> and C<--no-foo> (a value of 0 will be assigned). If the option has aliases, this applies to the aliases as well. Using negation on a single letter option when bundling is in effect is pointless and will result in a warning. =item + The option does not take an argument and will be incremented by 1 every time it appears on the command line. E.g. C<"more+">, when used with C<--more --more --more>, will increment the value three times, resulting in a value of 3 (provided it was 0 or undefined at first). The C<+> specifier is ignored if the option destination is not a scalar. =item = I<type> [ I<desttype> ] [ I<repeat> ] The option requires an argument of the given type. Supported types are: =over 4 =item s String. An arbitrary sequence of characters. It is valid for the argument to start with C<-> or C<-->. =item i Integer. An optional leading plus or minus sign, followed by a sequence of digits. =item o Extended integer, Perl style. This can be either an optional leading plus or minus sign, followed by a sequence of digits, or an octal string (a zero, optionally followed by '0', '1', .. '7'), or a hexadecimal string (C<0x> followed by '0' .. '9', 'a' .. 'f', case insensitive), or a binary string (C<0b> followed by a series of '0' and '1'). =item f Real number. For example C<3.14>, C<-6.23E24> and so on. =back The I<desttype> can be C<@> or C<%> to specify that the option is list or hash valued. This is only needed when the destination for the option value is not otherwise specified. It should be omitted when not needed. The I<repeat> specifies the number of values this option takes per occurrence on the command line. It has the format C<{> [ I<min> ] [ C<,> [ I<max> ] ] C<}>. I<min> denotes the minimal number of arguments. It defaults to 1 for options with C<=> and to 0 for options with C<:>, see below.
Note that I overrules the C<=> / C<:> semantics. I denotes the maximum number of arguments. It must be at least I. If I is omitted, I, there is no upper bound to the number of argument values taken. =item : I [ I ] Like C<=>, but designates the argument as optional. If omitted, an empty string will be assigned to string values options, and the value zero to numeric options. Note that if a string argument starts with C<-> or C<-->, it will be considered an option on itself. =item : I [ I ] Like C<:i>, but if the value is omitted, the I will be assigned. =item : + [ I ] Like C<:i>, but if the value is omitted, the current value for the option will be incremented. =back =head1 Advanced Possibilities =head2 Object oriented interface Getopt::Long can be used in an object oriented way as well: use Getopt::Long; $p = new Getopt::Long::Parser; $p->configure(...configuration options...); if ($p->getoptions(...options descriptions...)) ... Configuration options can be passed to the constructor: $p = new Getopt::Long::Parser config => [...configuration options...]; =head2 Thread Safety Getopt::Long is thread safe when using ithreads as of Perl 5.8. It is I thread safe when using the older (experimental and now obsolete) threads implementation that was added to Perl 5.005. =head2 Documentation and help texts Getopt::Long encourages the use of Pod::Usage to produce help messages. For example: use Getopt::Long; use Pod::Usage; my $man = 0; my $help = 0; GetOptions('help|?' => \$help, man => \$man) or pod2usage(2); pod2usage(1) if $help; pod2usage(-exitstatus => 0, -verbose => 2) if $man; __END__ =head1 NAME sample - Using Getopt::Long and Pod::Usage =head1 SYNOPSIS sample [options] [file ...] Options: -help brief help message -man full documentation =head1 OPTIONS =over 8 =item B<-help> Print a brief help message and exits. =item B<-man> Prints the manual page and exits. 
=back =head1 DESCRIPTION B will read the given input file(s) and do something useful with the contents thereof. =cut See L for details. =head2 Parsing options from an arbitrary array By default, GetOptions parses the options that are present in the global array C<@ARGV>. A special entry C can be used to parse options from an arbitrary array. use Getopt::Long qw(GetOptionsFromArray); $ret = GetOptionsFromArray(\@myopts, ...); When used like this, the global C<@ARGV> is not touched at all. The following two calls behave identically: $ret = GetOptions( ... ); $ret = GetOptionsFromArray(\@ARGV, ... ); =head2 Parsing options from an arbitrary string A special entry C can be used to parse options from an arbitrary string. use Getopt::Long qw(GetOptionsFromString); $ret = GetOptionsFromString($string, ...); The contents of the string are split into arguments using a call to C. As with C, the global C<@ARGV> is not touched. It is possible that, upon completion, not all arguments in the string have been processed. C will, when called in list context, return both the return status and an array reference to any remaining arguments: ($ret, $args) = GetOptionsFromString($string, ... ); If any arguments remain, and C was not called in list context, a message will be given and C will return failure. =head2 Storing options values in a hash Sometimes, for example when there are a lot of options, having a separate variable for each of them can be cumbersome. GetOptions() supports, as an alternative mechanism, storing options values in a hash. To obtain this, a reference to a hash must be passed I to GetOptions(). For each option that is specified on the command line, the option value will be stored in the hash with the option name as key. Options that are not actually used on the command line will not be put in the hash, on other words, C (or defined()) can be used to test if an option was used. 
The drawback is that warnings will be issued if the program runs under C and uses C<$h{option}> without testing with exists() or defined() first. my %h = (); GetOptions (\%h, 'length=i'); # will store in $h{length} For options that take list or hash values, it is necessary to indicate this by appending an C<@> or C<%> sign after the type: GetOptions (\%h, 'colours=s@'); # will push to @{$h{colours}} To make things more complicated, the hash may contain references to the actual destinations, for example: my $len = 0; my %h = ('length' => \$len); GetOptions (\%h, 'length=i'); # will store in $len This example is fully equivalent with: my $len = 0; GetOptions ('length=i' => \$len); # will store in $len Any mixture is possible. For example, the most frequently used options could be stored in variables while all other options get stored in the hash: my $verbose = 0; # frequently referred my $debug = 0; # frequently referred my %h = ('verbose' => \$verbose, 'debug' => \$debug); GetOptions (\%h, 'verbose', 'debug', 'filter', 'size=i'); if ( $verbose ) { ... } if ( exists $h{filter} ) { ... option 'filter' was specified ... } =head2 Bundling With bundling it is possible to set several single-character options at once. For example if C, C and C are all valid options, -vax would set all three. Getopt::Long supports two levels of bundling. To enable bundling, a call to Getopt::Long::Configure is required. The first level of bundling can be enabled with: Getopt::Long::Configure ("bundling"); Configured this way, single-character options can be bundled but long options B always start with a double dash C<--> to avoid ambiguity. For example, when C, C, C and C are all valid options, -vax would set C, C and C, but --vax would set C. The second level of bundling lifts this restriction. It can be enabled with: Getopt::Long::Configure ("bundling_override"); Now, C<-vax> would set the option C. When any level of bundling is enabled, option values may be inserted in the bundle. 
For example: -h24w80 is equivalent to -h 24 -w 80 When configured for bundling, single-character options are matched case sensitive while long options are matched case insensitive. To have the single-character options matched case insensitive as well, use: Getopt::Long::Configure ("bundling", "ignorecase_always"); It goes without saying that bundling can be quite confusing. =head2 The lonesome dash Normally, a lone dash C<-> on the command line will not be considered an option. Option processing will terminate (unless "permute" is configured) and the dash will be left in C<@ARGV>. It is possible to get special treatment for a lone dash. This can be achieved by adding an option specification with an empty name, for example: GetOptions ('' => \$stdio); A lone dash on the command line will now be a legal option, and using it will set variable C<$stdio>. =head2 Argument callback A special option 'name' C<< <> >> can be used to designate a subroutine to handle non-option arguments. When GetOptions() encounters an argument that does not look like an option, it will immediately call this subroutine and passes it one parameter: the argument name. Well, actually it is an object that stringifies to the argument name. For example: my $width = 80; sub process { ... } GetOptions ('width=i' => \$width, '<>' => \&process); When applied to the following command line: arg1 --width=72 arg2 --width=60 arg3 This will call C while C<$width> is C<80>, C while C<$width> is C<72>, and C while C<$width> is C<60>. This feature requires configuration option B, see section L. =head1 Configuring Getopt::Long Getopt::Long can be configured by calling subroutine Getopt::Long::Configure(). This subroutine takes a list of quoted strings, each specifying a configuration option to be enabled, e.g. C, or disabled, e.g. C. Case does not matter. Multiple calls to Configure() are possible. 
Alternatively, as of version 2.24, the configuration options may be passed together with the C statement: use Getopt::Long qw(:config no_ignore_case bundling); The following options are available: =over 12 =item default This option causes all configuration options to be reset to their default values. =item posix_default This option causes all configuration options to be reset to their default values as if the environment variable POSIXLY_CORRECT had been set. =item auto_abbrev Allow option names to be abbreviated to uniqueness. Default is enabled unless environment variable POSIXLY_CORRECT has been set, in which case C is disabled. =item getopt_compat Allow C<+> to start options. Default is enabled unless environment variable POSIXLY_CORRECT has been set, in which case C is disabled. =item gnu_compat C controls whether C<--opt=> is allowed, and what it should do. Without C, C<--opt=> gives an error. With C, C<--opt=> will give option C and empty value. This is the way GNU getopt_long() does it. =item gnu_getopt This is a short way of setting C C C C. With C, command line handling should be fully compatible with GNU getopt_long(). =item require_order Whether command line arguments are allowed to be mixed with options. Default is disabled unless environment variable POSIXLY_CORRECT has been set, in which case C is enabled. See also C, which is the opposite of C. =item permute Whether command line arguments are allowed to be mixed with options. Default is enabled unless environment variable POSIXLY_CORRECT has been set, in which case C is disabled. Note that C is the opposite of C. If C is enabled, this means that --foo arg1 --bar arg2 arg3 is equivalent to --foo --bar arg1 arg2 arg3 If an argument callback routine is specified, C<@ARGV> will always be empty upon successful return of GetOptions() since all options have been processed. 
The only exception is when C<--> is used: --foo arg1 --bar arg2 -- arg3 This will call the callback routine for arg1 and arg2, and then terminate GetOptions() leaving C<"arg3"> in C<@ARGV>. If C is enabled, options processing terminates when the first non-option is encountered. --foo arg1 --bar arg2 arg3 is equivalent to --foo -- arg1 --bar arg2 arg3 If C is also enabled, options processing will terminate at the first unrecognized option, or non-option, whichever comes first. =item bundling (default: disabled) Enabling this option will allow single-character options to be bundled. To distinguish bundles from long option names, long options I be introduced with C<--> and bundles with C<->. Note that, if you have options C, C and C, and auto_abbrev enabled, possible arguments and option settings are: using argument sets option(s) ------------------------------------------ -a, --a a -l, --l l -al, -la, -ala, -all,... a, l --al, --all all The surprising part is that C<--a> sets option C (due to auto completion), not C. Note: disabling C also disables C. =item bundling_override (default: disabled) If C is enabled, bundling is enabled as with C but now long option names override option bundles. Note: disabling C also disables C. B Using option bundling can easily lead to unexpected results, especially when mixing long options and bundles. Caveat emptor. =item ignore_case (default: enabled) If enabled, case is ignored when matching long option names. If, however, bundling is enabled as well, single character options will be treated case-sensitive. With C, option specifications for options that only differ in case, e.g., C<"foo"> and C<"Foo">, will be flagged as duplicates. Note: disabling C also disables C. =item ignore_case_always (default: disabled) When bundling is in effect, case is ignored on single-character options also. Note: disabling C also disables C. 
=item auto_version (default:disabled) Automatically provide support for the B<--version> option if the application did not specify a handler for this option itself. Getopt::Long will provide a standard version message that includes the program name, its version (if $main::VERSION is defined), and the versions of Getopt::Long and Perl. The message will be written to standard output and processing will terminate. C will be enabled if the calling program explicitly specified a version number higher than 2.32 in the C or C statement. =item auto_help (default:disabled) Automatically provide support for the B<--help> and B<-?> options if the application did not specify a handler for this option itself. Getopt::Long will provide a help message using module L. The message, derived from the SYNOPSIS POD section, will be written to standard output and processing will terminate. C will be enabled if the calling program explicitly specified a version number higher than 2.32 in the C or C statement. =item pass_through (default: disabled) Options that are unknown, ambiguous or supplied with an invalid option value are passed through in C<@ARGV> instead of being flagged as errors. This makes it possible to write wrapper scripts that process only part of the user supplied command line arguments, and pass the remaining options to some other program. If C is enabled, options processing will terminate at the first unrecognized option, or non-option, whichever comes first. However, if C is enabled instead, results can become confusing. Note that the options terminator (default C<-->), if present, will also be passed through in C<@ARGV>. =item prefix The string that starts options. If a constant string is not sufficient, see C. =item prefix_pattern A Perl pattern that identifies the strings that introduce options. Default is C<--|-|\+> unless environment variable POSIXLY_CORRECT has been set, in which case it is C<--|->. 
=item long_prefix_pattern A Perl pattern that allows the disambiguation of long and short prefixes. Default is C<-->. Typically you only need to set this if you are using nonstandard prefixes and want some or all of them to have the same semantics as '--' does under normal circumstances. For example, setting prefix_pattern to C<--|-|\+|\/> and long_prefix_pattern to C<--|\/> would add Win32 style argument handling. =item debug (default: disabled) Enable debugging output. =back =head1 Exportable Methods =over =item VersionMessage This subroutine provides a standard version message. Its argument can be: =over 4 =item * A string containing the text of a message to print I printing the standard message. =item * A numeric value corresponding to the desired exit status. =item * A reference to a hash. =back If more than one argument is given then the entire argument list is assumed to be a hash. If a hash is supplied (either as a reference or as a list) it should contain one or more elements with the following keys: =over 4 =item C<-message> =item C<-msg> The text of a message to print immediately prior to printing the program's usage message. =item C<-exitval> The desired exit status to pass to the B function. This should be an integer, or else the string "NOEXIT" to indicate that control should simply be returned without terminating the invoking process. =item C<-output> A reference to a filehandle, or the pathname of a file to which the usage message should be written. The default is C<\*STDERR> unless the exit value is less than 2 (in which case the default is C<\*STDOUT>). =back You cannot tie this routine directly to an option, e.g.: GetOptions("version" => \&VersionMessage); Use this instead: GetOptions("version" => sub { VersionMessage() }); =item HelpMessage This subroutine produces a standard help message, derived from the program's POD section SYNOPSIS using L. It takes the same arguments as VersionMessage(). 
In particular, you cannot tie it directly to an option, e.g.: GetOptions("help" => \&HelpMessage); Use this instead: GetOptions("help" => sub { HelpMessage() }); =back =head1 Return values and Errors Configuration errors and errors in the option definitions are signalled using die() and will terminate the calling program unless the call to Getopt::Long::GetOptions() was embedded in C, or die() was trapped using C<$SIG{__DIE__}>. GetOptions returns true to indicate success. It returns false when the function detected one or more errors during option parsing. These errors are signalled using warn() and can be trapped with C<$SIG{__WARN__}>. =head1 Legacy The earliest development of C started in 1990, with Perl version 4. As a result, its development, and the development of Getopt::Long, has gone through several stages. Since backward compatibility has always been extremely important, the current version of Getopt::Long still supports a lot of constructs that nowadays are no longer necessary or otherwise unwanted. This section describes briefly some of these 'features'. =head2 Default destinations When no destination is specified for an option, GetOptions will store the resultant value in a global variable named CI, where I is the primary name of this option. When a progam executes under C (recommended), these variables must be pre-declared with our() or C. our $opt_length = 0; GetOptions ('length=i'); # will store in $opt_length To yield a usable Perl variable, characters that are not part of the syntax for variables are translated to underscores. For example, C<--fpp-struct-return> will set the variable C<$opt_fpp_struct_return>. Note that this variable resides in the namespace of the calling program, not necessarily C
. For example: GetOptions ("size=i", "sizes=i@"); with command line "-size 10 -sizes 24 -sizes 48" will perform the equivalent of the assignments $opt_size = 10; @opt_sizes = (24, 48); =head2 Alternative option starters A string of alternative option starter characters may be passed as the first argument (or the first argument after a leading hash reference argument). my $len = 0; GetOptions ('/', 'length=i' => $len); Now the command line may look like: /length 24 -- arg Note that to terminate options processing still requires a double dash C<-->. GetOptions() will not interpret a leading C<< "<>" >> as option starters if the next argument is a reference. To force C<< "<" >> and C<< ">" >> as option starters, use C<< "><" >>. Confusing? Well, B anyway. =head2 Configuration variables Previous versions of Getopt::Long used variables for the purpose of configuring. Although manipulating these variables still work, it is strongly encouraged to use the C routine that was introduced in version 2.17. Besides, it is much easier. =head1 Tips and Techniques =head2 Pushing multiple values in a hash option Sometimes you want to combine the best of hashes and arrays. For example, the command line: --list add=first --list add=second --list add=third where each successive 'list add' option will push the value of add into array ref $list->{'add'}. The result would be like $list->{add} = [qw(first second third)]; This can be accomplished with a destination routine: GetOptions('list=s%' => sub { push(@{$list{$_[1]}}, $_[2]) }); =head1 Troubleshooting =head2 GetOptions does not return a false result when an option is not supplied That's why they're called 'options'. =head2 GetOptions does not split the command line correctly The command line is not split by GetOptions, but by the command line interpreter (CLI). On Unix, this is the shell. On Windows, it is COMMAND.COM or CMD.EXE. Other operating systems have other CLIs. 
It is important to know that these CLIs may behave different when the command line contains special characters, in particular quotes or backslashes. For example, with Unix shells you can use single quotes (C<'>) and double quotes (C<">) to group words together. The following alternatives are equivalent on Unix: "two words" 'two words' two\ words In case of doubt, insert the following statement in front of your Perl program: print STDERR (join("|",@ARGV),"\n"); to verify how your CLI passes the arguments to the program. =head2 Undefined subroutine &main::GetOptions called Are you running Windows, and did you write use GetOpt::Long; (note the capital 'O')? =head2 How do I put a "-?" option into a Getopt::Long? You can only obtain this using an alias, and Getopt::Long of at least version 2.13. use Getopt::Long; GetOptions ("help|?"); # -help and -? will both set $opt_help =head1 AUTHOR Johan Vromans =head1 COPYRIGHT AND DISCLAIMER This program is Copyright 1990,2009 by Johan Vromans. This program is free software; you can redistribute it and/or modify it under the terms of the Perl Artistic License or the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. If you do not have a copy of the GNU General Public License write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. =cut package Getopt::Std; require 5.000; require Exporter; =head1 NAME getopt, getopts - Process single-character switches with switch clustering =head1 SYNOPSIS use Getopt::Std; getopt('oDI'); # -o, -D & -I take arg. Sets $opt_* as a side effect. getopt('oDI', \%opts); # -o, -D & -I take arg. 
Values in %opts getopts('oif:'); # -o & -i are boolean flags, -f takes an argument # Sets $opt_* as a side effect. getopts('oif:', \%opts); # options as above. Values in %opts =head1 DESCRIPTION The getopt() function processes single-character switches with switch clustering. Pass one argument which is a string containing all switches that take an argument. For each switch found, sets $opt_x (where x is the switch name) to the value of the argument if an argument is expected, or 1 otherwise. Switches which take an argument don't care whether there is a space between the switch and the argument. The getopts() function is similar, but you should pass to it the list of all switches to be recognized. If unspecified switches are found on the command-line, the user will be warned that an unknown option was given. The getopts() function returns true unless an invalid option was found. Note that, if your code is running under the recommended C pragma, you will need to declare these package variables with "our": our($opt_x, $opt_y); For those of you who don't like additional global variables being created, getopt() and getopts() will also accept a hash reference as an optional second argument. Hash keys will be x (where x is the switch name) with key values the value of the argument or 1 if no argument is specified. To allow programs to process arguments that look like switches, but aren't, both functions will stop processing switches when they see the argument C<-->. The C<--> will be removed from @ARGV. =head1 C<--help> and C<--version> If C<-> is not a recognized switch letter, getopts() supports arguments C<--help> and C<--version>. If C and/or C are defined, they are called; the arguments are the output file handle, the name of option-processing package, its version, and the switches string. If the subroutines are not defined, an attempt is made to generate intelligent messages; for best results, define $main::VERSION. 
If embedded documentation (in pod format, see L) is detected in the script, C<--help> will also show how to access the documentation. Note that due to excessive paranoia, if $Getopt::Std::STANDARD_HELP_VERSION isn't true (the default is false), then the messages are printed on STDERR, and the processing continues after the messages are printed. This being the opposite of the standard-conforming behaviour, it is strongly recommended to set $Getopt::Std::STANDARD_HELP_VERSION to true. One can change the output file handle of the messages by setting $Getopt::Std::OUTPUT_HELP_VERSION. One can print the messages of C<--help> (without the C line) and C<--version> by calling functions help_mess() and version_mess() with the switches string as an argument. =cut @ISA = qw(Exporter); @EXPORT = qw(getopt getopts); $VERSION = '1.06'; # uncomment the next line to disable 1.03-backward compatibility paranoia # $STANDARD_HELP_VERSION = 1; # Process single-character switches with switch clustering. Pass one argument # which is a string containing all switches that take an argument. For each # switch found, sets $opt_x (where x is the switch name) to the value of the # argument, or 1 if no argument. Switches which take an argument don't care # whether there is a space between the switch and the argument. # Usage: # getopt('oDI'); # -o, -D & -I take arg. Sets opt_* as a side effect. 
sub getopt (;$$) { my ($argumentative, $hash) = @_; $argumentative = '' if !defined $argumentative; my ($first,$rest); local $_; local @EXPORT; while (@ARGV && ($_ = $ARGV[0]) =~ /^-(.)(.*)/) { ($first,$rest) = ($1,$2); if (/^--$/) { # early exit if -- shift @ARGV; last; } if (index($argumentative,$first) >= 0) { if ($rest ne '') { shift(@ARGV); } else { shift(@ARGV); $rest = shift(@ARGV); } if (ref $hash) { $$hash{$first} = $rest; } else { ${"opt_$first"} = $rest; push( @EXPORT, "\$opt_$first" ); } } else { if (ref $hash) { $$hash{$first} = 1; } else { ${"opt_$first"} = 1; push( @EXPORT, "\$opt_$first" ); } if ($rest ne '') { $ARGV[0] = "-$rest"; } else { shift(@ARGV); } } } unless (ref $hash) { local $Exporter::ExportLevel = 1; import Getopt::Std; } } sub output_h () { return $OUTPUT_HELP_VERSION if defined $OUTPUT_HELP_VERSION; return \*STDOUT if $STANDARD_HELP_VERSION; return \*STDERR; } sub try_exit () { exit 0 if $STANDARD_HELP_VERSION; my $p = __PACKAGE__; print {output_h()} <= 5.006; print $h <) { $has_pod = 1, last if /^=(pod|head1)/; } } print $h <= 0) { if (defined($args[$pos+1]) and ($args[$pos+1] eq ':')) { shift(@ARGV); if ($rest eq '') { ++$errs unless @ARGV; $rest = shift(@ARGV); } if (ref $hash) { $$hash{$first} = $rest; } else { ${"opt_$first"} = $rest; push( @EXPORT, "\$opt_$first" ); } } else { if (ref $hash) { $$hash{$first} = 1; } else { ${"opt_$first"} = 1; push( @EXPORT, "\$opt_$first" ); } if ($rest eq '') { shift(@ARGV); } else { $ARGV[0] = "-$rest"; } } } else { if ($first eq '-' and $rest eq 'help') { version_mess($argumentative, 'main'); help_mess($argumentative, 'main'); try_exit(); shift(@ARGV); next; } elsif ($first eq '-' and $rest eq 'version') { version_mess($argumentative, 'main'); try_exit(); shift(@ARGV); next; } warn "Unknown option: $first\n"; ++$errs; if ($rest ne '') { $ARG# vim: ts=4 sts=4 sw=4 et: # # This file is part of HTTP-Tiny # # This software is copyright (c) 2011 by Christian Hansen. 
# # This is free software; you can redistribute it and/or modify it under # the same terms as the Perl 5 programming language system itself. # package HTTP::Tiny; BEGIN { $HTTP::Tiny::VERSION = '0.012'; } use strict; use warnings; # ABSTRACT: A small, simple, correct HTTP/1.1 client use Carp (); my @attributes; BEGIN { @attributes = qw(agent default_headers max_redirect max_size proxy timeout); no strict 'refs'; for my $accessor ( @attributes ) { *{$accessor} = sub { @_ > 1 ? $_[0]->{$accessor} = $_[1] : $_[0]->{$accessor}; }; } } sub new { my($class, %args) = @_; (my $agent = $class) =~ s{::}{-}g; my $self = { agent => $agent . "/" . ($class->VERSION || 0), max_redirect => 5, timeout => 60, }; for my $key ( @attributes ) { $self->{$key} = $args{$key} if exists $args{$key} } return bless $self, $class; } sub get { my ($self, $url, $args) = @_; @_ == 2 || (@_ == 3 && ref $args eq 'HASH') or Carp::croak(q/Usage: $http->get(URL, [HASHREF])/); return $self->request('GET', $url, $args || {}); } sub mirror { my ($self, $url, $file, $args) = @_; @_ == 3 || (@_ == 4 && ref $args eq 'HASH') or Carp::croak(q/Usage: $http->mirror(URL, FILE, [HASHREF])/); if ( -e $file and my $mtime = (stat($file))[9] ) { $args->{headers}{'if-modified-since'} ||= $self->_http_date($mtime); } my $tempfile = $file . 
int(rand(2**31)); open my $fh, ">", $tempfile or Carp::croak(qq/Error: Could not open temporary file $tempfile for downloading: $!/); binmode $fh; $args->{data_callback} = sub { print {$fh} $_[0] }; my $response = $self->request('GET', $url, $args); close $fh or Carp::croak(qq/Error: Could not close temporary file $tempfile: $!/); if ( $response->{success} ) { rename $tempfile, $file or Carp::croak "Error replacing $file with $tempfile: $!\n"; my $lm = $response->{headers}{'last-modified'}; if ( $lm and my $mtime = $self->_parse_http_date($lm) ) { utime $mtime, $mtime, $file; } } $response->{success} ||= $response->{status} eq '304'; unlink $tempfile; return $response; } my %idempotent = map { $_ => 1 } qw/GET HEAD PUT DELETE OPTIONS TRACE/; sub request { my ($self, $method, $url, $args) = @_; @_ == 3 || (@_ == 4 && ref $args eq 'HASH') or Carp::croak(q/Usage: $http->request(METHOD, URL, [HASHREF])/); $args ||= {}; # we keep some state in this during _request # RFC 2616 Section 8.1.4 mandates a single retry on broken socket my $response; for ( 0 .. 1 ) { $response = eval { $self->_request($method, $url, $args) }; last unless $@ && $idempotent{$method} && $@ =~ m{^(?:Socket closed|Unexpected end)}; } if (my $e = "$@") { $response = { success => q{}, status => 599, reason => 'Internal Exception', content => $e, headers => { 'content-type' => 'text/plain', 'content-length' => length $e, } }; } return $response; } my %DefaultPort = ( http => 80, https => 443, ); sub _request { my ($self, $method, $url, $args) = @_; my ($scheme, $host, $port, $path_query) = $self->_split_url($url); my $request = { method => $method, scheme => $scheme, host_port => ($port == $DefaultPort{$scheme} ? 
$host : "$host:$port"),
        uri       => $path_query,
        headers   => {},
    };

    my $handle  = HTTP::Tiny::Handle->new(timeout => $self->{timeout});

    if ($self->{proxy}) {
        $request->{uri} = "$scheme://$request->{host_port}$path_query";
        croak(qq/HTTPS via proxy is not supported/)
            if $request->{scheme} eq 'https';
        $handle->connect(($self->_split_url($self->{proxy}))[0..2]);
    }
    else {
        $handle->connect($scheme, $host, $port);
    }

    $self->_prepare_headers_and_cb($request, $args);
    $handle->write_request($request);

    my $response;
    do { $response = $handle->read_response_header }
        until (substr($response->{status},0,1) ne '1');

    if ( my @redir_args = $self->_maybe_redirect($request, $response, $args) ) {
        $handle->close;
        return $self->_request(@redir_args, $args);
    }

    if ($method eq 'HEAD' || $response->{status} =~ /^[23]04/) {
        # response has no message body
    }
    else {
        my $data_cb = $self->_prepare_data_cb($response, $args);
        $handle->read_body($data_cb, $response);
    }

    $handle->close;
    $response->{success} = substr($response->{status},0,1) eq '2';
    return $response;
}

# Merge default and per-request headers (keys lower-cased) into
# $request->{headers}, force Host/Connection, and install the body producer
# ($request->{cb}) and optional trailer producer ($request->{trailer_cb}).
sub _prepare_headers_and_cb {
    my ($self, $request, $args) = @_;

    for ($self->{default_headers}, $args->{headers}) {
        next unless defined;
        while (my ($k, $v) = each %$_) {
            $request->{headers}{lc $k} = $v;
        }
    }
    $request->{headers}{'host'}         = $request->{host_port};
    $request->{headers}{'connection'}   = "close";
    $request->{headers}{'user-agent'} ||= $self->{agent};

    if (defined $args->{content}) {
        $request->{headers}{'content-type'} ||= "application/octet-stream";
        if (ref $args->{content} eq 'CODE') {
            # Streaming body of unknown length: default to chunked encoding.
            $request->{headers}{'transfer-encoding'} = 'chunked'
              unless $request->{headers}{'content-length'}
                  || $request->{headers}{'transfer-encoding'};
            $request->{cb} = $args->{content};
        }
        else {
            my $content = $args->{content};
            if ( $] ge '5.008' ) {
                utf8::downgrade($content, 1)
                    or Carp::croak(q/Wide character in request message body/);
            }
            $request->{headers}{'content-length'} = length $content
              unless $request->{headers}{'content-length'}
                  || $request->{headers}{'transfer-encoding'};
            # One-shot producer: hands over the whole scalar, then ''.
            $request->{cb} = sub { substr $content, 0, length $content, '' };
        }
        $request->{trailer_cb} = $args->{trailer_callback}
            if ref $args->{trailer_callback} eq 'CODE';
    }
    return;
}

# Return the callback that consumes response body chunks.  The caller's
# data_callback is used only for 2XX responses; otherwise the body is
# accumulated in $response->{content}, enforcing max_size when defined.
sub _prepare_data_cb {
    my ($self, $response, $args) = @_;
    my $data_cb = $args->{data_callback};
    $response->{content} = '';

    if (!$data_cb || $response->{status} !~ /^2/) {
        if (defined $self->{max_size}) {
            $data_cb = sub {
                $_[1]->{content} .= $_[0];
                die(qq/Size of response body exceeds the maximum allowed of $self->{max_size}\n/)
                  if length $_[1]->{content} > $self->{max_size};
            };
        }
        else {
            $data_cb = sub { $_[1]->{content} .= $_[0] };
        }
    }
    return $data_cb;
}

# Decide whether $response should be followed as a redirect.  Returns
# (method, location) for the follow-up request, or an empty list.
# Increments $args->{redirects} as a side effect when a Location is present.
sub _maybe_redirect {
    my ($self, $request, $response, $args) = @_;
    my $headers = $response->{headers};
    my ($status, $method) = ($response->{status}, $request->{method});
    # The alternation must be grouped: /^GET|HEAD$/ would mean "starts with
    # GET" or "ends with HEAD" and so match methods such as GETFOO.
    if (($status eq '303' or ($status =~ /^30[127]/ && $method =~ /^(?:GET|HEAD)$/))
        and $headers->{location}
        and ++$args->{redirects} <= $self->{max_redirect}
    ) {
        my $location = ($headers->{location} =~ /^\//)
            ? "$request->{scheme}://$request->{host_port}$headers->{location}"
            : $headers->{location} ;
        return (($status eq '303' ? 'GET' : $method), $location);
    }
    return;
}

# Split an absolute URL into (scheme, host, port, path_query).  The port
# defaults to 80 for http, 443 for https and undef for other schemes.
sub _split_url {
    my $url = pop;

    # URI regex adapted from the URI module
    my ($scheme, $authority, $path_query) = $url =~ m<\A([^:/?#]+)://([^/?#]*)([^#]*)>
      or Carp::croak(qq/Cannot parse URL: '$url'/);

    $scheme     = lc $scheme;
    $path_query = "/$path_query" unless $path_query =~ m<\A/>;

    my $host = (length($authority)) ? lc $authority : 'localhost';
       $host =~ s/\A[^@]*@//;   # userinfo
    my $port = do {
       $host =~ s/:([0-9]*)\z// && length $1
         ? $1
         : ($scheme eq 'http' ? 80 : $scheme eq 'https' ? 443 : undef);
    };

    return ($scheme, $host, $port, $path_query);
}

# Date conversions adapted from HTTP::Date
my $DoW = "Sun|Mon|Tue|Wed|Thu|Fri|Sat";
my $MoY = "Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec";

# Format an epoch time ($_[1]) as an RFC 1123 date string in GMT.
sub _http_date {
    my ($sec, $min, $hour, $mday, $mon, $year, $wday) = gmtime($_[1]);
    return sprintf("%s, %02d %s %04d %02d:%02d:%02d GMT",
        substr($DoW,$wday*4,3),
        $mday, substr($MoY,$mon*4,3), $year+1900,
        $hour, $min, $sec
    );
}

# Parse the three date layouts handled below (RFC 1123, RFC 850, asctime)
# into an epoch time.  Returns undef when the string matches none of them
# or Time::Local rejects the fields.
sub _parse_http_date {
    my ($self, $str) = @_;
    require Time::Local;
    my @tl_parts;
    if ($str =~ /^[SMTWF][a-z]+, +(\d{1,2}) ($MoY) +(\d\d\d\d) +(\d\d):(\d\d):(\d\d) +GMT$/) {
        @tl_parts = ($6, $5, $4, $1, (index($MoY,$2)/4), $3);
    }
    elsif ($str =~ /^[SMTWF][a-z]+, +(\d\d)-($MoY)-(\d{2,4}) +(\d\d):(\d\d):(\d\d) +GMT$/ ) {
        @tl_parts = ($6, $5, $4, $1, (index($MoY,$2)/4), $3);
    }
    elsif ($str =~ /^[SMTWF][a-z]+ +($MoY) +(\d{1,2}) +(\d\d):(\d\d):(\d\d) +(?:[^0-9]+ +)?(\d\d\d\d)$/ ) {
        @tl_parts = ($5, $4, $3, $2, (index($MoY,$1)/4), $6);
    }
    return eval {
        my $t = @tl_parts ? Time::Local::timegm(@tl_parts) : -1;
        $t < 0 ? undef : $t;
    };
}

package
    HTTP::Tiny::Handle; # hide from PAUSE/indexers
use strict;
use warnings;

use Carp       qw[croak];
use Errno      qw[EINTR EPIPE];
use IO::Socket qw[SOCK_STREAM];

# Size of each sysread() request and of each body slice passed to callbacks.
sub BUFSIZE () { 32768 }

# Render a string for error messages, escaping control and non-ASCII bytes.
my $Printable = sub {
    local $_ = shift;
    s/\r/\\r/g;
    s/\n/\\n/g;
    s/\t/\\t/g;
    s/([^\x20-\x7E])/sprintf('\\x%.2X', ord($1))/ge;
    $_;
};

# Characters allowed in an HTTP header field name.
my $Token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7A\x7C\x7E]/;

# Construct a handle; %args override the defaults listed here.
sub new {
    my ($class, %args) = @_;
    return bless {
        rbuf             => '',
        timeout          => 60,
        max_line_size    => 16384,
        max_header_lines => 64,
        %args
    }, $class;
}

my $ssl_verify_args = {
    check_cn => "when_only",
    wildcards_in_alt => "anywhere",
    wildcards_in_cn => "anywhere"
};

# Open a TCP connection to $host:$port and, for https, upgrade it to SSL and
# check the certificate against the host name.  Croaks/dies on failure.
sub connect {
    @_ == 4 || croak(q/Usage: $handle->connect(scheme, host, port)/);
    my ($self, $scheme, $host, $port) = @_;

    if ( $scheme eq 'https' ) {
        # Block eval: nothing dynamic is being compiled here.  Success is
        # judged via %INC, exactly as before.
        eval { require IO::Socket::SSL }
            unless exists $INC{'IO/Socket/SSL.pm'};
        croak(qq/IO::Socket::SSL must be installed for https support\n/)
            unless $INC{'IO/Socket/SSL.pm'};
    }
    elsif ( $scheme ne 'http' ) {
      croak(qq/Unsupported URL scheme '$scheme'/);
    }

    $self->{fh} = 'IO::Socket::INET'->new(
        PeerHost  => $host,
        PeerPort  => $port,
        Proto     => 'tcp',
        Type      => SOCK_STREAM,
        Timeout   => $self->{timeout}
    ) or croak(qq/Could not connect to '$host:$port': $@/);

    binmode($self->{fh})
      or croak(qq/Could not binmode() socket: '$!'/);

    if ( $scheme eq 'https') {
        IO::Socket::SSL->start_SSL($self->{fh});
        ref($self->{fh}) eq 'IO::Socket::SSL'
            or die(qq/SSL connection failed for $host\n/);
        $self->{fh}->verify_hostname( $host, $ssl_verify_args )
            or die(qq/SSL certificate not valid for $host\n/);
    }

    $self->{host} = $host;
    $self->{port} = $port;

    return $self;
}

# Close the underlying socket; croaks if close fails.
sub close {
    @_ == 1 || croak(q/Usage: $handle->close()/);
    my ($self) = @_;
    CORE::close($self->{fh})
      or croak(qq/Could not close socket: '$!'/);
}

# Write all of $buf to the socket, retrying after EINTR and short writes.
# Returns the number of bytes written.
sub write {
    @_ == 2 || croak(q/Usage: $handle->write(buf)/);
    my ($self, $buf) = @_;

    if ( $] ge '5.008' ) {
        utf8::downgrade($buf, 1)
            or croak(q/Wide character in write()/);
    }

    my $len = length $buf;
    my $off = 0;

    # Turn SIGPIPE into an EPIPE error return instead of killing the process.
    local $SIG{PIPE} = 'IGNORE';

    while () {
        $self->can_write
          or croak(q/Timed out while waiting for socket to become ready for writing/);
        my $r = syswrite($self->{fh}, $buf, $len, $off);
        if (defined $r) {
            $len -= $r;
            $off += $r;
            last unless $len > 0;
        }
        elsif ($! == EPIPE) {
            croak(qq/Socket closed by remote server: $!/);
        }
        elsif ($! != EINTR) {
            croak(qq/Could not write to socket: '$!'/);
        }
    }
    return $off;
}

# Read $len bytes, draining the internal read buffer first.  Croaks on a
# short read at end of stream unless $allow_partial is true.
sub read {
    @_ == 2 || @_ == 3 || croak(q/Usage: $handle->read(len [, allow_partial])/);
    my ($self, $len, $allow_partial) = @_;

    my $buf  = '';
    my $got = length $self->{rbuf};

    if ($got) {
        my $take = ($got < $len) ? $got : $len;
        $buf  = substr($self->{rbuf}, 0, $take, '');
        $len -= $take;
    }

    while ($len > 0) {
        $self->can_read
          or croak(q/Timed out while waiting for socket to become ready for reading/);
        my $r = sysread($self->{fh}, $buf, $len, length $buf);
        if (defined $r) {
            last unless $r;     # 0 bytes: end of stream
            $len -= $r;
        }
        elsif ($! != EINTR) {
            croak(qq/Could not read from socket: '$!'/);
        }
    }
    if ($len && !$allow_partial) {
        croak(q/Unexpected end of stream/);
    }
    return $buf;
}

# Return the next line (including its LF or CRLF) from the stream, refilling
# the read buffer as needed.  Croaks on over-long lines, timeout or EOF.
sub readline {
    @_ == 1 || croak(q/Usage: $handle->readline()/);
    my ($self) = @_;

    while () {
        if ($self->{rbuf} =~ s/\A ([^\x0D\x0A]* \x0D?\x0A)//x) {
            return $1;
        }
        if (length $self->{rbuf} >= $self->{max_line_size}) {
            croak(qq/Line size exceeds the maximum allowed size of $self->{max_line_size}/);
        }
        $self->can_read
          or croak(q/Timed out while waiting for socket to become ready for reading/);
        my $r = sysread($self->{fh}, $self->{rbuf}, BUFSIZE, length $self->{rbuf});
        if (defined $r) {
            last unless $r;
        }
        elsif ($! != EINTR) {
            croak(qq/Could not read from socket: '$!'/);
        }
    }
    croak(q/Unexpected end of stream while looking for line/);
}

# Read header lines up to the blank line into $headers (a new hashref when
# none is passed).  Names are lower-cased; a repeated name becomes an
# arrayref; continuation lines are folded into the preceding value.
sub read_header_lines {
    @_ == 1 || @_ == 2 || croak(q/Usage: $handle->read_header_lines([headers])/);
    my ($self, $headers) = @_;
    $headers ||= {};
    my $lines   = 0;
    my $val;    # reference to the most recent value, for continuation lines

    while () {
         my $line = $self->readline;

         if (++$lines >= $self->{max_header_lines}) {
             croak(qq/Header lines exceeds maximum number allowed of $self->{max_header_lines}/);
         }
         elsif ($line =~ /\A ([^\x00-\x1F\x7F:]+) : [\x09\x20]* ([^\x0D\x0A]*)/x) {
             my ($field_name) = lc $1;
             if (exists $headers->{$field_name}) {
                 for ($headers->{$field_name}) {
                     $_ = [$_] unless ref $_ eq "ARRAY";
                     push @$_, $2;
                     $val = \$_->[-1];
                 }
             }
             else {
                 $val = \($headers->{$field_name} = $2);
             }
         }
         elsif ($line =~ /\A [\x09\x20]+ ([^\x0D\x0A]*)/x) {
             $val
               or croak(q/Unexpected header continuation line/);
             next unless length $1;
             $$val .= ' ' if length $$val;
             $$val .= $1;
         }
         elsif ($line =~ /\A \x0D?\x0A \z/x) {
            last;
         }
         else {
            croak(q/Malformed header line: / . $Printable->($line));
         }
    }
    return $headers;
}

# Send the request line, headers and (when a body producer is set) the body.
sub write_request {
    @_ == 2 || croak(q/Usage: $handle->write_request(request)/);
    my($self, $request) = @_;
    $self->write_request_header(@{$request}{qw/method uri headers/});
    $self->write_body($request) if $request->{cb};
    return;
}

# Canonical capitalisation for names that simple word-casing gets wrong;
# also used as a cache for names computed in write_header_lines.
my %HeaderCase = (
    'content-md5'      => 'Content-MD5',
    'etag'             => 'ETag',
    'te'               => 'TE',
    'www-authenticate' => 'WWW-Authenticate',
    'x-xss-protection' => 'X-XSS-Protection',
);

# Serialise $headers followed by the blank line that ends a header block and
# write it out.  Croaks on an invalid field name, or on a value that is
# undefined or contains CR/LF.  Returns the number of bytes written.
sub write_header_lines {
    (@_ == 2 && ref $_[1] eq 'HASH') || croak(q/Usage: $handle->write_header_lines(headers)/);
    my($self, $headers) = @_;

    my $buf = '';
    while (my ($k, $v) = each %$headers) {
        my $field_name = lc $k;
        if (exists $HeaderCase{$field_name}) {
            $field_name = $HeaderCase{$field_name};
        }
        else {
            $field_name =~ /\A $Token+ \z/xo
              or croak(q/Invalid HTTP header field name: / . $Printable->($field_name));
            $field_name =~ s/\b(\w)/\u$1/g;
            $HeaderCase{lc $field_name} = $field_name;
        }
        for my $value (ref $v eq 'ARRAY' ? @$v : $v) {
            # A raw CR or LF inside a value would let the caller's data start
            # a new header line (header injection), so refuse it outright.
            defined $value && $value !~ /[\x0D\x0A]/
              or croak(qq/Invalid HTTP header field value ($field_name): /
                       . $Printable->(defined $value ? $value : ''));
            $buf .= "$field_name: $value\x0D\x0A";
        }
    }
    $buf .= "\x0D\x0A";
    return $self->write($buf);
}

# Read the response body, choosing chunked or plain decoding from the
# Transfer-Encoding header, and feed it to $cb.
sub read_body {
    @_ == 3 || croak(q/Usage: $handle->read_body(callback, response)/);
    my ($self, $cb, $response) = @_;
    my $te = $response->{headers}{'transfer-encoding'} || '';
    if ( grep { /chunked/i } ( ref $te eq 'ARRAY' ? @$te : $te ) ) {
        $self->read_chunked_body($cb, $response);
    }
    else {
        $self->read_content_body($cb, $response);
    }
    return;
}

# Send the request body with the framing announced in the headers.
sub write_body {
    @_ == 2 || croak(q/Usage: $handle->write_body(request)/);
    my ($self, $request) = @_;
    my $headers        = $request->{headers};
    my $content_length = $headers->{'content-length'};
    # "Content-Length: 0" (an empty scalar body) is false but still announces
    # length framing; sending chunk data after it would corrupt the stream.
    if ($content_length
        || (defined $content_length && !$headers->{'transfer-encoding'})) {
        return $self->write_content_body($request);
    }
    else {
        return $self->write_chunked_body($request);
    }
}

# Read a body of known length (explicit $content_length or the
# Content-Length header) in BUFSIZE slices; with no length, read until the
# stream ends.  Each slice is passed to $cb along with $response.
sub read_content_body {
    @_ == 3 || @_ == 4 || croak(q/Usage: $handle->read_content_body(callback, response, [read_length])/);
    my ($self, $cb, $response, $content_length) = @_;
    $content_length ||= $response->{headers}{'content-length'};

    if ( $content_length ) {
        my $len = $content_length;
        while ($len > 0) {
            my $read = ($len > BUFSIZE) ? BUFSIZE : $len;
            $cb->($self->read($read, 0), $response);
            $len -= $read;
        }
    }
    else {
        my $chunk;
        $cb->($chunk, $response) while length( $chunk = $self->read(BUFSIZE, 1) );
    }

    return;
}

# Write the body produced by $request->{cb} and verify that its total size
# equals the announced Content-Length.  Returns the number of bytes sent.
sub write_content_body {
    @_ == 2 || croak(q/Usage: $handle->write_content_body(request)/);
    my ($self, $request) = @_;

    my ($len, $content_length) = (0, $request->{headers}{'content-length'});
    while () {
        my $data = $request->{cb}->();

        defined $data && length $data
          or last;

        if ( $] ge '5.008' ) {
            utf8::downgrade($data, 1)
                or croak(q/Wide character in write_content()/);
        }

        $len += $self->write($data);
    }

    $len == $content_length
      or croak(qq/Content-Length missmatch (got: $len expected: $content_length)/);

    return $len;
}

# Decode a chunked response body, passing the data to $cb, then merge any
# trailer fields into $response->{headers}.
sub read_chunked_body {
    @_ == 3 || croak(q/Usage: $handle->read_chunked_body(callback, $response)/);
    my ($self, $cb, $response) = @_;

    while () {
        my $head = $self->readline;

        $head =~ /\A ([A-Fa-f0-9]+)/x
          or croak(q/Malformed chunk head: / . $Printable->($head));

        my $len = hex($1)
          or last;      # zero-length chunk ends the body

        $self->read_content_body($cb, $response, $len);

        $self->read(2) eq "\x0D\x0A"
          or croak(q/Malformed chunk: missing CRLF after chunk data/);
    }
    $self->read_header_lines($response->{headers});
    return;
}

# Send the body produced by $request->{cb} using chunked transfer coding.
# Returns the number of payload bytes sent.
sub write_chunked_body {
    @_ == 2 || croak(q/Usage: $handle->write_chunked_body(request)/);
    my ($self, $request) = @_;

    my $len = 0;
    while () {
        my $data = $request->{cb}->();

        defined $data && length $data
          or last;

        if ( $] ge '5.008' ) {
            utf8::downgrade($data, 1)
                or croak(q/Wide character in write_chunked_body()/);
        }

        $len += length $data;

        my $chunk  = sprintf '%X', length $data;
           $chunk .= "\x0D\x0A";
           $chunk .= $data;
           $chunk .= "\x0D\x0A";

        $self->write($chunk);
    }
    $self->write("0\x0D\x0A");
    if (ref $request->{trailer_cb} eq 'CODE') {
        # write_header_lines ends with the blank line that closes the body.
        $self->write_header_lines($request->{trailer_cb}->());
    }
    else {
        # Without trailers the closing CRLF must still be sent, otherwise
        # the peer keeps waiting for the end of the chunked body.
        $self->write("\x0D\x0A");
    }
    return $len;
}

# Read and parse the status line and headers.  Returns a hashref with the
# keys status, reason, headers and protocol.
sub read_response_header {
    @_ == 1 || croak(q/Usage: $handle->read_response_header()/);
    my ($self) = @_;

    my $line = $self->readline;

    $line =~ /\A (HTTP\/(0*\d+\.0*\d+)) [\x09\x20]+ ([0-9]{3}) [\x09\x20]+ ([^\x0D\x0A]*) \x0D?\x0A/x
      or croak(q/Malformed Status-Line: / . $Printable->($line));

    my ($protocol, $version, $status, $reason) = ($1, $2, $3, $4);

    # Anchored so that only 1.0 and 1.1 pass; an unanchored match would
    # also accept versions such as 21.0 or 1.10.
    croak (qq/Unsupported HTTP protocol: $protocol/)
        unless $version =~ /\A0*1\.0*[01]\z/;

    return {
        status   => $status,
        reason   => $reason,
        headers  => $self->read_header_lines,
        protocol => $protocol,
    };
}

# Write the request line and header block; returns the bytes written.
sub write_request_header {
    @_ == 4 || croak(q/Usage: $handle->write_request_header(method, request_uri, headers)/);
    my ($self, $method, $request_uri, $headers) = @_;

    return $self->write("$method $request_uri HTTP/1.1\x0D\x0A")
         + $self->write_header_lines($headers);
}

# Wait with select() until the socket is ready for $type ('read' or anything
# else for write) or the timeout expires.  Returns the select() count, with
# 0 meaning timed out.  A select() interrupted by a signal is retried with
# the remaining time.
sub _do_timeout {
    my ($self, $type, $timeout) = @_;
    $timeout = $self->{timeout}
        unless defined $timeout && $timeout >= 0;

    my $fd = fileno $self->{fh};
    defined $fd && $fd >= 0
      or croak(q/select(2): 'Bad file descriptor'/);

    my $initial = time;
    my $pending = $timeout;
    my $nfound;

    vec(my $fdset = '', $fd, 1) = 1;

    while () {
        $nfound = ($type eq 'read')
            ? select($fdset, undef, undef, $pending)
            : select(undef, $fdset, undef, $pending) ;
        if ($nfound == -1) {
            $! == EINTR
              or croak(qq/select(2): '$!'/);
            redo if !$timeout || ($pending = $timeout - (time - $initial)) > 0;
            $nfound = 0;
        }
        last;
    }
    $! = 0;
    return $nfound;
}

# True when the socket becomes readable within the (optional) timeout.
sub can_read {
    @_ == 1 || @_ == 2 || croak(q/Usage: $handle->can_read([timeout])/);
    my $self = shift;
    return $self->_do_timeout('read', @_)
}

# True when the socket becomes writable within the (optional) timeout.
sub can_write {
    @_ == 1 || @_ == 2 || croak(q/Usage: $handle->can_write([timeout])/);
    my $self = shift;
    return $self->_do_timeout('write', @_)
}

1;

__END__
=pod

=head1 NAME

HTTP::Tiny - A small, simple, correct HTTP/1.1 client

=head1 VERSION

version 0.012

=head1 SYNOPSIS

    use HTTP::Tiny;

    my $response = HTTP::Tiny->new->get('http://example.com/');

    die "Failed!\n" unless $response->{success};

    print "$response->{status} $response->{reason}\n";

    while (my ($k, $v) = each %{$response->{headers}}) {
        for (ref $v eq 'ARRAY' ?
@$v : $v) {
            print "$k: $_\n";
        }
    }

    print $response->{content} if length $response->{content};

=head1 DESCRIPTION

This is a very simple HTTP/1.1 client, designed primarily for doing simple GET
requests without the overhead of a large framework like L<LWP::UserAgent>.

It is more correct and more complete than L<HTTP::Lite>.  It supports
proxies (currently only non-authenticating ones) and redirection.  It
also correctly resumes after EINTR.

=head1 METHODS

=head2 new

    $http = HTTP::Tiny->new( %attributes );

This constructor returns a new HTTP::Tiny object.  Valid attributes include:

=over 4

=item * agent

A user-agent string (defaults to 'HTTP::Tiny/$VERSION')

=item * default_headers

A hashref of default headers to apply to requests

=item * max_redirect

Maximum number of redirects allowed (defaults to 5)

=item * max_size

Maximum response size (only when not using a data callback).  If defined,
responses larger than this will die with an error message

=item * proxy

URL of a proxy server to use.

=item * timeout

Request timeout in seconds (default is 60)

=back

=head2 get

    $response = $http->get($url);
    $response = $http->get($url, \%options);

Executes a C<GET> request for the given URL.  The URL must have unsafe
characters escaped and international domain names encoded.  Internally, it just
calls C<request()> with 'GET' as the method.  See C<request()> for valid
options and a description of the response.

=head2 mirror

    $response = $http->mirror($url, $file, \%options);
    if ( $response->{success} ) {
        print "$file is up to date\n";
    }

Executes a C<GET> request for the URL and saves the response body to the file
name provided.  The URL must have unsafe characters escaped and international
domain names encoded.  If the file already exists, the request will include an
C<If-Modified-Since> header with the modification timestamp of the file.  You
may specify a different C<If-Modified-Since> header yourself in the C<<
$options->{headers} >> hash.

The C<success> field of the response will be true if the status code is 2XX
or 304 (unmodified).

If the file was modified and the server response includes a properly
formatted C<Last-Modified> header, the file modification time will be updated
accordingly.

=head2 request

    $response = $http->request($method, $url);
    $response = $http->request($method, $url, \%options);

Executes an HTTP request of the given method type ('GET', 'HEAD', 'POST',
'PUT', etc.) on the given URL.  The URL must have unsafe characters escaped and
international domain names encoded.  A hashref of options may be appended to
modify the request.

Valid options are:

=over 4

=item * headers

A hashref containing headers to include with the request.  If the value for
a header is an array reference, the header will be output multiple times with
each value in the array.  These headers overwrite any default headers.

=item * content

A scalar to include as the body of the request OR a code reference
that will be called iteratively to produce the body of the request

=item * trailer_callback

A code reference that will be called if it exists to provide a hashref
of trailing headers (only used with chunked transfer-encoding)

=item * data_callback

A code reference that will be called for each chunk of the response
body received.

=back

If the C<content> option is a code reference, it will be called iteratively
to provide the content body of the request.  It should return the empty
string or undef when the iterator is exhausted.

If the C<data_callback> option is provided, it will be called iteratively until
the entire response body is received.  The first argument will be a string
containing a chunk of the response body, the second argument will be the
in-progress response hash reference, as described below.  (This allows
customizing the action of the callback based on the C<status> or C<headers>
received prior to the content body.)

The C<request> method returns a hashref containing the response.
The hashref will have the following keys:

=over 4

=item * success

Boolean indicating whether the operation returned a 2XX status code

=item * status

The HTTP status code of the response

=item * reason

The response phrase returned by the server

=item * content

The body of the response.  If the response does not have any content
or if a data callback is provided to consume the response body,
this will be the empty string

=item * headers

A hashref of header fields.  All header field names will be normalized
to be lower case.  If a header is repeated, the value will be an arrayref;
it will otherwise be a scalar string containing the value

=back

On an exception during the execution of the request, the C<status> field will
contain 599, and the C<content> field will contain the text of the exception.

=for Pod::Coverage agent
default_headers
max_redirect
max_size
proxy
timeout

=head1 LIMITATIONS

HTTP::Tiny is I<conditionally compliant> with the
L<HTTP/1.1 specification|http://www.w3.org/Protocols/rfc2616/rfc2616.html>.
It attempts to meet all "MUST" requirements of the specification, but does not
implement all "SHOULD" requirements.

Some particular limitations of note include:

=over

=item *

HTTP::Tiny focuses on correct transport.  Users are responsible for ensuring
that user-defined headers and content are compliant with the HTTP/1.1
specification.

=item *

Users must ensure that URLs are properly escaped for unsafe characters and that
international domain names are properly encoded to ASCII.  See L<URI::Escape>,
L<URI::_punycode> and L<Net::IDN::Encode>.

=item *

Redirection is very strict against the specification.  Redirection is only
automatic for response codes 301, 302 and 307 if the request method is 'GET' or
'HEAD'.  Response code 303 is always converted into a 'GET' redirection, as
mandated by the specification.  There is no automatic support for status 305
("Use proxy") redirections.

=item *

Persistent connections are not supported.  The C<Connection> header will
always be set to C<close>.

=item *

Direct C<https> connections are supported only if L<IO::Socket::SSL> is
installed.  There is no support for C<https> connections via proxy.
Any SSL certificate that matches the host is accepted -- SSL certificates are not verified against certificate authorities. =item * Cookies are not directly supported. Users that set a C header should also set C to zero to ensure cookies are not inappropriately re-transmitted. =item * Proxy environment variables are not supported. =item * There is no provision for delaying a request body using an C header. Unexpected C<1XX> responses are silently ignored as per the specification. =item * Only 'chunked' C is supported. =item * There is no support for a Request-URI of '*' for the 'OPTIONS' request. =back =head1 SEE ALSO =over 4 =item * L =back =for :stopwords cpan testmatrix url annocpan anno bugtracker rt cpants kwalitee diff irc mailto metadata placeholders =head1 SUPPORT =head2 Bugs / Feature Requests Please report any bugs or feature requests by email to C, or through the web interface at L. You will be automatically notified of any progress on the request by the system. =head2 Source Code This is open source software. The code repository is available for public review and contribution under the terms of the license. L git clone git://github.com/dagolden/p5-http-tiny.git =head1 AUTHORS =over 4 =item * Christian Hansen =item * David Golden =back =head1 COPYRIGHT AND LICENSE This software is copyright (c) 2011 by Christian Hansen. This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself. =cut abled() && $] >= 5.003_06) { unless ($please_use_I18N_Collate_even_if_deprecated) { warnings::warn <<___EOD___; *** WARNING: starting from the Perl version 5.003_06 the I18N::Collate interface for comparing 8-bit scalar data according to the current locale HAS BEEN DEPRECATED That is, please do not use it anymore for any new applications and please migrate the old applications away from it because its functionality was integrated into the Perl core language in the release 5.003_06. 
See the perllocale manual page for further information. *** ___EOD___ $please_use_I18N_Collate_even_if_deprecated++; } } bless \$new; } sub setlocale { my ($category, $locale) = @_[0,1]; POSIX::setlocale($category, $locale) if (defined $category); # the current $LOCALE $LOCALE = $locale || $ENV{'LC_COLLATE'} || $ENV{'LC_ALL'} || ''; } sub C { my $s = ${$_[0]}; $C->{$LOCALE}->{$s} = collate_xfrm($s) unless (defined $C->{$LOCALE}->{$s}); # cache when met $C->{$LOCALE}->{$s}; } sub collate_xfrm { my $s = $_[0]; my $x = ''; for (split(/(\000+)/, $s)) { $x .= (/^\000/) ? $_ : strxfrm("$_\000"); } $x; } sub collate_cmp { &C($_[0]) cmp &C($_[1]); } # init $LOCALE &I18N::Collate::setlocale(); 1; # keep require happy . ..! Detect.pm"List.pmpackage I18N::Collate; use strict; our $VERSION = '1.02'; =head1 NAME I18N::Collate - compare 8-bit scalar data according to the current locale =head1 SYNOPSIS use I18N::Collate; setlocale(LC_COLLATE, 'locale-of-your-choice'); $s1 = I18N::Collate->new("scalar_data_1"); $s2 = I18N::Collate->new("scalar_data_2"); =head1 DESCRIPTION *** WARNING: starting from the Perl version 5.003_06 the I18N::Collate interface for comparing 8-bit scalar data according to the current locale HAS BEEN DEPRECATED That is, please do not use it anymore for any new applications and please migrate the old applications away from it because its functionality was integrated into the Perl core language in the release 5.003_06. See the perllocale manual page for further information. *** This module provides you with objects that will collate according to your national character set, provided that the POSIX setlocale() function is supported on your system. You can compare $s1 and $s2 above with $s1 le $s2 to extract the data itself, you'll need a dereference: $$s1 This module uses POSIX::setlocale(). The basic collation conversion is done by strxfrm() which terminates at NUL characters being a decent C routine. collate_xfrm() handles embedded NUL characters gracefully. 
The available locales depend on your operating system; try whether C shows them or man pages for "locale" or "nlsinfo" or the direct approach C or C or C. Not all the locales that your vendor supports are necessarily installed: please consult your operating system's documentation and possibly your local system administration. The locale names are probably something like C or C, for example C is the Swiss (CH) variant of French (fr), ISO Latin (8859) 1 (-1) which is the Western European character set. =cut # I18N::Collate.pm # # Author: Jarkko Hietaniemi > # Helsinki University of Technology, Finland # # Acks: Guy Decoux > understood # overloading magic much deeper than I and told # how to cut the size of this code by more than half. # (my first version did overload all of lt gt eq le ge cmp) # # Purpose: compare 8-bit scalar data according to the current locale # # Requirements: Perl5 POSIX::setlocale() and POSIX::strxfrm() # # Exports: setlocale 1) # collate_xfrm 2) # # Overloads: cmp # 3) # # Usage: use I18N::Collate; # setlocale(LC_COLLATE, 'locale-of-your-choice'); # 4) # $s1 = I18N::Collate->("scalar_data_1"); # $s2 = I18N::Collate->("scalar_data_2"); # # now you can compare $s1 and $s2: $s1 le $s2 # to extract the data itself, you need to deref: $$s1 # # Notes: # 1) this uses POSIX::setlocale # 2) the basic collation conversion is done by strxfrm() which # terminates at NUL characters being a decent C routine. # collate_xfrm handles embedded NUL characters gracefully. # 3) due to cmp and overload magic, lt le eq ge gt work also # 4) the available locales depend on your operating system; # try whether "locale -a" shows them or man pages for # "locale" or "nlsinfo" work or the more direct # approach "ls /usr/lib/nls/loc" or "ls /usr/lib/nls". # Not all the locales that your vendor supports # are necessarily installed: please consult your # operating system's documentation. 
# The locale names are probably something like # 'xx_XX.(ISO)?8859-N' or 'xx_XX.(ISO)?8859N', # for example 'fr_CH.ISO8859-1' is the Swiss (CH) # variant of French (fr), ISO Latin (8859) 1 (-1) # which is the Western European character set. # # Updated: 19961005 # # --- use POSIX qw(strxfrm LC_COLLATE); use warnings::register; require Exporter; our @ISA = qw(Exporter); our @EXPORT = qw(collate_xfrm setlocale LC_COLLATE); our @EXPORT_OK = qw(); use overload qw( fallback 1 cmp collate_cmp ); our($LOCALE, $C); our $please_use_I18N_Collate_even_if_deprecated = 0; sub new { my $new = $_[1]; if (warnings::en # Time-stamp: "2004-10-06 23:26:33 ADT" # Sean M. Burke require 5.000; package I18N::LangTags; use strict; use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION %Panic); require Exporter; @ISA = qw(Exporter); @EXPORT = qw(); @EXPORT_OK = qw(is_language_tag same_language_tag extract_language_tags super_languages similarity_language_tag is_dialect_of locale2language_tag alternate_language_tags encode_language_tag panic_languages implicate_supers implicate_supers_strictly ); %EXPORT_TAGS = ('ALL' => \@EXPORT_OK); $VERSION = "0.35_01"; sub uniq { my %seen; return grep(!($seen{$_}++), @_); } # a util function =head1 NAME I18N::LangTags - functions for dealing with RFC3066-style language tags =head1 SYNOPSIS use I18N::LangTags(); ...or specify whichever of those functions you want to import, like so: use I18N::LangTags qw(implicate_supers similarity_language_tag); All the exportable functions are listed below -- you're free to import only some, or none at all. By default, none are imported. If you say: use I18N::LangTags qw(:ALL) ...then all are exported. (This saves you from having to use something less obvious like C.) If you don't import any of these functions, assume a C<&I18N::LangTags::> in front of all the function names in the following examples. 
=head1 DESCRIPTION

Language tags are a formalism, described in RFC 3066 (obsoleting
1766), for declaring what language form (language and possibly
dialect) a given chunk of information is in.

This library provides functions for common tasks involving language
tags as they are needed in a variety of protocols and applications.

Please see the "See Also" references for a thorough explanation
of how to correctly use language tags.

=over

=cut

###########################################################################

=item * the function is_language_tag($lang1)

Returns true iff $lang1 is a formally valid language tag.

   is_language_tag("fr")            is TRUE
   is_language_tag("x-jicarilla")   is FALSE
       (Subtags can be 8 chars long at most -- 'jicarilla' is 9)

   is_language_tag("sgn-US")    is TRUE
       (That's American Sign Language)

   is_language_tag("i-Klikitat")    is TRUE
       (True without regard to the fact noone has actually
        registered Klikitat -- it's a formally valid tag)

   is_language_tag("fr-patois")     is TRUE
       (Formally valid -- altho descriptively weak!)

   is_language_tag("Spanish")       is FALSE
   is_language_tag("french-patois") is FALSE
       (No good -- first subtag has to match
        /^([xXiI]|[a-zA-Z]{2,3})$/ -- see RFC3066)

   is_language_tag("x-borg-prot2532") is TRUE
       (Yes, subtags can contain digits, as of RFC3066)

=cut

# Returns 1 when the argument is a formally valid language tag (compared
# case-insensitively), 0 otherwise.
sub is_language_tag {

  ## Changes in the language tagging standards may have to be reflected here.

  my $candidate = lc($_[0]);

  # A lone "i" or "x" would be let through by the pattern below, so these
  # degenerate cases are rejected up front.
  if ($candidate eq "i" or $candidate eq "x") {
    return 0;
  }

  my $is_valid = $candidate =~
    /^(?:  # First subtag
         [xi] | [a-z]{2,3}
      )
      (?:  # Subtags thereafter
         -           # separator
         [a-z0-9]{1,8}  # subtag
      )*
    $/xs;

  return 1 if $is_valid;
  return 0;
}

###########################################################################

=item * the function extract_language_tags($whatever)

Returns a list of whatever looks like formally valid language tags
in $whatever.  Not very smart, so don't get too creative with
what you want to feed it.

extract_language_tags("fr, fr-ca, i-mingo")
     returns:   ('fr', 'fr-ca', 'i-mingo')

   extract_language_tags("It's like this: I'm in fr -- French!")
     returns:   ('It', 'in', 'fr')
   (So don't just feed it any old thing.)

The output is untainted.  If you don't know what tainting is,
don't worry about it.

=cut

# Returns every substring of the first line of the argument that looks like
# a language tag, excluding bare "i"/"x" (count in scalar context).
sub extract_language_tags {

  ## Changes in the language tagging standards may have to be reflected here.

  # Going through a regex capture is what makes the result untainted.
  my $text = '';
  if ($_[0] =~ m/(.+)/) {
    $text = $1;
  }

  my @candidates = $text =~
    m/
      \b
      (?:  # First subtag
         [iIxX] | [a-zA-Z]{2,3}
      )
      (?:  # Subtags thereafter
         -           # separator
         [a-zA-Z0-9]{1,8}  # subtag
      )*
      \b
    /xsg;

  # 'i' and 'x' aren't good tags
  return grep { !m/^[ixIX]$/s } @candidates;
}

###########################################################################

=item * the function same_language_tag($lang1, $lang2)

Returns true iff $lang1 and $lang2 are acceptable variant tags
representing the same language-form.

   same_language_tag('x-kadara', 'i-kadara')  is TRUE
      (The x/i- alternation doesn't matter)
   same_language_tag('X-KADARA', 'i-kadara')  is TRUE
      (...and neither does case)
   same_language_tag('en',       'en-US')     is FALSE
      (all-English is not the SAME as US English)
   same_language_tag('x-kadara', 'x-kadar')   is FALSE
      (these are totally unrelated tags)
   same_language_tag('no-bok',    'nb')       is TRUE
      (no-bok is a legacy tag for nb (Norwegian Bokmal))

C<same_language_tag> works by just seeing whether
C<encode_language_tag($lang1)> is the same as
C<encode_language_tag($lang2)>.

(Yes, I know this function is named a bit oddly.  Call it historic
reasons.)

=cut

# Returns 1 when both arguments encode to the same value, 0 otherwise.
sub same_language_tag {
  my $first = encode_language_tag($_[0]);

  # Bail out early so that two invalid tags (both encoding to undef) are
  # never reported as "the same language".
  return 0 unless defined $first;

  my $second = encode_language_tag($_[1]);
  return( ($first eq $second) ? 1 : 0 );
}

###########################################################################

=item * the function similarity_language_tag($lang1, $lang2)

Returns an integer representing the degree of similarity between
tags $lang1 and $lang2 (the order of which does not matter), where
similarity is the number of common elements on the left,
without regard to case and to x/i- alternation.

   similarity_language_tag('fr', 'fr-ca')           is 1
      (one element in common)
   similarity_language_tag('fr-ca', 'fr-FR')        is 1
      (one element in common)

   similarity_language_tag('fr-CA-joual',
                           'fr-CA-PEI')             is 2
   similarity_language_tag('fr-CA-joual', 'fr-CA')  is 2
      (two elements in common)

   similarity_language_tag('x-kadara', 'i-kadara')  is 1
      (x/i- doesn't matter)

   similarity_language_tag('en',       'x-kadar')   is 0
   similarity_language_tag('x-kadara', 'x-kadar')   is 0
      (unrelated tags -- no similarity)

   similarity_language_tag('i-cree-syllabic',
                           'i-cherokee-syllabic')   is 0
      (no B<leftmost> elements in common!)

=cut

# Counts the leading subtags shared by the encoded forms of the two tags.
# Returns undef when neither tag encodes, 0 when only one of them does.
sub similarity_language_tag {
  my $left  = encode_language_tag($_[0]);
  my $right = encode_language_tag($_[1]);
   # And encode_language_tag takes care of the whole
   #  no-nyn==nn, i-hakka==zh-hakka, etc, things

  # NB: (i-sil-...)?  (i-sgn-...)?

  unless (defined $left) {
    return defined($right) ? 0 : undef;
  }
  return 0 unless defined $right;

  my @left_parts  = split('-', $left);
  my @right_parts = split('-', $right);
  my $limit = @left_parts < @right_parts ? @left_parts : @right_parts;

  my $shared = 0;
  for my $i (0 .. $limit - 1) {
    last if $left_parts[$i] ne $right_parts[$i];
    ++$shared;
  }
  return $shared;
}

###########################################################################

=item * the function is_dialect_of($lang1, $lang2)

Returns true iff language tag $lang1 represents a subform of
language tag $lang2.

B<Get the order right!  It makes a difference.>

   is_dialect_of('en-US', 'en')            is TRUE
     (American English IS a dialect of all-English)

   is_dialect_of('fr-CA-joual', 'fr-CA')   is TRUE
   is_dialect_of('fr-CA-joual', 'fr')      is TRUE
     (Joual is a dialect of (a dialect of) French)

   is_dialect_of('en', 'en-US')            is FALSE
     (all-English is a NOT dialect of American English)

   is_dialect_of('fr', 'en-CA')            is FALSE

   is_dialect_of('en',    'en'   )         is TRUE
   is_dialect_of('en-US', 'en-US')         is TRUE
     (B<Note:> these are degenerate cases)

   is_dialect_of('i-mingo-tom', 'x-Mingo') is TRUE
     (the x/i thing doesn't matter, nor does case)

   is_dialect_of('nn', 'no')               is TRUE
     (because 'nn' (New Norse) is aliased to 'no-nyn',
      as a special legacy case, and 'no-nyn' is a
      subform of 'no' (Norwegian))

=cut

# Returns 1 when the first tag is the second tag itself or a subform of it,
# judged on the encoded forms.  Returns undef when neither tag encodes,
# 0 when only one of them does.
sub is_dialect_of {
  my $dialect = encode_language_tag($_[0]);
  my $parent  = encode_language_tag($_[1]);

  if (!defined($dialect) || !defined($parent)) {
    return undef unless defined($dialect) || defined($parent);
    return 0;
  }

  return 1 if $dialect eq $parent;
  return 0 if length($dialect) < length($parent);

  # Compare with a trailing '-' on both sides so that only whole subtags
  # count as a prefix.
  my $prefix = $parent . '-';
  return( (index($dialect . '-', $prefix) == 0) ? 1 : 0 );
}

###########################################################################

=item * the function super_languages($lang1)

Returns a list of language tags that are superordinate tags to $lang1
-- it gets this by removing subtags from the end of $lang1 until
nothing (or just "i" or "x") is left.

   super_languages("fr-CA-joual")  is  ("fr-CA", "fr")

   super_languages("en-AU")  is  ("en")

   super_languages("en")  is  empty-list, ()

   super_languages("i-cherokee")  is  empty-list, ()
    ...not ("i"), which would be illegal as well as pointless.

If $lang1 is not a valid language tag, returns empty-list in
a list context, undef in a scalar context.

A notable and rather unavoidable problem with this method:
"x-mingo-tom" has an "x" because the whole tag isn't an
IANA-registered tag -- but super_languages('x-mingo-tom') is
('x-mingo') -- which isn't really right, since 'i-mingo' is
registered.
But this module has no way of knowing that.  (But note that
same_language_tag('x-mingo', 'i-mingo') is TRUE.)

More importantly, you assume at your peril that superordinates of
$lang1 are mutually intelligible with $lang1.  Consider this
carefully.

=cut

sub super_languages {
  my ($tag) = @_;
  return unless defined $tag;
  return unless is_language_tag($tag);

  # a hack for those annoying new (2001) tags:
  $tag =~ s/^nb\b/no-bok/i; # yes, backwards
  $tag =~ s/^nn\b/no-nyn/i; # yes, backwards
  $tag =~ s/^[ix](-hakka\b)/zh$1/i; # goes the right way
   # i-hakka-bork-bjork-bjark => zh-hakka-bork-bjork-bjark

  ## Changes in the language tagging standards may have to be reflected here.
  # NB: (i-sil-...)?

  # Every proper prefix of the tag, shortest first: for "fr-CA-joual"
  # that is ("fr", "fr-CA").
  my @subtags   = split('-', $tag);
  my @ancestors = map { join('-', @subtags[0 .. $_]) } 0 .. $#subtags - 1;

  # A lone "i" or "x" is not a language tag in its own right.
  shift @ancestors if @ancestors && $ancestors[0] =~ m<^[iIxX]$>s;

  return reverse @ancestors;   # longest first
}

###########################################################################

=item * the function locale2language_tag($locale_identifier)

This takes a locale name (like "en", "en_US", or "en_US.ISO8859-1")
and maps it to a language tag.  If it's not mappable (as with,
notably, "C" and "POSIX"), this returns empty-list in a list context,
or undef in a scalar context.

   locale2language_tag("en") is "en"

   locale2language_tag("en_US") is "en-US"

   locale2language_tag("en_US.ISO8859-1") is "en-US"

   locale2language_tag("C") is undef or ()

   locale2language_tag("POSIX") is undef or ()

I'm not totally sure that locale names map satisfactorily to language
tags.  Think REAL hard about how you use this.  YOU HAVE BEEN WARNED.

The output is untainted.  If you don't know what tainting is,
don't worry about it.

=cut

sub locale2language_tag {
  # Going through a regexp capture is what makes the result untainted.
  my $candidate = '';
  if ($_[0] =~ m/(.+)/) {
    $candidate = $1;
  }

  # Already a language tag, like "en"?  Then pass it through.
  return $candidate if is_language_tag($candidate);

  $candidate =~ tr<_><->;  # "en_US" -> en-US
  $candidate =~ s<(?:[\.\@][-_a-zA-Z0-9]+)+$><>s;  # "en_US.ISO8859-1" -> en-US
   # it_IT.utf8@euro => it-IT

  # Empty list (undef in scalar context) when it still isn't a tag.
  return is_language_tag($candidate) ? $candidate : ();
}

###########################################################################

=item * the function encode_language_tag($lang1)

This function, if given a language tag, returns an encoding of it such
that:

* tags representing different languages never get the same encoding.

* tags representing the same language always get the same encoding.

* an encoding of a formally valid language tag always is a string
value that is defined, has length, and is true if considered as a
boolean.

Note that the encoding itself is B<not> a formally valid language tag.
Note also that you cannot, currently, go from an encoding back to a
language tag that it's an encoding of.

Note also that you B<must> consider the encoded value as atomic; i.e.,
you should not consider it as anything but an opaque, unanalysable
string value.  (The internals of the encoding method may change in
future versions, as the language tagging standard changes over time.)

C<encode_language_tag> returns undef if given anything other than a
formally valid language tag.

The reason C<encode_language_tag> exists is because different language
tags may represent the same language; this is normally treatable with
C<same_language_tag>, but consider this situation:

You have a data file that expresses greetings in different languages.
Its format is "[language tag]=[how to say 'Hello']", like:

          en-US=Hiho
          fr=Bonjour
          i-mingo=Hau'

And suppose you write a program that reads that file and then runs as
a daemon, answering client requests that specify a language tag and
then expect the string that says how to greet in that language.  So an
interaction looks like:

          greeting-client asks:    fr
          greeting-server answers: Bonjour

So far so good.
But suppose the way you're implementing this is:

          my %greetings;
          die unless open(IN, "<in.dat");
          while(<IN>) {
            chomp;
            next unless /^([^=]+)=(.+)/s;
            my($lang, $expr) = ($1, $2);
            $greetings{$lang} = $expr;
          }
          close(IN);

at which point %greetings has the contents:

          "en-US"   => "Hiho"
          "fr"      => "Bonjour"
          "i-mingo" => "Hau'"

And suppose then that you answer client requests for language $wanted
by just looking up $greetings{$wanted}.

If the client asks for "fr", that will look up successfully in
%greetings, to the value "Bonjour".  And if the client asks for
"i-mingo", that will look up successfully in %greetings, to the value
"Hau'".

But if the client asks for "i-Mingo" or "x-mingo", or "Fr", then the
lookup in %greetings fails.  That's the Wrong Thing.

You could instead do lookups on $wanted with:

          use I18N::LangTags qw(same_language_tag);
          my $response = '';
          foreach my $l2 (keys %greetings) {
            if(same_language_tag($wanted, $l2)) {
              $response = $greetings{$l2};
              last;
            }
          }

But that's rather inefficient.  A better way to do it is to start your
program with:

          use I18N::LangTags qw(encode_language_tag);
          my %greetings;
          die unless open(IN, "<in.dat");
          while(<IN>) {
            chomp;
            next unless /^([^=]+)=(.+)/s;
            my($lang, $expr) = ($1, $2);
            $greetings{
                        encode_language_tag($lang)
                      } = $expr;
          }
          close(IN);

and then just answer client requests for language $wanted by just
looking up

          $greetings{encode_language_tag($wanted)}

And that does the Right Thing.

=cut

sub encode_language_tag {
  # Only similarity_language_tag() is allowed to analyse encodings!

  ## Changes in the language tagging standards may have to be reflected here.

  my $tag = $_[0];
  return undef unless $tag;
  return undef unless is_language_tag($tag);

  # For the moment, these legacy variances are few enough that
  #  we can just handle them here with regexps.  They are applied
  #  strictly in the order listed.
  #
  # SMB 2003 -- Hm.  There's a bunch of new XXX->YY variances now,
  #  but maybe they're all so obscure I can ignore them.   "Obscure"
  #  meaning either that the language is obscure, and/or that the
  #  XXX form was extant so briefly that it's unlikely it was ever
  #  used.  I hope.
  #
  # The last three go FROM the simplex to complex form, to get
  #  similarity-comparison right.  And that's okay, since
  #  similarity_language_tag is the only thing that
  #  analyzes our output.
  my @legacy_rewrites = (
    [ qr/^iw\b/i,          'he'       ],  # Hebrew
    [ qr/^in\b/i,          'id'       ],  # Indonesian
    [ qr/^cre\b/i,         'cr'       ],  # Cree
    [ qr/^jw\b/i,          'jv'       ],  # Javanese
    [ qr/^[ix]-lux\b/i,    'lb'       ],  # Luxemburger
    [ qr/^[ix]-navajo\b/i, 'nv'       ],  # Navajo
    [ qr/^ji\b/i,          'yi'       ],  # Yiddish
    [ qr/^[ix]-hakka\b/i,  'zh-hakka' ],  # Hakka
    [ qr/^nb\b/i,          'no-bok'   ],  # BACKWARDS for Bokmal
    [ qr/^nn\b/i,          'no-nyn'   ],  # BACKWARDS for Nynorsk
  );
  foreach my $rewrite (@legacy_rewrites) {
    my ($old_form, $new_form) = @$rewrite;
    $tag =~ s/$old_form/$new_form/;
  }

  $tag =~ s/^[xiXI]-//s;
   # Just lop off any leading "x/i-"

  return "~" . uc($tag);
}

#--------------------------------------------------------------------------

=item * the function alternate_language_tags($lang1)

This function, if given a language tag, returns all language tags that
are alternate forms of this language tag.  (I.e., tags which refer to
the same language.)  This is meant to handle legacy tags caused by
the minor changes in language tag standards over the years; and
the x-/i- alternation is also dealt with.

Note that this function does I<not> try to equate new (and never-used,
and unusable)
ISO639-2 three-letter tags to old (and still in use) ISO639-1
two-letter equivalents -- like "ara" -> "ar" -- because
"ara" has I<never> been in use as an Internet language tag,
and RFC 3066 stipulates that it never should be, since a shorter
tag ("ar") exists.
Examples:

  alternate_language_tags('no-bok')       is ('nb')
  alternate_language_tags('nb')           is ('no-bok')
  alternate_language_tags('he')           is ('iw')
  alternate_language_tags('iw')           is ('he')
  alternate_language_tags('i-hakka')      is ('zh-hakka', 'x-hakka')
  alternate_language_tags('zh-hakka')     is ('x-hakka', 'i-hakka')
  alternate_language_tags('en')           is ()
  alternate_language_tags('x-mingo-tom')  is ('i-mingo-tom')
  alternate_language_tags('x-klikitat')   is ('i-klikitat')
  alternate_language_tags('i-klikitat')   is ('x-klikitat')

This function returns empty-list if given anything other than a formally
valid language tag.

=cut

my %alt = qw( i x   x i   I X   X I );
sub alternate_language_tags {
  my $tag = $_[0];
  return() unless is_language_tag($tag);

  # For the moment, these legacy variances are few enough that
  #  we can just handle them here with regexps.
  # Each rule is a pattern (capturing whatever follows the legacy part)
  # plus the replacement head(s); only the first matching rule applies.
  my @legacy_rules = (
    [ qr/^[ix]-hakka\b(.*)/i,  'zh-hakka'             ],
    [ qr/^zh-hakka\b(.*)/i,    'x-hakka', 'i-hakka'   ],
    [ qr/^he\b(.*)/i,          'iw'                   ],
    [ qr/^iw\b(.*)/i,          'he'                   ],
    [ qr/^in\b(.*)/i,          'id'                   ],
    [ qr/^id\b(.*)/i,          'in'                   ],
    [ qr/^[ix]-lux\b(.*)/i,    'lb'                   ],
    [ qr/^lb\b(.*)/i,          'i-lux', 'x-lux'       ],
    [ qr/^[ix]-navajo\b(.*)/i, 'nv'                   ],
    [ qr/^nv\b(.*)/i,          'i-navajo', 'x-navajo' ],
    [ qr/^yi\b(.*)/i,          'ji'                   ],
    [ qr/^ji\b(.*)/i,          'yi'                   ],
    [ qr/^nb\b(.*)/i,          'no-bok'               ],
    [ qr/^no-bok\b(.*)/i,      'nb'                   ],
    [ qr/^nn\b(.*)/i,          'no-nyn'               ],
    [ qr/^no-nyn\b(.*)/i,      'nn'                   ],
  );

  my @alternates;
  RULE: foreach my $rule (@legacy_rules) {
    my ($pattern, @heads) = @$rule;
    next RULE unless $tag =~ $pattern;
    my $rest = $1;
    push @alternates, map { $_ . $rest } @heads;
    last RULE;
  }

  # Independently of the above, an x-/i- tag always has its i-/x- twin.
  if ($tag =~ /^([XIxi])(-.+)/) {
    push @alternates, $alt{$1} . $2;
  }

  return @alternates;
}

###########################################################################

{
  # Init %Panic...

  my @panic = (  # MUST all be lowercase!
   # Only large ("national") languages make it in this list.
   #  If you, as a user, are so bizarre that the /only/ language
   #  you claim to accept is Galician, then no, we won't do you
   #  the favor of providing Catalan as a panic-fallback for
   #  you.  Because if I start trying to add "little languages" in
   #  here, I'll just go crazy.

   # Scandinavian lgs.  All based on opinion and hearsay.
   'sv' => [qw(nb no da nn)],
   'da' => [qw(nb no sv nn)], # I guess
   [qw(no nn nb)], [qw(no nn nb sv da)],
   'is' => [qw(da sv no nb nn)],
   'fo' => [qw(da is no nb nn sv)], # I guess

   # I think this is about the extent of tolerable intelligibility
   #  among large modern Romance languages.
   'pt' => [qw(es ca it fr)], # Portuguese, Spanish, Catalan, Italian, French
   'ca' => [qw(es pt it fr)],
   'es' => [qw(ca it fr pt)],
   'it' => [qw(es fr ca pt)],
   'fr' => [qw(es it ca pt)],

   # Also assume that speakers of the main Indian languages prefer
   #  to read/hear Hindi over English
   [qw(
     as bn gu kn ks kok ml mni mr ne or pa sa sd te ta ur
   )] => 'hi',
     # Assamese, Bengali, Gujarati, [Hindi,] Kannada (Kanarese), Kashmiri,
     # Konkani, Malayalam, Meithei (Manipuri), Marathi, Nepali, Oriya,
     # Punjabi, Sanskrit, Sindhi, Telugu, Tamil, and Urdu.
   'hi' => [qw(bn pa as or)],
   # I welcome finer data for the other Indian languages.
   #  E.g., what should Oriya's list be, besides just Hindi?

   # And the panic languages for English is, of course, nil!

   # My guesses at Slavic intelligibility:
   ([qw(ru be uk)]) x 2,  # Russian, Belarusian, Ukrainian
   'sr' => 'hr', 'hr' => 'sr', # Serb + Croat
   'cs' => 'sk', 'sk' => 'cs', # Czech + Slovak

   'ms' => 'id', 'id' => 'ms', # Malay + Indonesian

   'et' => 'fi', 'fi' => 'et', # Estonian + Finnish

   #?? 'lo' => 'th', 'th' => 'lo', # Lao + Thai

  );

  # The table is a flat list of (source, fallback) pairs; either side may
  # be one tag or an array-ref of tags.  Expand every pair into %Panic,
  # never listing a language as a fallback for itself.
  while (my ($sources, $fallbacks) = splice(@panic, 0, 2)) {
    foreach my $from (ref($sources) ? @$sources : $sources) {
      foreach my $to (ref($fallbacks) ? @$fallbacks : $fallbacks) {
        next if $from eq $to;
        push @{ $Panic{$from} ||= [] }, $to;
      }
    }
  }
}

=item * the function @langs = panic_languages(@accept_languages)

This function takes a list of 0 or more language
tags that constitute a given user's Accept-Language list, and
returns a list of tags for I<other> (non-super)
languages that are probably acceptable to the user, to be
used I<if all else fails>.

For example, if a user accepts only 'ca' (Catalan) and
'es' (Spanish), and the documents/interfaces you have
available are just in German, Italian, and Chinese, then
the user will most likely want the Italian one (and not
the Chinese or German one!), instead of getting
nothing.  So C<panic_languages('ca', 'es')> returns
a list containing 'it' (Italian).

Tags that were given as input are never returned.  English ('en') is
added at the end of the return list, unless 'en' itself was among
the input tags.

This function works by consulting an internal table that
stipulates what common languages are "close" to each other.

A useful construct you might consider using is:

  @fallbacks = map super_languages($_), @accept_languages;
  push @fallbacks, panic_languages(
    @accept_languages, @fallbacks,
  );

=cut

sub panic_languages {
  # When in panic or in doubt, run in circles, scream, and shout!
  my %already;
  my @fallbacks;
  foreach my $accepted (@_) {
    next unless $accepted;
    next if $already{$accepted}++; # so we don't return it or hit it again
    # push @fallbacks, super_languages($accepted); # nah, keep that separate
    my $close_ones = $Panic{lc $accepted} or next;
    push @fallbacks, @$close_ones;
  }
  return grep { !$already{$_}++ } @fallbacks, 'en';
}

#---------------------------------------------------------------------------
#---------------------------------------------------------------------------

=item * the function implicate_supers( ...languages... )

This takes a list of strings (which are presumed to be language-tags;
strings that aren't, are ignored); and after each one, this function
inserts super-ordinate forms that don't already appear in the list.
The original list, plus these insertions, is returned.
In other words, it takes this:

  pt-br de-DE en-US fr pt-br-janeiro

and returns this:

  pt-br pt de-DE de en-US en fr pt-br-janeiro

This function is most useful in the idiom

  implicate_supers( I18N::LangTags::Detect::detect() );

(See L<I18N::LangTags::Detect>.)


=item * the function implicate_supers_strictly( ...languages... )

This works like C<implicate_supers> except that the implicated
forms are added to the end of the return list.

In other words, implicate_supers_strictly takes a list of strings
(which are presumed to be language-tags; strings that aren't, are
ignored) and after the whole given list, it inserts the super-ordinate forms
of all given tags, minus any tags that already appear in the input list.

In other words, it takes this:

  pt-br de-DE en-US fr pt-br-janeiro

and returns this:

  pt-br de-DE en-US fr pt-br-janeiro pt de en

The reason this function has "_strictly" in its name is that when
you're processing an Accept-Language list according to the RFCs, if
you interpret the RFCs quite strictly, then you would use
implicate_supers_strictly, but for normal use (i.e., common-sense use,
as far as I'm concerned) you'd use implicate_supers.

=cut

sub implicate_supers {
  # Strings that are not well-formed language tags are dropped up front.
  my @languages = grep is_language_tag($_), @_;

  # Remember (in encoded, i.e. canonical, form) every tag the caller
  # listed explicitly, so that we never implicate one of those.
  my %seen_encoded;
  foreach my $lang (@languages) {
    $seen_encoded{ I18N::LangTags::encode_language_tag($lang) } = 1
  }

  my(@output_languages);
  foreach my $lang (@languages) {
    push @output_languages, $lang;
    foreach my $s ( I18N::LangTags::super_languages($lang) ) {
      # Note that super_languages returns the longest first.
      # Once a super-form is one the caller listed explicitly, stop:
      # it (and its own supers) get handled at their own position.
      last if $seen_encoded{ I18N::LangTags::encode_language_tag($s) };
      push @output_languages, $s;
    }
  }
  return uniq( @output_languages );

}

sub implicate_supers_strictly {
  # As documented, strings that are not language tags are ignored: both
  # the returned originals and the implicated supers are built from the
  # filtered list (the unfiltered @_ used to leak non-tags into the result).
  my @tags = grep is_language_tag($_), @_;
  return uniq( @tags,   map super_languages($_), @tags );
}



###########################################################################
1;
__END__

=back

=head1 ABOUT LOWERCASING

I've considered making all the above functions that output language
tags return all those tags strictly in lowercase.
Having all your language tags in lowercase does make some things easier. But you might as well just lowercase as you like, or call C where appropriate. =head1 ABOUT UNICODE PLAINTEXT LANGUAGE TAGS In some future version of I18N::LangTags, I plan to include support for RFC2482-style language tags -- which are basically just normal language tags with their ASCII characters shifted into Plane 14. =head1 SEE ALSO * L * RFC 3066, C, "Tags for the Identification of Languages". (Obsoletes RFC 1766) * RFC 2277, C, "IETF Policy on Character Sets and Languages". * RFC 2231, C, "MIME Parameter Value and Encoded Word Extensions: Character Sets, Languages, and Continuations". * RFC 2482, C, "Language Tagging in Unicode Plain Text". * Locale::Codes, in C * ISO 639-2, "Codes for the representation of names of languages", including two-letter and three-letter codes, C * The IANA list of registered languages (hopefully up-to-date), C =head1 COPYRIGHT Copyright (c) 1998+ Sean M. Burke. All rights reserved. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. The programs and documentation in this dist are distributed in the hope that they will be useful, but without any warranty; without even the implied warranty of merchantability or fitness for a particular purpose. =head1 AUTHOR Sean M. 
Burke C

=cut

# Time-stamp: "2004-06-20 21:47:55 ADT"

require 5;
package I18N::LangTags::Detect;
use strict;

use vars qw( @ISA $VERSION $MATCH_SUPERS $USING_LANGUAGE_TAGS
             $USE_LITERALS $MATCH_SUPERS_TIGHTLY);

BEGIN { unless(defined &DEBUG) { *DEBUG = sub () {0} } }
 # define the constant 'DEBUG' at compile-time

$VERSION = "1.05";
@ISA = ();
use I18N::LangTags qw(alternate_language_tags locale2language_tag);

# Returns its arguments with duplicates removed, first occurrence wins.
sub _uniq { my %seen; return grep(!($seen{$_}++), @_); }

# Turns a list of tags into the final preference list: each tag is
# followed by its alternate forms, false values are dropped, everything
# is lowercased.  List context: the de-duplicated list.  Scalar context:
# just the first (most preferred) tag, or undef if there is none.
sub _normalize {
  my(@languages) =
    map lc($_),
    grep $_,
    map {; $_, alternate_language_tags($_) } @_;
  return _uniq(@languages) if wantarray;
  return $languages[0];
}

#---------------------------------------------------------------------------
# The extent of our functional interface:

sub detect () { return __PACKAGE__->ambient_langprefs; }

#===========================================================================

# Class method.  Works out the language preferences from the process
# environment: the HTTP Accept-Language header when running as a CGI
# (REQUEST_METHOD is set), otherwise the first non-empty one of
# LANGUAGE, LC_ALL, LC_MESSAGES, LANG, plus Win32::Locale's answer when
# that module is available and IGNORE_WIN32_LOCALE is not set.
sub ambient_langprefs { # always returns things untainted
  my $base_class = $_[0];

  return $base_class->http_accept_langs
   if length( $ENV{'REQUEST_METHOD'} || '' ); # I'm a CGI
       # it's off in its own routine because it's complicated

  # Not running as a CGI: try to puzzle out from the environment
  my @languages;

  foreach my $envname (qw( LANGUAGE LC_ALL LC_MESSAGES LANG )) {
    next unless $ENV{$envname};
    DEBUG and print "Noting \$$envname: $ENV{$envname}\n";
    push @languages,
      map locale2language_tag($_),
        # if it's a lg tag, fine, pass thru (untainted)
        # if it's a locale ID, try converting to a lg tag (untainted),
        # otherwise nix it.

      split m/[,:]/,
      $ENV{$envname}
    ;
    last; # first one wins
  }

  if($ENV{'IGNORE_WIN32_LOCALE'}) {
    # no-op
  } elsif(_try_use('Win32::Locale')) {
    # If we have that module installed...
    push @languages, Win32::Locale::get_language() || ''
     if defined &Win32::Locale::get_language;
  }
  return _normalize @languages;
}

#---------------------------------------------------------------------------

# Class method.  Parses an Accept-Language value -- the second argument
# if one is given, else $ENV{HTTP_ACCEPT_LANGUAGE} -- and returns the
# tags it names, highest q-value first (input order within one q-value),
# run through _normalize.  Returns empty list / undef for no input.
sub http_accept_langs {
  # Deal with HTTP "Accept-Language:" stuff.  Hassle.
  # This code is more lenient than RFC 3282, which you must read.
  # Hm.  Should I just move this into I18N::LangTags at some point?
  no integer;

  my $in = (@_ > 1) ? $_[1] : $ENV{'HTTP_ACCEPT_LANGUAGE'};
  # (always ends up untainting)

  return() unless defined $in and length $in;

  $in =~ s/\([^\)]*\)//g; # nix just about any comment

  # Subtags may contain digits as well as letters (e.g. "es-419"), so
  # the tag patterns below accept [-a-zA-Z0-9] after the first letter.
  if( $in =~ m/^\s*([a-zA-Z][-a-zA-Z0-9]+)\s*$/s ) {
    # Very common case: just one language tag
    return _normalize $1;

  } elsif( $in =~ m/^\s*[a-zA-Z][-a-zA-Z0-9]+(?:\s*,\s*[a-zA-Z][-a-zA-Z0-9]+)*\s*$/s ) {
    # Common case these days: just "foo, bar, baz"
    return _normalize( $in =~ m/([a-zA-Z][-a-zA-Z0-9]+)/g );
  }

  # Else it's complicated...

  $in =~ s/\s+//g;  # Yes, we can just do without the WS!
  my @in = $in =~ m/([^,]+)/g;
  my %pref;

  my $q;
  foreach my $tag (@in) {
    next unless $tag =~
     m/^([a-zA-Z][-a-zA-Z0-9]+)
        (?:
         ;q=
         (
          \d*   # a bit too broad of a RE, but so what.
          (?:
            \.\d+
          )?
         )
        )?
       $
      /sx
    ;
    # Bucket by numeric value, not by spelling: ".5", "0.5" and "0.50"
    # are the same preference and must keep their input order.
    $q = (defined $2 and length $2) ? $2 + 0 : 1;
    #print "$1 with q=$q\n";
    push @{ $pref{$q} }, lc $1;
  }

  return _normalize(
    # Read off %pref, in descending key order...
    map @{$pref{$_}},
    sort {$b <=> $a}
    keys %pref
  );
}

#===========================================================================

my %tried = ();
  # memoization of whether we've used this module, or found it unusable.

# Returns true if the named module is (or can be) loaded, false if
# requiring it fails.  The answer is remembered per module name.
sub _try_use {   # Basically a wrapper around "require Modulename"
  # "Many men have tried..."  "They tried and failed?"  "They tried and died."
  return $tried{$_[0]} if exists $tried{$_[0]};  # memoization

  my $module = $_[0];   # ASSUME sane module name!
  { no strict 'refs';
    no warnings 'once';
    return($tried{$module} = 1)
     if %{$module . "::Lexicon"} or @{$module . "::ISA"};
    # weird case: we never use'd it, but there it is!
  }

  print " About to use $module ...\n" if DEBUG;
  {
    local $SIG{'__DIE__'};
    eval "require $module"; # used to be "use $module", but no point in that.
  }
  if($@) {
    print "Error using $module \: $@\n" if DEBUG > 1;
    return $tried{$module} = 0;
  } else {
    print " OK, $module is used\n" if DEBUG;
    return $tried{$module} = 1;
  }
}

#---------------------------------------------------------------------------
1;
__END__


=head1 NAME

I18N::LangTags::Detect - detect the user's language preferences

=head1 SYNOPSIS

  use I18N::LangTags::Detect;
  my @user_wants = I18N::LangTags::Detect::detect();

=head1 DESCRIPTION

It is a common problem to want to detect what language(s) the user would
prefer output in.

=head1 FUNCTIONS

This module defines one public function,
C<I18N::LangTags::Detect::detect()>.  This function is not exported
(nor is even exportable), and it takes no parameters.

In scalar context, the function returns the most preferred language
tag (or undef if no preference was seen).

In list context (which is usually what you want),
the function returns a (possibly empty) list of language tags
representing (best first) what languages the user apparently would
accept output in.  You will probably want to pass the output of this
through C<I18N::LangTags::implicate_supers>
or C<I18N::LangTags::implicate_supers_strictly>, like so:

  my @languages = I18N::LangTags::implicate_supers_strictly(
    I18N::LangTags::Detect::detect()
  );


=head1 ENVIRONMENT

This module looks for several environment variables, including
REQUEST_METHOD, HTTP_ACCEPT_LANGUAGE,
LANGUAGE, LC_ALL, LC_MESSAGES, and LANG.

It will also use the L<Win32::Locale> module, if it's installed,
unless the environment variable IGNORE_WIN32_LOCALE is set to a true
value.


=head1 SEE ALSO

L<I18N::LangTags>, L<Win32::Locale>, L<Locale::Maketext>.

(This module's core code started out as a routine in Locale::Maketext;
but I moved it here once I realized it was more generally useful.)


=head1 COPYRIGHT

Copyright (c) 1998-2004 Sean M. Burke. All rights reserved.

This library is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

The programs and documentation in this dist are distributed in
the hope that they will be useful, but without any warranty; without
even the implied warranty of merchantability or fitness for a
particular purpose.


=head1 AUTHOR

Sean M.
Burke C =cut # a tip: Put a bit of chopped up pickled ginger in your salad. It's tasty! $a->[0] cmp $b->[0] } @codes; print "[ based on $url\n at ", scalar(localtime), "]\n", "[Note: doesn't include IANA-registered codes.]\n"; exit; __END__ # ./3 ..$Compress8 UncompressEZlib.pm$ .# ..%Adapter)Base.pm*Base%W,Bzip2.pm- Deflate.pm.Gzip.pm/Gzip%W1 RawDeflate.pm2Zip.pm3 Zip5<Zlib%W require 5; package I18N::LangTags::List; # Time-stamp: "2004-10-06 23:26:21 ADT" use strict; use vars qw(%Name %Is_Disrec $Debug $VERSION); $VERSION = '0.35_01'; # POD at the end. #---------------------------------------------------------------------- { # read the table out of our own POD! my $seeking = 1; my $count = 0; my($disrec,$tag,$name); my $last_name = ''; while() { if($seeking) { $seeking = 0 if m/=for woohah/; } elsif( ($disrec, $tag, $name) = m/(\[?)\{([-0-9a-zA-Z]+)\}(?:\s*:)?\s*([^\[\]]+)/ ) { $name =~ s/\s*[;\.]*\s*$//g; next unless $name; ++$count; print "<$tag> <$name>\n" if $Debug; $last_name = $Name{$tag} = $name; $Is_Disrec{$tag} = 1 if $disrec; } elsif (m/[Ff]ormerly \"([-a-z0-9]+)\"/) { $Name{$1} = "$last_name (old tag)" if $last_name; $Is_Disrec{$1} = 1; } } die "No tags read??" unless $count; } #---------------------------------------------------------------------- sub name { my $tag = lc($_[0] || return); $tag =~ s/^\s+//s; $tag =~ s/\s+$//s; my $alt; if($tag =~ m/^x-(.+)/) { $alt = "i-$1"; } elsif($tag =~ m/^i-(.+)/) { $alt = "x-$1"; } else { $alt = ''; } my $subform = ''; my $name = ''; print "Input: {$tag}\n" if $Debug; while(length $tag) { last if $name = $Name{$tag}; last if $name = $Name{$alt}; if($tag =~ s/(-[a-z0-9]+)$//s) { print "Shaving off: $1 leaving $tag\n" if $Debug; $subform = "$1$subform"; # and loop around again $alt =~ s/(-[a-z0-9]+)$//s && $Debug && print " alt -> $alt\n"; } else { # we're trying to pull a subform off a primary tag. TILT! 
print "Aborting on: {$name}{$subform}\n" if $Debug;
      last;
    }
  }
  print "Output: {$name}{$subform}\n" if $Debug;

  return unless $name;   # Failure
  return $name unless $subform;   # Exact match
  $subform =~ s/^-//s;
  $subform =~ s/-$//s;
  return "$name (Subform \"$subform\")";
}

#--------------------------------------------------------------------------

# is_decent($tag) classifies a language tag (case-insensitively):
#   0 -- no tag given, not syntactically valid, nothing left after a
#        leading i/x/sgn, or the tag (or the first listed super-form
#        that is checked) is marked in %Is_Disrec;
#   2 -- the tag, or one of its super-forms, has an entry in %Name;
#   1 -- syntactically fine, but unknown to the table.
sub is_decent {
  my $tag = lc($_[0] || return 0);
  #require I18N::LangTags;

  # Anchored with \A...\z: a plain "$" would also accept a tag that is
  # followed by a newline, which is not a syntactically valid tag.
  return 0 unless
    $tag =~
    /\A(?:  # First subtag
         [xi] | [a-z]{2,3}
      )
      (?:  # Subtags thereafter
         -           # separator
         [a-z0-9]{1,8}  # subtag
      )*
    \z/xs;

  # Build every prefix of the tag, shortest first: for "fr-ca-joual"
  # that is ("fr", "fr-ca", "fr-ca-joual").
  my @supers = ();
  foreach my $bit (split('-', $tag)) {
    push @supers,
      scalar(@supers) ? ($supers[-1] . '-' . $bit) : $bit;
  }
  return 0 unless @supers;
  shift @supers if $supers[0] =~ m<^(i|x|sgn)$>s;
  return 0 unless @supers;

  # The tag itself is looked at first, then its prefixes; the first one
  # that the table knows anything about decides.
  foreach my $f ($tag, @supers) {
    return 0 if $Is_Disrec{$f};
    return 2 if $Name{$f};
     # so that decent subforms of indecent tags are decent
  }
  # (A known $tag has already returned 2 inside the loop above, so no
  # further %Name check is needed here.)
  return 1;
}

#--------------------------------------------------------------------------
1;

__DATA__

=head1 NAME

I18N::LangTags::List -- tags and names for human languages

=head1 SYNOPSIS

  use I18N::LangTags::List;
  print "Parlez-vous... ", join(', ',
      I18N::LangTags::List::name('elx') || 'unknown_language',
      I18N::LangTags::List::name('ar-Kw') || 'unknown_language',
      I18N::LangTags::List::name('en') || 'unknown_language',
      I18N::LangTags::List::name('en-CA') || 'unknown_language',
    ), "?\n";

prints:

  Parlez-vous... Elamite, Kuwait Arabic, English, Canadian English?

=head1 DESCRIPTION

This module provides a function
C<I18N::LangTags::List::name( I<language_tag> ) > that takes
a language tag (see L<I18N::LangTags|I18N::LangTags>)
and returns the best attempt at an English name for it, or
undef if it can't make sense of the tag.

The function I18N::LangTags::List::name(...) is not exported.

This module also provides a function
C<I18N::LangTags::List::is_decent( I<language_tag> )> that returns true iff
the language tag is syntactically valid and is for general use (like
"fr" or "fr-ca", below).
That is, it returns false for tags that are syntactically invalid and for tags, like "aus", that are listed in brackets below. This function is not exported. The map of tags-to-names that it uses is accessible as %I18N::LangTags::List::Name, and it's the same as the list that follows in this documentation, which should be useful to you even if you don't use this module. =head1 ABOUT LANGUAGE TAGS Internet language tags, as defined in RFC 3066, are a formalism for denoting human languages. The two-letter ISO 639-1 language codes are well known (as "en" for English), as are their forms when qualified by a country code ("en-US"). Less well-known are the arbitrary-length non-ISO codes (like "i-mingo"), and the recently (in 2001) introduced three-letter ISO-639-2 codes. Remember these important facts: =over =item * Language tags are not locale IDs. A locale ID is written with a "_" instead of a "-", (almost?) always matches C<m/^\w\w_\w\w\b/>, and I<means> something different than a language tag. A language tag denotes a language. A locale ID denotes a language I<as used in> a particular place, in combination with non-linguistic location-specific information such as what currency is used there. Locales I<also> often denote character set information, as in "en_US.ISO8859-1". =item * Language tags are not for computer languages. =item * "Dialect" is not a useful term, since there is no objective criterion for establishing when two language-forms are dialects of each other, or are separate languages. =item * Language tags are not case-sensitive. en-US, en-us, En-Us, etc., are all the same tag, and denote the same language. =item * Not every language tag really refers to a single language. Some language tags refer to conditions: i-default (system-message text in English plus maybe other languages), und (undetermined language). 
Others (notably lots of the three-letter codes) are bibliographic tags that classify whole groups of languages, as with cus "Cushitic (Other)" (i.e., a language that has been classed as Cushitic, but which has no more specific code) or the even less linguistically coherent sai for "South American Indian (Other)". Though useful in bibliography, B<SUCH CODES ARE NOT FOR GENERAL USE>. For further guidance, email me. =item * Language tags are not country codes. In fact, they are often distinct codes, as with language tag ja for Japanese, and ISO 3166 country code C<.jp> for Japan. =back =head1 LIST OF LANGUAGES The first part of each item is the language tag, between {...}. It is followed by an English name for the language or language-group. Language tags that I judge to be not for general use, are bracketed. This list is in alphabetical order by English name of the language. =for reminder The name in the =item line MUST NOT have E<...>'s in it!! =for woohah START =over =item {ab} : Abkhazian eq Abkhaz =item {ace} : Achinese =item {ach} : Acoli =item {ada} : Adangme =item {ady} : Adyghe eq Adygei =item {aa} : Afar =item {afh} : Afrihili (Artificial) =item {af} : Afrikaans =item [{afa} : Afro-Asiatic (Other)] =item {ak} : Akan (Formerly "aka".) =item {akk} : Akkadian (Historical) =item {sq} : Albanian =item {ale} : Aleut =item [{alg} : Algonquian languages] NOT Algonquin! =item [{tut} : Altaic (Other)] =item {am} : Amharic NOT Aramaic! =item {i-ami} : Ami eq Amis. eq 'Amis. eq Pangca. =item [{apa} : Apache languages] =item {ar} : Arabic Many forms are mutually un-intelligible in spoken media. Notable forms: {ar-ae} UAE Arabic; {ar-bh} Bahrain Arabic; {ar-dz} Algerian Arabic; {ar-eg} Egyptian Arabic; {ar-iq} Iraqi Arabic; {ar-jo} Jordanian Arabic; {ar-kw} Kuwait Arabic; {ar-lb} Lebanese Arabic; {ar-ly} Libyan Arabic; {ar-ma} Moroccan Arabic; {ar-om} Omani Arabic; {ar-qa} Qatari Arabic; {ar-sa} Sauda Arabic; {ar-sy} Syrian Arabic; {ar-tn} Tunisian Arabic; {ar-ye} Yemen Arabic. 
=item {arc} : Aramaic NOT Amharic! NOT Samaritan Aramaic! =item {arp} : Arapaho =item {arn} : Araucanian =item {arw} : Arawak =item {hy} : Armenian =item {an} : Aragonese =item [{art} : Artificial (Other)] =item {ast} : Asturian eq Bable. =item {as} : Assamese =item [{ath} : Athapascan languages] eq Athabaskan. eq Athapaskan. eq Athabascan. =item [{aus} : Australian languages] =item [{map} : Austronesian (Other)] =item {av} : Avaric (Formerly "ava".) =item {ae} : Avestan eq Zend =item {awa} : Awadhi =item {ay} : Aymara =item {az} : Azerbaijani eq Azeri Notable forms: {az-Arab} Azerbaijani in Arabic script; {az-Cyrl} Azerbaijani in Cyrillic script; {az-Latn} Azerbaijani in Latin script. =item {ban} : Balinese =item [{bat} : Baltic (Other)] =item {bal} : Baluchi =item {bm} : Bambara (Formerly "bam".) =item [{bai} : Bamileke languages] =item {bad} : Banda =item [{bnt} : Bantu (Other)] =item {bas} : Basa =item {ba} : Bashkir =item {eu} : Basque =item {btk} : Batak (Indonesia) =item {bej} : Beja =item {be} : Belarusian eq Belarussian. eq Byelarussian. eq Belorussian. eq Byelorussian. eq White Russian. eq White Ruthenian. NOT Ruthenian! =item {bem} : Bemba =item {bn} : Bengali eq Bangla. =item [{ber} : Berber (Other)] =item {bho} : Bhojpuri =item {bh} : Bihari =item {bik} : Bikol =item {bin} : Bini =item {bi} : Bislama eq Bichelamar. =item {bs} : Bosnian =item {bra} : Braj =item {br} : Breton =item {bug} : Buginese =item {bg} : Bulgarian =item {i-bnn} : Bunun =item {bua} : Buriat =item {my} : Burmese =item {cad} : Caddo =item {car} : Carib =item {ca} : Catalan eq CatalEn. eq Catalonian. =item [{cau} : Caucasian (Other)] =item {ceb} : Cebuano =item [{cel} : Celtic (Other)] Notable forms: {cel-gaulish} Gaulish (Historical) =item [{cai} : Central American Indian (Other)] =item {chg} : Chagatai (Historical?) 
=item [{cmc} : Chamic languages] =item {ch} : Chamorro =item {ce} : Chechen =item {chr} : Cherokee eq Tsalagi =item {chy} : Cheyenne =item {chb} : Chibcha (Historical) NOT Chibchan (which is a language family). =item {ny} : Chichewa eq Nyanja. eq Chinyanja. =item {zh} : Chinese Many forms are mutually un-intelligible in spoken media. Notable forms: {zh-Hans} Chinese, in simplified script; {zh-Hant} Chinese, in traditional script; {zh-tw} Taiwan Chinese; {zh-cn} PRC Chinese; {zh-sg} Singapore Chinese; {zh-mo} Macau Chinese; {zh-hk} Hong Kong Chinese; {zh-guoyu} Mandarin [Putonghua/Guoyu]; {zh-hakka} Hakka [formerly "i-hakka"]; {zh-min} Hokkien; {zh-min-nan} Southern Hokkien; {zh-wuu} Shanghaiese; {zh-xiang} Hunanese; {zh-gan} Gan; {zh-yue} Cantonese. =for etc {i-hakka} Hakka (old tag) =item {chn} : Chinook Jargon eq Chinook Wawa. =item {chp} : Chipewyan =item {cho} : Choctaw =item {cu} : Church Slavic eq Old Church Slavonic. =item {chk} : Chuukese eq Trukese. eq Chuuk. eq Truk. eq Ruk. =item {cv} : Chuvash =item {cop} : Coptic =item {kw} : Cornish =item {co} : Corsican eq Corse. =item {cr} : Cree NOT Creek! (Formerly "cre".) =item {mus} : Creek NOT Cree! =item [{cpe} : English-based Creoles and pidgins (Other)] =item [{cpf} : French-based Creoles and pidgins (Other)] =item [{cpp} : Portuguese-based Creoles and pidgins (Other)] =item [{crp} : Creoles and pidgins (Other)] =item {hr} : Croatian eq Croat. =item [{cus} : Cushitic (Other)] =item {cs} : Czech =item {dak} : Dakota eq Nakota. eq Latoka. =item {da} : Danish =item {dar} : Dargwa =item {day} : Dayak =item {i-default} : Default (Fallthru) Language Defined in RFC 2277, this is for tagging text (which must include English text, and might/should include text in other appropriate languages) that is emitted in a context where language-negotiation wasn't possible -- in SMTP mail failure messages, for example. =item {del} : Delaware =item {din} : Dinka =item {dv} : Divehi eq Maldivian. (Formerly "div".) 
=item {doi} : Dogri NOT Dogrib! =item {dgr} : Dogrib NOT Dogri! =item [{dra} : Dravidian (Other)] =item {dua} : Duala =item {nl} : Dutch eq Netherlander. Notable forms: {nl-nl} Netherlands Dutch; {nl-be} Belgian Dutch. =item {dum} : Middle Dutch (ca.1050-1350) (Historical) =item {dyu} : Dyula =item {dz} : Dzongkha =item {efi} : Efik =item {egy} : Ancient Egyptian (Historical) =item {eka} : Ekajuk =item {elx} : Elamite (Historical) =item {en} : English Notable forms: {en-au} Australian English; {en-bz} Belize English; {en-ca} Canadian English; {en-gb} UK English; {en-ie} Irish English; {en-jm} Jamaican English; {en-nz} New Zealand English; {en-ph} Philippine English; {en-tt} Trinidad English; {en-us} US English; {en-za} South African English; {en-zw} Zimbabwe English. =item {enm} : Old English (1100-1500) (Historical) =item {ang} : Old English (ca.450-1100) eq Anglo-Saxon. (Historical) =item {i-enochian} : Enochian (Artificial) =item {myv} : Erzya =item {eo} : Esperanto (Artificial) =item {et} : Estonian =item {ee} : Ewe (Formerly "ewe".) =item {ewo} : Ewondo =item {fan} : Fang =item {fat} : Fanti =item {fo} : Faroese =item {fj} : Fijian =item {fi} : Finnish =item [{fiu} : Finno-Ugrian (Other)] eq Finno-Ugric. NOT Ugaritic! =item {fon} : Fon =item {fr} : French Notable forms: {fr-fr} France French; {fr-be} Belgian French; {fr-ca} Canadian French; {fr-ch} Swiss French; {fr-lu} Luxembourg French; {fr-mc} Monaco French. =item {frm} : Middle French (ca.1400-1600) (Historical) =item {fro} : Old French (842-ca.1400) (Historical) =item {fy} : Frisian =item {fur} : Friulian =item {ff} : Fulah (Formerly "ful".) =item {gaa} : Ga =item {gd} : Scots Gaelic NOT Scots! =item {gl} : Gallegan eq Galician =item {lg} : Ganda (Formerly "lug".) 
=item {gay} : Gayo =item {gba} : Gbaya =item {gez} : Geez eq Ge'ez =item {ka} : Georgian =item {de} : German Notable forms: {de-at} Austrian German; {de-be} Belgian German; {de-ch} Swiss German; {de-de} Germany German; {de-li} Liechtenstein German; {de-lu} Luxembourg German. =item {gmh} : Middle High German (ca.1050-1500) (Historical) =item {goh} : Old High German (ca.750-1050) (Historical) =item [{gem} : Germanic (Other)] =item {gil} : Gilbertese =item {gon} : Gondi =item {gor} : Gorontalo =item {got} : Gothic (Historical) =item {grb} : Grebo =item {grc} : Ancient Greek (Historical) (Until 15th century or so.) =item {el} : Modern Greek (Since 15th century or so.) =item {gn} : Guarani GuaranE =item {gu} : Gujarati =item {gwi} : Gwich'in eq Gwichin =item {hai} : Haida =item {ht} : Haitian eq Haitian Creole =item {ha} : Hausa =item {haw} : Hawaiian Hawai'ian =item {he} : Hebrew (Formerly "iw".) =for etc {iw} Hebrew (old tag) =item {hz} : Herero =item {hil} : Hiligaynon =item {him} : Himachali =item {hi} : Hindi =item {ho} : Hiri Motu =item {hit} : Hittite (Historical) =item {hmn} : Hmong =item {hu} : Hungarian =item {hup} : Hupa =item {iba} : Iban =item {is} : Icelandic =item {io} : Ido (Artificial) =item {ig} : Igbo (Formerly "ibo".) =item {ijo} : Ijo =item {ilo} : Iloko =item [{inc} : Indic (Other)] =item [{ine} : Indo-European (Other)] =item {id} : Indonesian (Formerly "in".) =for etc {in} Indonesian (old tag) =item {inh} : Ingush =item {ia} : Interlingua (International Auxiliary Language Association) (Artificial) NOT Interlingue! =item {ie} : Interlingue (Artificial) NOT Interlingua! =item {iu} : Inuktitut A subform of "Eskimo". =item {ik} : Inupiaq A subform of "Eskimo". =item [{ira} : Iranian (Other)] =item {ga} : Irish =item {mga} : Middle Irish (900-1200) (Historical) =item {sga} : Old Irish (to 900) (Historical) =item [{iro} : Iroquoian languages] =item {it} : Italian Notable forms: {it-it} Italy Italian; {it-ch} Swiss Italian. 
=item {ja} : Japanese (NOT "jp"!) =item {jv} : Javanese (Formerly "jw" because of a typo.) =item {jrb} : Judeo-Arabic =item {jpr} : Judeo-Persian =item {kbd} : Kabardian =item {kab} : Kabyle =item {kac} : Kachin =item {kl} : Kalaallisut eq Greenlandic "Eskimo" =item {xal} : Kalmyk =item {kam} : Kamba =item {kn} : Kannada eq Kanarese. NOT Canadian! =item {kr} : Kanuri (Formerly "kau".) =item {krc} : Karachay-Balkar =item {kaa} : Kara-Kalpak =item {kar} : Karen =item {ks} : Kashmiri =item {csb} : Kashubian eq Kashub =item {kaw} : Kawi =item {kk} : Kazakh =item {kha} : Khasi =item {km} : Khmer eq Cambodian. eq Kampuchean. =item [{khi} : Khoisan (Other)] =item {kho} : Khotanese =item {ki} : Kikuyu eq Gikuyu. =item {kmb} : Kimbundu =item {rw} : Kinyarwanda =item {ky} : Kirghiz =item {i-klingon} : Klingon =item {kv} : Komi =item {kg} : Kongo (Formerly "kon".) =item {kok} : Konkani =item {ko} : Korean =item {kos} : Kosraean =item {kpe} : Kpelle =item {kro} : Kru =item {kj} : Kuanyama =item {kum} : Kumyk =item {ku} : Kurdish =item {kru} : Kurukh =item {kut} : Kutenai =item {lad} : Ladino eq Judeo-Spanish. NOT Ladin (a minority language in Italy). =item {lah} : Lahnda NOT Lamba! =item {lam} : Lamba NOT Lahnda! =item {lo} : Lao eq Laotian. =item {la} : Latin (Historical) NOT Ladin! NOT Ladino! =item {lv} : Latvian eq Lettish. =item {lb} : Letzeburgesch eq Luxemburgian, eq Luxemburger. (Formerly "i-lux".) =for etc {i-lux} Letzeburgesch (old tag) =item {lez} : Lezghian =item {li} : Limburgish eq Limburger, eq Limburgan. NOT Letzeburgesch! =item {ln} : Lingala =item {lt} : Lithuanian =item {nds} : Low German eq Low Saxon. eq Low German. eq Low Saxon. =item {art-lojban} : Lojban (Artificial) =item {loz} : Lozi =item {lu} : Luba-Katanga (Formerly "lub".) =item {lua} : Luba-Lulua =item {lui} : Luiseno eq LuiseEo. =item {lun} : Lunda =item {luo} : Luo (Kenya and Tanzania) =item {lus} : Lushai =item {mk} : Macedonian eq the modern Slavic language spoken in what was Yugoslavia. 
NOT the form of Greek spoken in Greek Macedonia! =item {mad} : Madurese =item {mag} : Magahi =item {mai} : Maithili =item {mak} : Makasar =item {mg} : Malagasy =item {ms} : Malay NOT Malayalam! =item {ml} : Malayalam NOT Malay! =item {mt} : Maltese =item {mnc} : Manchu =item {mdr} : Mandar NOT Mandarin! =item {man} : Mandingo =item {mni} : Manipuri eq Meithei. =item [{mno} : Manobo languages] =item {gv} : Manx =item {mi} : Maori NOT Mari! =item {mr} : Marathi =item {chm} : Mari NOT Maori! =item {mh} : Marshall eq Marshallese. =item {mwr} : Marwari =item {mas} : Masai =item [{myn} : Mayan languages] =item {men} : Mende =item {mic} : Micmac =item {min} : Minangkabau =item {i-mingo} : Mingo eq the Irquoian language West Virginia Seneca. NOT New York Seneca! =item [{mis} : Miscellaneous languages] Don't use this. =item {moh} : Mohawk =item {mdf} : Moksha =item {mo} : Moldavian eq Moldovan. =item [{mkh} : Mon-Khmer (Other)] =item {lol} : Mongo =item {mn} : Mongolian eq Mongol. =item {mos} : Mossi =item [{mul} : Multiple languages] Not for normal use. =item [{mun} : Munda languages] =item {nah} : Nahuatl =item {nap} : Neapolitan =item {na} : Nauru =item {nv} : Navajo eq Navaho. (Formerly "i-navajo".) =for etc {i-navajo} Navajo (old tag) =item {nd} : North Ndebele =item {nr} : South Ndebele =item {ng} : Ndonga =item {ne} : Nepali eq Nepalese. Notable forms: {ne-np} Nepal Nepali; {ne-in} India Nepali. =item {new} : Newari =item {nia} : Nias =item [{nic} : Niger-Kordofanian (Other)] =item [{ssa} : Nilo-Saharan (Other)] =item {niu} : Niuean =item {nog} : Nogai =item {non} : Old Norse (Historical) =item [{nai} : North American Indian] Do not use this. =item {no} : Norwegian Note the two following forms: =item {nb} : Norwegian Bokmal eq BokmEl, (A form of Norwegian.) (Formerly "no-bok".) =for etc {no-bok} Norwegian Bokmal (old tag) =item {nn} : Norwegian Nynorsk (A form of Norwegian.) (Formerly "no-nyn".) 
=for etc {no-nyn} Norwegian Nynorsk (old tag) =item [{nub} : Nubian languages] =item {nym} : Nyamwezi =item {nyn} : Nyankole =item {nyo} : Nyoro =item {nzi} : Nzima =item {oc} : Occitan (post 1500) eq ProvenEal, eq Provencal =item {oj} : Ojibwa eq Ojibwe. (Formerly "oji".) =item {or} : Oriya =item {om} : Oromo =item {osa} : Osage =item {os} : Ossetian; Ossetic =item [{oto} : Otomian languages] Group of languages collectively called "OtomE". =item {pal} : Pahlavi eq Pahlevi =item {i-pwn} : Paiwan eq Pariwan =item {pau} : Palauan =item {pi} : Pali (Historical?) =item {pam} : Pampanga =item {pag} : Pangasinan =item {pa} : Panjabi eq Punjabi =item {pap} : Papiamento eq Papiamentu. =item [{paa} : Papuan (Other)] =item {fa} : Persian eq Farsi. eq Iranian. =item {peo} : Old Persian (ca.600-400 B.C.) =item [{phi} : Philippine (Other)] =item {phn} : Phoenician (Historical) =item {pon} : Pohnpeian NOT Pompeiian! =item {pl} : Polish =item {pt} : Portuguese eq Portugese. Notable forms: {pt-pt} Portugal Portuguese; {pt-br} Brazilian Portuguese. =item [{pra} : Prakrit languages] =item {pro} : Old Provencal (to 1500) eq Old ProvenEal. (Historical.) =item {ps} : Pushto eq Pashto. eq Pushtu. =item {qu} : Quechua eq Quecha. =item {rm} : Raeto-Romance eq Romansh. =item {raj} : Rajasthani =item {rap} : Rapanui =item {rar} : Rarotongan =item [{qaa - qtz} : Reserved for local use.] =item [{roa} : Romance (Other)] NOT Romanian! NOT Romany! NOT Romansh! =item {ro} : Romanian eq Rumanian. NOT Romany! =item {rom} : Romany eq Rom. NOT Romanian! =item {rn} : Rundi =item {ru} : Russian NOT White Russian! NOT Rusyn! =item [{sal} : Salishan languages] Large language group. =item {sam} : Samaritan Aramaic NOT Aramaic! =item {se} : Northern Sami eq Lappish. eq Lapp. eq (Northern) Saami. 
=item {sma} : Southern Sami =item {smn} : Inari Sami =item {smj} : Lule Sami =item {sms} : Skolt Sami =item [{smi} : Sami languages (Other)] =item {sm} : Samoan =item {sad} : Sandawe =item {sg} : Sango =item {sa} : Sanskrit (Historical) =item {sat} : Santali =item {sc} : Sardinian eq Sard. =item {sas} : Sasak =item {sco} : Scots NOT Scots Gaelic! =item {sel} : Selkup =item [{sem} : Semitic (Other)] =item {sr} : Serbian eq Serb. NOT Sorbian. Notable forms: {sr-Cyrl} : Serbian in Cyrillic script; {sr-Latn} : Serbian in Latin script. =item {srr} : Serer =item {shn} : Shan =item {sn} : Shona =item {sid} : Sidamo =item {sgn-...} : Sign Languages Always use with a subtag. Notable forms: {sgn-gb} British Sign Language (BSL); {sgn-ie} Irish Sign Language (ESL); {sgn-ni} Nicaraguan Sign Language (ISN); {sgn-us} American Sign Language (ASL). (And so on with other country codes as the subtag.) =item {bla} : Siksika eq Blackfoot. eq Pikanii. =item {sd} : Sindhi =item {si} : Sinhalese eq Sinhala. =item [{sit} : Sino-Tibetan (Other)] =item [{sio} : Siouan languages] =item {den} : Slave (Athapascan) ("Slavey" is a subform.) =item [{sla} : Slavic (Other)] =item {sk} : Slovak eq Slovakian. =item {sl} : Slovenian eq Slovene. =item {sog} : Sogdian =item {so} : Somali =item {son} : Songhai =item {snk} : Soninke =item {wen} : Sorbian languages eq Wendish. eq Sorb. eq Lusatian. eq Wend. NOT Venda! NOT Serbian! =item {nso} : Northern Sotho =item {st} : Southern Sotho eq Sutu. eq Sesotho. 
=item [{sai} : South American Indian (Other)] =item {es} : Spanish Notable forms: {es-ar} Argentine Spanish; {es-bo} Bolivian Spanish; {es-cl} Chilean Spanish; {es-co} Colombian Spanish; {es-do} Dominican Spanish; {es-ec} Ecuadorian Spanish; {es-es} Spain Spanish; {es-gt} Guatemalan Spanish; {es-hn} Honduran Spanish; {es-mx} Mexican Spanish; {es-pa} Panamanian Spanish; {es-pe} Peruvian Spanish; {es-pr} Puerto Rican Spanish; {es-py} Paraguay Spanish; {es-sv} Salvadoran Spanish; {es-us} US Spanish; {es-uy} Uruguayan Spanish; {es-ve} Venezuelan Spanish. =item {suk} : Sukuma =item {sux} : Sumerian (Historical) =item {su} : Sundanese =item {sus} : Susu =item {sw} : Swahili eq Kiswahili =item {ss} : Swati =item {sv} : Swedish Notable forms: {sv-se} Sweden Swedish; {sv-fi} Finland Swedish. =item {syr} : Syriac =item {tl} : Tagalog =item {ty} : Tahitian =item [{tai} : Tai (Other)] NOT Thai! =item {tg} : Tajik =item {tmh} : Tamashek =item {ta} : Tamil =item {i-tao} : Tao eq Yami. =item {tt} : Tatar =item {i-tay} : Tayal eq Atayal. eq Atayan. =item {te} : Telugu =item {ter} : Tereno =item {tet} : Tetum =item {th} : Thai NOT Tai! =item {bo} : Tibetan =item {tig} : Tigre =item {ti} : Tigrinya =item {tem} : Timne eq Themne. eq Timene. =item {tiv} : Tiv =item {tli} : Tlingit =item {tpi} : Tok Pisin =item {tkl} : Tokelau =item {tog} : Tonga (Nyasa) NOT Tsonga! =item {to} : Tonga (Tonga Islands) (Pronounced "Tong-a", not "Tong-ga") NOT Tsonga! =item {tsi} : Tsimshian eq Sm'algyax =item {ts} : Tsonga NOT Tonga! =item {i-tsu} : Tsou =item {tn} : Tswana Same as Setswana. =item {tum} : Tumbuka =item [{tup} : Tupi languages] =item {tr} : Turkish (Typically in Roman script) =item {ota} : Ottoman Turkish (1500-1928) (Typically in Arabic script) (Historical) =item {crh} : Crimean Turkish eq Crimean Tatar =item {tk} : Turkmen eq Turkmeni. =item {tvl} : Tuvalu =item {tyv} : Tuvinian eq Tuvan. eq Tuvin. =item {tw} : Twi =item {udm} : Udmurt =item {uga} : Ugaritic NOT Ugric! 
=item {ug} : Uighur =item {uk} : Ukrainian =item {umb} : Umbundu =item {und} : Undetermined Not a tag for normal use. =item {ur} : Urdu =item {uz} : Uzbek eq Ezbek Notable forms: {uz-Cyrl} Uzbek in Cyrillic script; {uz-Latn} Uzbek in Latin script. =item {vai} : Vai =item {ve} : Venda NOT Wendish! NOT Wend! NOT Avestan! (Formerly "ven".) =item {vi} : Vietnamese eq Viet. =item {vo} : Volapuk eq VolapEk. (Artificial) =item {vot} : Votic eq Votian. eq Vod. =item [{wak} : Wakashan languages] =item {wa} : Walloon =item {wal} : Walamo eq Wolaytta. =item {war} : Waray Presumably the Philippine language Waray-Waray (SamareEo), not the smaller Philippine language Waray Sorsogon, nor the extinct Australian language Waray. =item {was} : Washo eq Washoe =item {cy} : Welsh =item {wo} : Wolof =item {x-...} : Unregistered (Semi-Private Use) "x-" is a prefix for language tags that are not registered with ISO or IANA. Example, x-double-dutch =item {xh} : Xhosa =item {sah} : Yakut =item {yao} : Yao (The Yao in Malawi?) =item {yap} : Yapese eq Yap =item {ii} : Sichuan Yi =item {yi} : Yiddish Formerly "ji". Usually in Hebrew script. Notable forms: {yi-latn} Yiddish in Latin script =item {yo} : Yoruba =item [{ypk} : Yupik languages] Several "Eskimo" languages. =item {znd} : Zande =item [{zap} : Zapotec] (A group of languages.) =item {zen} : Zenaga NOT Zend. =item {za} : Zhuang =item {zu} : Zulu =item {zun} : Zuni eq ZuEi =back =for woohah END =head1 SEE ALSO L and its "See Also" section. =head1 COPYRIGHT AND DISCLAIMER Copyright (c) 2001+ Sean M. Burke. All rights reserved. You can redistribute and/or modify this document under the same terms as Perl itself. This document is provided in the hope that it will be useful, but without any warranty; without even the implied warranty of accuracy, authoritativeness, completeness, merchantability, or fitness for a particular purpose. Email any corrections or questions to me. =head1 AUTHOR Sean M. 
Burke, sburkeE<64>cpan.org =cut # To generate a list of just the two and three-letter codes: #!/usr/local/bin/perl -w require 5; # Time-stamp: "2001-03-13 21:53:39 MST" # Sean M. Burke, sburke@cpan.org # This program is for generating the language_codes.txt file use strict; use LWP::Simple; use HTML::TreeBuilder 3.10; my $root = HTML::TreeBuilder->new(); my $url = 'http://lcweb.loc.gov/standards/iso639-2/bibcodes.html'; $root->parse(get($url) || die "Can't get $url"); $root->eof(); my @codes; foreach my $tr ($root->find_by_tag_name('tr')) { my @f = map $_->as_text(), $tr->content_list(); #print map("<$_> ", @f), "\n"; next unless @f == 5; pop @f; # nix the French name next if $f[-1] eq 'Language Name (English)'; # it's a header line my $xx = splice(@f, 2,1); # pull out the two-letter code $f[-1] =~ s/^\s+//; $f[-1] =~ s/\s+$//; if($xx =~ m/[a-zA-Z]/) { # there's a two-letter code for it push @codes, [ lc($f[-1]), "$xx\t$f[-1]\n" ]; } else { # print the three-letter codes. if($f[0] eq $f[1]) { push @codes, [ lc($f[-1]), "$f[1]\t$f[2]\n" ]; } else { # shouldn't happen push @codes, [ lc($f[-1]), "@f !!!!!!!!!!\n" ]; } } } print map $_->[1], sort {;% .$ ..&Bzip2.pm' Deflate.pm( Identity.pmpackage IO::Compress::Adapter::Bzip2 ; use strict; use warnings; use bytes; use IO::Compress::Base::Common 2.033 qw(:Status); #use Compress::Bzip2 ; use Compress::Raw::Bzip2 2.033 ; our ($VERSION); $VERSION = '2.033'; sub mkCompObject { my $BlockSize100K = shift ; my $WorkFactor = shift ; my $Verbosity = shift ; my ($def, $status) = new Compress::Raw::Bzip2(1, $BlockSize100K, $WorkFactor, $Verbosity); #my ($def, $status) = bzdeflateInit(); #-BlockSize100K => $params->value('BlockSize100K'), #-WorkFactor => $params->value('WorkFactor'); return (undef, "Could not create Deflate object: $status", $status) if $status != BZ_OK ; return bless {'Def' => $def, 'Error' => '', 'ErrorNo' => 0, } ; } sub compr { my $self = shift ; my $def = $self->{Def}; #my ($out, $status) = 
$def->bzdeflate(defined ${$_[0]} ? ${$_[0]} : "") ; my $status = $def->bzdeflate($_[0], $_[1]) ; $self->{ErrorNo} = $status; if ($status != BZ_RUN_OK) { $self->{Error} = "Deflate Error: $status"; return STATUS_ERROR; } #${ $_[1] } .= $out if defined $out; return STATUS_OK; } sub flush { my $self = shift ; my $def = $self->{Def}; #my ($out, $status) = $def->bzflush($opt); #my $status = $def->bzflush($_[0], $opt); my $status = $def->bzflush($_[0]); $self->{ErrorNo} = $status; if ($status != BZ_RUN_OK) { $self->{Error} = "Deflate Error: $status"; return STATUS_ERROR; } #${ $_[0] } .= $out if defined $out ; return STATUS_OK; } sub close { my $self = shift ; my $def = $self->{Def}; #my ($out, $status) = $def->bzclose(); my $status = $def->bzclose($_[0]); $self->{ErrorNo} = $status; if ($status != BZ_STREAM_END) { $self->{Error} = "Deflate Error: $status"; return STATUS_ERROR; } #${ $_[0] } .= $out if defined $out ; return STATUS_OK; } sub reset { my $self = shift ; my $outer = $self->{Outer}; my ($def, $status) = new Compress::Raw::Bzip2(); $self->{ErrorNo} = ($status == BZ_OK) ? 
0 : $status ; if ($status != BZ_OK) { $self->{Error} = "Cannot create Deflate object: $status"; return STATUS_ERROR; } $self->{Def} = $def; return STATUS_OK; } sub compressedBytes { my $self = shift ; $self->{Def}->compressedBytes(); } sub uncompressedBytes { my $self = shift ; $self->{Def}->uncompressedBytes(); } #sub total_out #{ # my $self = shift ; # 0; #} # #sub total_in #{ # my $self = shift ; # $self->{Def}->total_in(); #} # #sub crc32 #{ # my $self = shift ; # $self->{Def}->crc32(); #} # #sub adler32 #{ # my $self = shift ; # $self->{Def}->adler32(); #} 1; __END__ * .$ ..+ Common.pmpackage IO::Compress::Adapter::Deflate ; use strict; use warnings; use bytes; use IO::Compress::Base::Common 2.033 qw(:Status); use Compress::Raw::Zlib 2.033 qw(Z_OK Z_FINISH MAX_WBITS) ; our ($VERSION); $VERSION = '2.033'; sub mkCompObject { my $crc32 = shift ; my $adler32 = shift ; my $level = shift ; my $strategy = shift ; my ($def, $status) = new Compress::Raw::Zlib::Deflate -AppendOutput => 1, -CRC32 => $crc32, -ADLER32 => $adler32, -Level => $level, -Strategy => $strategy, -WindowBits => - MAX_WBITS; return (undef, "Cannot create Deflate object: $status", $status) if $status != Z_OK; return bless {'Def' => $def, 'Error' => '', } ; } sub compr { my $self = shift ; my $def = $self->{Def}; my $status = $def->deflate($_[0], $_[1]) ; $self->{ErrorNo} = $status; if ($status != Z_OK) { $self->{Error} = "Deflate Error: $status"; return STATUS_ERROR; } return STATUS_OK; } sub flush { my $self = shift ; my $def = $self->{Def}; my $opt = $_[1] || Z_FINISH; my $status = $def->flush($_[0], $opt); $self->{ErrorNo} = $status; if ($status != Z_OK) { $self->{Error} = "Deflate Error: $status"; return STATUS_ERROR; } return STATUS_OK; } sub close { my $self = shift ; my $def = $self->{Def}; $def->flush($_[0], Z_FINISH) if defined $def ; } sub reset { my $self = shift ; my $def = $self->{Def}; my $status = $def->deflateReset() ; $self->{ErrorNo} = $status; if ($status != Z_OK) { $self->{Error} 
= "Deflate Error: $status"; return STATUS_ERROR; } return STATUS_OK; } sub deflateParams { my $self = shift ; my $def = $self->{Def}; my $status = $def->deflateParams(@_); $self->{ErrorNo} = $status; if ($status != Z_OK) { $self->{Error} = "deflateParams Error: $status"; return STATUS_ERROR; } return STATUS_OK; } #sub total_out #{ # my $self = shift ; # $self->{Def}->total_out(); #} # #sub total_in #{ # my $self = shift ; # $self->{Def}->total_in(); #} sub compressedBytes { my $self = shift ; $self->{Def}->compressedBytes(); } sub uncompressedBytes { my $self = shift ; $self->{Def}->uncompressedBytes(); } sub crc32 { my $self = shift ; $self->{Def}->crc32(); } sub adler32 { my $self = shift ; $self->{Def}->adler32(); } 1; __END__ 3 .$ ..4 Constants.pm5 .$ ..6 Constants.pm7Extra.pmpackage IO::Compress::Adapter::Identity ; use strict; use warnings; use bytes; use IO::Compress::Base::Common 2.033 qw(:Status); our ($VERSION); $VERSION = '2.033'; sub mkCompObject { my $level = shift ; my $strategy = shift ; return bless { 'CompSize' => 0, 'UnCompSize' => 0, 'Error' => '', 'ErrorNo' => 0, } ; } sub compr { my $self = shift ; if (defined ${ $_[0] } && length ${ $_[0] }) { $self->{CompSize} += length ${ $_[0] } ; $self->{UnCompSize} = $self->{CompSize} ; if ( ref $_[1] ) { ${ $_[1] } .= ${ $_[0] } } else { $_[1] .= ${ $_[0] } } } return STATUS_OK ; } sub flush { my $self = shift ; return STATUS_OK; } sub close { my $self = shift ; return STATUS_OK; } sub reset { my $self = shift ; $self->{CompSize} = 0; $self->{UnCompSize} = 0; return STATUS_OK; } sub deflateParams { my $self = shift ; return STATUS_OK; } #sub total_out #{ # my $self = shift ; # return $self->{UnCompSize} ; #} # #sub total_in #{ # my $self = shift ; # return $self->{UnCompSize} ; #} sub compressedBytes { my $self = shift ; return $self->{UnCompSize} ; } sub uncompressedBytes { my $self = shift ; return $self->{UnCompSize} ; } 1; __END__ RINT = \&print; *PRINTF = \&printf; *WRITE = \&syswrite; *write = 
\&syswrite; *SEEK = \&seek; *TELL = \&tell; *EOF = \&eof; *CLOSE = \&close; *BINMODE = \&binmode; #*sysread = \&_notAvailable; #*syswrite = \&_write; 1; __END__ =head1 NAME IO::Compress::Base - Base Class for IO::Compress modules =head1 SYNOPSIS use IO::Compress::Base ; =head1 DESCRIPTION This module is not intended for direct use in application code. Its sole purpose if to to be sub-classed by IO::Compress modules. =head1 SEE ALSO L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L L L, L, L, L =head1 AUTHOR This module was written by Paul Marquess, F. =head1 MODIFICATION HISTORY See the Changes file. =head1 COPYRIGHT AND LICENSE Copyright (c) 2005-2011 Paul Marquess. All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. by Paul Marquess, F. =head1 MODIFICATION HISTORY See the Changes file. =head1 COPYRIGHT AND LICENSE Copyright (c) 2005-2011 Paul Marquess. All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. package IO::Compress::Base ; require 5.004 ; use strict ; use warnings; use IO::Compress::Base::Common 2.033 ; use IO::File ; use Scalar::Util qw(blessed readonly); #use File::Glob; #require Exporter ; use Carp ; use Symbol; use bytes; our (@ISA, $VERSION); @ISA = qw(Exporter IO::File); $VERSION = '2.033'; #Can't locate object method "SWASHNEW" via package "utf8" (perhaps you forgot to load "utf8"?) at .../ext/Compress-Zlib/Gzip/blib/lib/Compress/Zlib/Common.pm line 16. 
sub saveStatus { my $self = shift ; ${ *$self->{ErrorNo} } = shift() + 0 ; ${ *$self->{Error} } = '' ; return ${ *$self->{ErrorNo} } ; } sub saveErrorString { my $self = shift ; my $retval = shift ; ${ *$self->{Error} } = shift ; ${ *$self->{ErrorNo} } = shift() + 0 if @_ ; return $retval; } sub croakError { my $self = shift ; $self->saveErrorString(0, $_[0]); croak $_[0]; } sub closeError { my $self = shift ; my $retval = shift ; my $errno = *$self->{ErrorNo}; my $error = ${ *$self->{Error} }; $self->close(); *$self->{ErrorNo} = $errno ; ${ *$self->{Error} } = $error ; return $retval; } sub error { my $self = shift ; return ${ *$self->{Error} } ; } sub errorNo { my $self = shift ; return ${ *$self->{ErrorNo} } ; } sub writeAt { my $self = shift ; my $offset = shift; my $data = shift; if (defined *$self->{FH}) { my $here = tell(*$self->{FH}); return $self->saveErrorString(undef, "Cannot seek to end of output filehandle: $!", $!) if $here < 0 ; seek(*$self->{FH}, $offset, SEEK_SET) or return $self->saveErrorString(undef, "Cannot seek to end of output filehandle: $!", $!) ; defined *$self->{FH}->write($data, length $data) or return $self->saveErrorString(undef, $!, $!) ; seek(*$self->{FH}, $here, SEEK_SET) or return $self->saveErrorString(undef, "Cannot seek to end of output filehandle: $!", $!) ; } else { substr(${ *$self->{Buffer} }, $offset, length($data)) = $data ; } return 1; } sub output { my $self = shift ; my $data = shift ; my $last = shift ; return 1 if length $data == 0 && ! 
$last ; if ( *$self->{FilterEnvelope} ) { *_ = \$data; &{ *$self->{FilterEnvelope} }(); } if (length $data) { if ( defined *$self->{FH} ) { defined *$self->{FH}->write( $data, length $data ) or return $self->saveErrorString(0, $!, $!); } else { ${ *$self->{Buffer} } .= $data ; } } return 1; } sub getOneShotParams { return ( 'MultiStream' => [1, 1, Parse_boolean, 1], ); } sub checkParams { my $self = shift ; my $class = shift ; my $got = shift || IO::Compress::Base::Parameters::new(); $got->parse( { # Generic Parameters 'AutoClose' => [1, 1, Parse_boolean, 0], #'Encode' => [1, 1, Parse_any, undef], 'Strict' => [0, 1, Parse_boolean, 1], 'Append' => [1, 1, Parse_boolean, 0], 'BinModeIn' => [1, 1, Parse_boolean, 0], 'FilterEnvelope' => [1, 1, Parse_any, undef], $self->getExtraParams(), *$self->{OneShot} ? $self->getOneShotParams() : (), }, @_) or $self->croakError("${class}: $got->{Error}") ; return $got ; } sub _create { my $obj = shift; my $got = shift; *$obj->{Closed} = 1 ; my $class = ref $obj; $obj->croakError("$class: Missing Output parameter") if ! @_ && ! $got ; my $outValue = shift ; my $oneShot = 1 ; if (! $got) { $oneShot = 0 ; $got = $obj->checkParams($class, undef, @_) or return undef ; } my $lax = ! $got->value('Strict') ; my $outType = whatIsOutput($outValue); $obj->ckOutputParam($class, $outValue) or return undef ; if ($outType eq 'buffer') { *$obj->{Buffer} = $outValue; } else { my $buff = "" ; *$obj->{Buffer} = \$buff ; } # Merge implies Append my $merge = $got->value('Merge') ; my $appendOutput = $got->value('Append') || $merge ; *$obj->{Append} = $appendOutput; *$obj->{FilterEnvelope} = $got->value('FilterEnvelope') ; if ($merge) { # Switch off Merge mode if output file/buffer is empty/doesn't exist if (($outType eq 'buffer' && length $$outValue == 0 ) || ($outType ne 'buffer' && (! -e $outValue || (-w _ && -z _))) ) { $merge = 0 } } # If output is a file, check that it is writable #no warnings; #if ($outType eq 'filename' && -e $outValue && ! 
-w _) # { return $obj->saveErrorString(undef, "Output file '$outValue' is not writable" ) } if ($got->parsed('Encode')) { my $want_encoding = $got->value('Encode'); *$obj->{Encoding} = getEncoding($obj, $class, $want_encoding); } $obj->ckParams($got) or $obj->croakError("${class}: " . $obj->error()); $obj->saveStatus(STATUS_OK) ; my $status ; if (! $merge) { *$obj->{Compress} = $obj->mkComp($got) or return undef; *$obj->{UnCompSize} = new U64 ; *$obj->{CompSize} = new U64 ; if ( $outType eq 'buffer') { ${ *$obj->{Buffer} } = '' unless $appendOutput ; } else { if ($outType eq 'handle') { *$obj->{FH} = $outValue ; setBinModeOutput(*$obj->{FH}) ; $outValue->flush() ; *$obj->{Handle} = 1 ; if ($appendOutput) { seek(*$obj->{FH}, 0, SEEK_END) or return $obj->saveErrorString(undef, "Cannot seek to end of output filehandle: $!", $!) ; } } elsif ($outType eq 'filename') { no warnings; my $mode = '>' ; $mode = '>>' if $appendOutput; *$obj->{FH} = new IO::File "$mode $outValue" or return $obj->saveErrorString(undef, "cannot open file '$outValue': $!", $!) ; *$obj->{StdIO} = ($outValue eq '-'); setBinModeOutput(*$obj->{FH}) ; } } *$obj->{Header} = $obj->mkHeader($got) ; $obj->output( *$obj->{Header} ) or return undef; } else { *$obj->{Compress} = $obj->createMerge($outValue, $outType) or return undef; } *$obj->{Closed} = 0 ; *$obj->{AutoClose} = $got->value('AutoClose') ; *$obj->{Output} = $outValue; *$obj->{ClassName} = $class; *$obj->{Got} = $got; *$obj->{OneShot} = 0 ; return $obj ; } sub ckOutputParam { my $self = shift ; my $from = shift ; my $outType = whatIsOutput($_[0]); $self->croakError("$from: output parameter not a filename, filehandle or scalar ref") if ! $outType ; #$self->croakError("$from: output filename is undef or null string") #if $outType eq 'filename' && (! 
defined $_[0] || $_[0] eq '') ; $self->croakError("$from: output buffer is read-only") if $outType eq 'buffer' && readonly(${ $_[0] }); return 1; } sub _def { my $obj = shift ; my $class= (caller)[0] ; my $name = (caller(1))[3] ; $obj->croakError("$name: expected at least 1 parameters\n") unless @_ >= 1 ; my $input = shift ; my $haveOut = @_ ; my $output = shift ; my $x = new IO::Compress::Base::Validator($class, *$obj->{Error}, $name, $input, $output) or return undef ; push @_, $output if $haveOut && $x->{Hash}; *$obj->{OneShot} = 1 ; my $got = $obj->checkParams($name, undef, @_) or return undef ; $x->{Got} = $got ; # if ($x->{Hash}) # { # while (my($k, $v) = each %$input) # { # $v = \$input->{$k} # unless defined $v ; # # $obj->_singleTarget($x, 1, $k, $v, @_) # or return undef ; # } # # return keys %$input ; # } if ($x->{GlobMap}) { $x->{oneInput} = 1 ; foreach my $pair (@{ $x->{Pairs} }) { my ($from, $to) = @$pair ; $obj->_singleTarget($x, 1, $from, $to, @_) or return undef ; } return scalar @{ $x->{Pairs} } ; } if (! $x->{oneOutput} ) { my $inFile = ($x->{inType} eq 'filenames' || $x->{inType} eq 'filename'); $x->{inType} = $inFile ? 'filename' : 'buffer'; foreach my $in ($x->{oneInput} ? 
$input : @$input) { my $out ; $x->{oneInput} = 1 ; $obj->_singleTarget($x, $inFile, $in, \$out, @_) or return undef ; push @$output, \$out ; #if ($x->{outType} eq 'array') # { push @$output, \$out } #else # { $output->{$in} = \$out } } return 1 ; } # finally the 1 to 1 and n to 1 return $obj->_singleTarget($x, 1, $input, $output, @_); croak "should not be here" ; } sub _singleTarget { my $obj = shift ; my $x = shift ; my $inputIsFilename = shift; my $input = shift; if ($x->{oneInput}) { $obj->getFileInfo($x->{Got}, $input) if isaFilename($input) and $inputIsFilename ; my $z = $obj->_create($x->{Got}, @_) or return undef ; defined $z->_wr2($input, $inputIsFilename) or return $z->closeError(undef) ; return $z->close() ; } else { my $afterFirst = 0 ; my $inputIsFilename = ($x->{inType} ne 'array'); my $keep = $x->{Got}->clone(); #for my $element ( ($x->{inType} eq 'hash') ? keys %$input : @$input) for my $element ( @$input) { my $isFilename = isaFilename($element); if ( $afterFirst ++ ) { defined addInterStream($obj, $element, $isFilename) or return $obj->closeError(undef) ; } else { $obj->getFileInfo($x->{Got}, $element) if $isFilename; $obj->_create($x->{Got}, @_) or return undef ; } defined $obj->_wr2($element, $isFilename) or return $obj->closeError(undef) ; *$obj->{Got} = $keep->clone(); } return $obj->close() ; } } sub _wr2 { my $self = shift ; my $source = shift ; my $inputIsFilename = shift; my $input = $source ; if (! $inputIsFilename) { $input = \$source if ! ref $source; } if ( ref $input && ref $input eq 'SCALAR' ) { return $self->syswrite($input, @_) ; } if ( ! ref $input || isaFilehandle($input)) { my $isFilehandle = isaFilehandle($input) ; my $fh = $input ; if ( ! $isFilehandle ) { $fh = new IO::File "<$input" or return $self->saveErrorString(undef, "cannot open file '$input': $!", $!) 
; } binmode $fh if *$self->{Got}->valueOrDefault('BinModeIn') ; my $status ; my $buff ; my $count = 0 ; while ($status = read($fh, $buff, 16 * 1024)) { $count += length $buff; defined $self->syswrite($buff, @_) or return undef ; } return $self->saveErrorString(undef, $!, $!) if ! defined $status ; if ( (!$isFilehandle || *$self->{AutoClose}) && $input ne '-') { $fh->close() or return undef ; } return $count ; } croak "Should not be here"; return undef; } sub addInterStream { my $self = shift ; my $input = shift ; my $inputIsFilename = shift ; if (*$self->{Got}->value('MultiStream')) { $self->getFileInfo(*$self->{Got}, $input) #if isaFilename($input) and $inputIsFilename ; if isaFilename($input) ; # TODO -- newStream needs to allow gzip/zip header to be modified return $self->newStream(); } elsif (*$self->{Got}->value('AutoFlush')) { #return $self->flush(Z_FULL_FLUSH); } return 1 ; } sub getFileInfo { } sub TIEHANDLE { return $_[0] if ref($_[0]); die "OOPS\n" ; } sub UNTIE { my $self = shift ; } sub DESTROY { my $self = shift ; local ($., $@, $!, $^E, $?); $self->close() ; # TODO - memory leak with 5.8.0 - this isn't called until # global destruction # %{ *$self } = () ; undef $self ; } sub filterUncompressed { } sub syswrite { my $self = shift ; my $buffer ; if (ref $_[0] ) { $self->croakError( *$self->{ClassName} . "::write: not a scalar reference" ) unless ref $_[0] eq 'SCALAR' ; $buffer = $_[0] ; } else { $buffer = \$_[0] ; } $] >= 5.008 and ( utf8::downgrade($$buffer, 1) or croak "Wide character in " . *$self->{ClassName} . "::write:"); if (@_ > 1) { my $slen = defined $$buffer ? length($$buffer) : 0; my $len = $slen; my $offset = 0; $len = $_[1] if $_[1] < $len; if (@_ > 2) { $offset = $_[2] || 0; $self->croakError(*$self->{ClassName} . "::write: offset outside string") if $offset > $slen; if ($offset < 0) { $offset += $slen; $self->croakError( *$self->{ClassName} . 
"::write: offset outside string") if $offset < 0; } my $rem = $slen - $offset; $len = $rem if $rem < $len; } $buffer = \substr($$buffer, $offset, $len) ; } return 0 if ! defined $$buffer || length $$buffer == 0 ; if (*$self->{Encoding}) { $$buffer = *$self->{Encoding}->encode($$buffer); } $self->filterUncompressed($buffer); my $buffer_length = defined $$buffer ? length($$buffer) : 0 ; *$self->{UnCompSize}->add($buffer_length) ; my $outBuffer=''; my $status = *$self->{Compress}->compr($buffer, $outBuffer) ; return $self->saveErrorString(undef, *$self->{Compress}{Error}, *$self->{Compress}{ErrorNo}) if $status == STATUS_ERROR; *$self->{CompSize}->add(length $outBuffer) ; $self->output($outBuffer) or return undef; return $buffer_length; } sub print { my $self = shift; #if (ref $self) { # $self = *$self{GLOB} ; #} if (defined $\) { if (defined $,) { defined $self->syswrite(join($,, @_) . $\); } else { defined $self->syswrite(join("", @_) . $\); } } else { if (defined $,) { defined $self->syswrite(join($,, @_)); } else { defined $self->syswrite(join("", @_)); } } } sub printf { my $self = shift; my $fmt = shift; defined $self->syswrite(sprintf($fmt, @_)); } sub flush { my $self = shift ; my $outBuffer=''; my $status = *$self->{Compress}->flush($outBuffer, @_) ; return $self->saveErrorString(0, *$self->{Compress}{Error}, *$self->{Compress}{ErrorNo}) if $status == STATUS_ERROR; if ( defined *$self->{FH} ) { *$self->{FH}->clearerr(); } *$self->{CompSize}->add(length $outBuffer) ; $self->output($outBuffer) or return 0; if ( defined *$self->{FH} ) { defined *$self->{FH}->flush() or return $self->saveErrorString(0, $!, $!); } return 1; } sub newStream { my $self = shift ; $self->_writeTrailer() or return 0 ; my $got = $self->checkParams('newStream', *$self->{Got}, @_) or return 0 ; $self->ckParams($got) or $self->croakError("newStream: $self->{Error}"); *$self->{Compress} = $self->mkComp($got) or return 0; *$self->{Header} = $self->mkHeader($got) ; 
$self->output(*$self->{Header} ) or return 0; *$self->{UnCompSize}->reset(); *$self->{CompSize}->reset(); return 1 ; } sub reset { my $self = shift ; return *$self->{Compress}->reset() ; } sub _writeTrailer { my $self = shift ; my $trailer = ''; my $status = *$self->{Compress}->close($trailer) ; return $self->saveErrorString(0, *$self->{Compress}{Error}, *$self->{Compress}{ErrorNo}) if $status == STATUS_ERROR; *$self->{CompSize}->add(length $trailer) ; $trailer .= $self->mkTrailer(); defined $trailer or return 0; return $self->output($trailer); } sub _writeFinalTrailer { my $self = shift ; return $self->output($self->mkFinalTrailer()); } sub close { my $self = shift ; return 1 if *$self->{Closed} || ! *$self->{Compress} ; *$self->{Closed} = 1 ; untie *$self if $] >= 5.008 ; $self->_writeTrailer() or return 0 ; $self->_writeFinalTrailer() or return 0 ; $self->output( "", 1 ) or return 0; if (defined *$self->{FH}) { #if (! *$self->{Handle} || *$self->{AutoClose}) { if ((! *$self->{Handle} || *$self->{AutoClose}) && ! *$self->{StdIO}) { $! = 0 ; *$self->{FH}->close() or return $self->saveErrorString(0, $!, $!); } delete *$self->{FH} ; # This delete can set $! in older Perls, so reset the errno $! 
= 0 ; } return 1; } #sub total_in #sub total_out #sub msg # #sub crc #{ # my $self = shift ; # return *$self->{Compress}->crc32() ; #} # #sub msg #{ # my $self = shift ; # return *$self->{Compress}->msg() ; #} # #sub dict_adler #{ # my $self = shift ; # return *$self->{Compress}->dict_adler() ; #} # #sub get_Level #{ # my $self = shift ; # return *$self->{Compress}->get_Level() ; #} # #sub get_Strategy #{ # my $self = shift ; # return *$self->{Compress}->get_Strategy() ; #} sub tell { my $self = shift ; return *$self->{UnCompSize}->get32bit() ; } sub eof { my $self = shift ; return *$self->{Closed} ; } sub seek { my $self = shift ; my $position = shift; my $whence = shift ; my $here = $self->tell() ; my $target = 0 ; #use IO::Handle qw(SEEK_SET SEEK_CUR SEEK_END); use IO::Handle ; if ($whence == IO::Handle::SEEK_SET) { $target = $position ; } elsif ($whence == IO::Handle::SEEK_CUR || $whence == IO::Handle::SEEK_END) { $target = $here + $position ; } else { $self->croakError(*$self->{ClassName} . "::seek: unknown value, $whence, for whence parameter"); } # short circuit if seeking to current offset return 1 if $target == $here ; # Outlaw any attempt to seek backwards $self->croakError(*$self->{ClassName} . "::seek: cannot seek backwards") if $target < $here ; # Walk the file to the new offset my $offset = $target - $here ; my $buffer ; defined $self->syswrite("\x00" x $offset) or return 0; return 1 ; } sub binmode { 1; # my $self = shift ; # return defined *$self->{FH} # ? binmode *$self->{FH} # : 1 ; } sub fileno { my $self = shift ; return defined *$self->{FH} ? *$self->{FH}->fileno() : undef ; } sub opened { my $self = shift ; return ! *$self->{Closed} ; } sub autoflush { my $self = shift ; return defined *$self->{FH} ? 
*$self->{FH}->autoflush(@_) : undef ; } sub input_line_number { return undef ; } sub _notAvailable { my $name = shift ; return sub { croak "$name Not Available: File opened only for output" ; } ; } *read = _notAvailable('read'); *READ = _notAvailable('read'); *readline = _notAvailable('readline'); *READLINE = _notAvailable('readline'); *getc = _notAvailable('getc'); *GETC = _notAvailable('getc'); *FILENO = \&fileno; *Ppackage IO::Compress::Base::Common; use strict ; use warnings; use bytes; use Carp; use Scalar::Util qw(blessed readonly); use File::GlobMapper; require Exporter; our ($VERSION, @ISA, @EXPORT, %EXPORT_TAGS, $HAS_ENCODE); @ISA = qw(Exporter); $VERSION = '2.033'; @EXPORT = qw( isaFilehandle isaFilename whatIsInput whatIsOutput isaFileGlobString cleanFileGlobString oneTarget setBinModeInput setBinModeOutput ckInOutParams createSelfTiedObject getEncoding WANT_CODE WANT_EXT WANT_UNDEF WANT_HASH STATUS_OK STATUS_ENDSTREAM STATUS_EOF STATUS_ERROR ); %EXPORT_TAGS = ( Status => [qw( STATUS_OK STATUS_ENDSTREAM STATUS_EOF STATUS_ERROR )]); use constant STATUS_OK => 0; use constant STATUS_ENDSTREAM => 1; use constant STATUS_EOF => 2; use constant STATUS_ERROR => -1; sub hasEncode() { if (! defined $HAS_ENCODE) { eval { require Encode; Encode->import(); }; $HAS_ENCODE = $@ ? 0 : 1 ; } return $HAS_ENCODE; } sub getEncoding($$$) { my $obj = shift; my $class = shift ; my $want_encoding = shift ; $obj->croakError("$class: Encode module needed to use -Encode") if ! hasEncode(); my $encoding = Encode::find_encoding($want_encoding); $obj->croakError("$class: Encoding '$want_encoding' is not available") if ! $encoding; return $encoding; } our ($needBinmode); $needBinmode = ($^O eq 'MSWin32' || ($] >= 5.006 && eval ' ${^UNICODE} || ${^UTF8LOCALE} ')) ? 
1 : 1 ; sub setBinModeInput($) { my $handle = shift ; binmode $handle if $needBinmode; } sub setBinModeOutput($) { my $handle = shift ; binmode $handle if $needBinmode; } sub isaFilehandle($) { use utf8; # Pragma needed to keep Perl 5.6.0 happy return (defined $_[0] and (UNIVERSAL::isa($_[0],'GLOB') or UNIVERSAL::isa($_[0],'IO::Handle') or UNIVERSAL::isa(\$_[0],'GLOB')) ) } sub isaFilename($) { return (defined $_[0] and ! ref $_[0] and UNIVERSAL::isa(\$_[0], 'SCALAR')); } sub isaFileGlobString { return defined $_[0] && $_[0] =~ /^<.*>$/; } sub cleanFileGlobString { my $string = shift ; $string =~ s/^\s*<\s*(.*)\s*>\s*$/$1/; return $string; } use constant WANT_CODE => 1 ; use constant WANT_EXT => 2 ; use constant WANT_UNDEF => 4 ; #use constant WANT_HASH => 8 ; use constant WANT_HASH => 0 ; sub whatIsInput($;$) { my $got = whatIs(@_); if (defined $got && $got eq 'filename' && defined $_[0] && $_[0] eq '-') { #use IO::File; $got = 'handle'; $_[0] = *STDIN; #$_[0] = new IO::File("<-"); } return $got; } sub whatIsOutput($;$) { my $got = whatIs(@_); if (defined $got && $got eq 'filename' && defined $_[0] && $_[0] eq '-') { $got = 'handle'; $_[0] = *STDOUT; #$_[0] = new IO::File(">-"); } return $got; } sub whatIs ($;$) { return 'handle' if isaFilehandle($_[0]); my $wantCode = defined $_[1] && $_[1] & WANT_CODE ; my $extended = defined $_[1] && $_[1] & WANT_EXT ; my $undef = defined $_[1] && $_[1] & WANT_UNDEF ; my $hash = defined $_[1] && $_[1] & WANT_HASH ; return 'undef' if ! 
defined $_[0] && $undef ; if (ref $_[0]) { return '' if blessed($_[0]); # is an object #return '' if UNIVERSAL::isa($_[0], 'UNIVERSAL'); # is an object return 'buffer' if UNIVERSAL::isa($_[0], 'SCALAR'); return 'array' if UNIVERSAL::isa($_[0], 'ARRAY') && $extended ; return 'hash' if UNIVERSAL::isa($_[0], 'HASH') && $hash ; return 'code' if UNIVERSAL::isa($_[0], 'CODE') && $wantCode ; return ''; } return 'fileglob' if $extended && isaFileGlobString($_[0]); return 'filename'; } sub oneTarget { return $_[0] =~ /^(code|handle|buffer|filename)$/; } sub IO::Compress::Base::Validator::new { my $class = shift ; my $Class = shift ; my $error_ref = shift ; my $reportClass = shift ; my %data = (Class => $Class, Error => $error_ref, reportClass => $reportClass, ) ; my $obj = bless \%data, $class ; local $Carp::CarpLevel = 1; my $inType = $data{inType} = whatIsInput($_[0], WANT_EXT|WANT_HASH); my $outType = $data{outType} = whatIsOutput($_[1], WANT_EXT|WANT_HASH); my $oneInput = $data{oneInput} = oneTarget($inType); my $oneOutput = $data{oneOutput} = oneTarget($outType); if (! $inType) { $obj->croakError("$reportClass: illegal input parameter") ; #return undef ; } # if ($inType eq 'hash') # { # $obj->{Hash} = 1 ; # $obj->{oneInput} = 1 ; # return $obj->validateHash($_[0]); # } if (! $outType) { $obj->croakError("$reportClass: illegal output parameter") ; #return undef ; } if ($inType ne 'fileglob' && $outType eq 'fileglob') { $obj->croakError("Need input fileglob for outout fileglob"); } # if ($inType ne 'fileglob' && $outType eq 'hash' && $inType ne 'filename' ) # { # $obj->croakError("input must ne filename or fileglob when output is a hash"); # } if ($inType eq 'fileglob' && $outType eq 'fileglob') { $data{GlobMap} = 1 ; $data{inType} = $data{outType} = 'filename'; my $mapper = new File::GlobMapper($_[0], $_[1]); if ( ! 
$mapper ) { return $obj->saveErrorString($File::GlobMapper::Error) ; } $data{Pairs} = $mapper->getFileMap(); return $obj; } $obj->croakError("$reportClass: input and output $inType are identical") if $inType eq $outType && $_[0] eq $_[1] && $_[0] ne '-' ; if ($inType eq 'fileglob') # && $outType ne 'fileglob' { my $glob = cleanFileGlobString($_[0]); my @inputs = glob($glob); if (@inputs == 0) { # TODO -- legal or die? die "globmap matched zero file -- legal or die???" ; } elsif (@inputs == 1) { $obj->validateInputFilenames($inputs[0]) or return undef; $_[0] = $inputs[0] ; $data{inType} = 'filename' ; $data{oneInput} = 1; } else { $obj->validateInputFilenames(@inputs) or return undef; $_[0] = [ @inputs ] ; $data{inType} = 'filenames' ; } } elsif ($inType eq 'filename') { $obj->validateInputFilenames($_[0]) or return undef; } elsif ($inType eq 'array') { $data{inType} = 'filenames' ; $obj->validateInputArray($_[0]) or return undef ; } return $obj->saveErrorString("$reportClass: output buffer is read-only") if $outType eq 'buffer' && readonly(${ $_[1] }); if ($outType eq 'filename' ) { $obj->croakError("$reportClass: output filename is undef or null string") if ! defined $_[1] || $_[1] eq '' ; if (-e $_[1]) { if (-d _ ) { return $obj->saveErrorString("output file '$_[1]' is a directory"); } } } return $obj ; } sub IO::Compress::Base::Validator::saveErrorString { my $self = shift ; ${ $self->{Error} } = shift ; return undef; } sub IO::Compress::Base::Validator::croakError { my $self = shift ; $self->saveErrorString($_[0]); croak $_[0]; } sub IO::Compress::Base::Validator::validateInputFilenames { my $self = shift ; foreach my $filename (@_) { $self->croakError("$self->{reportClass}: input filename is undef or null string") if ! defined $filename || $filename eq '' ; next if $filename eq '-'; if (! 
-e $filename ) { return $self->saveErrorString("input file '$filename' does not exist"); } if (-d _ ) { return $self->saveErrorString("input file '$filename' is a directory"); } if (! -r _ ) { return $self->saveErrorString("cannot open file '$filename': $!"); } } return 1 ; } sub IO::Compress::Base::Validator::validateInputArray { my $self = shift ; if ( @{ $_[0] } == 0 ) { return $self->saveErrorString("empty array reference") ; } foreach my $element ( @{ $_[0] } ) { my $inType = whatIsInput($element); if (! $inType) { $self->croakError("unknown input parameter") ; } elsif($inType eq 'filename') { $self->validateInputFilenames($element) or return undef ; } else { $self->croakError("not a filename") ; } } return 1 ; } #sub IO::Compress::Base::Validator::validateHash #{ # my $self = shift ; # my $href = shift ; # # while (my($k, $v) = each %$href) # { # my $ktype = whatIsInput($k); # my $vtype = whatIsOutput($v, WANT_EXT|WANT_UNDEF) ; # # if ($ktype ne 'filename') # { # return $self->saveErrorString("hash key not filename") ; # } # # my %valid = map { $_ => 1 } qw(filename buffer array undef handle) ; # if (! 
$valid{$vtype}) # { # return $self->saveErrorString("hash value not ok") ; # } # } # # return $self ; #} sub createSelfTiedObject { my $class = shift || (caller)[0] ; my $error_ref = shift ; my $obj = bless Symbol::gensym(), ref($class) || $class; tie *$obj, $obj if $] >= 5.005; *$obj->{Closed} = 1 ; $$error_ref = ''; *$obj->{Error} = $error_ref ; my $errno = 0 ; *$obj->{ErrorNo} = \$errno ; return $obj; } #package Parse::Parameters ; # # #require Exporter; #our ($VERSION, @ISA, @EXPORT); #$VERSION = '2.000_08'; #@ISA = qw(Exporter); $EXPORT_TAGS{Parse} = [qw( ParseParameters Parse_any Parse_unsigned Parse_signed Parse_boolean Parse_custom Parse_string Parse_multiple Parse_writable_scalar ) ]; push @EXPORT, @{ $EXPORT_TAGS{Parse} } ; use constant Parse_any => 0x01; use constant Parse_unsigned => 0x02; use constant Parse_signed => 0x04; use constant Parse_boolean => 0x08; use constant Parse_string => 0x10; use constant Parse_custom => 0x12; #use constant Parse_store_ref => 0x100 ; use constant Parse_multiple => 0x100 ; use constant Parse_writable => 0x200 ; use constant Parse_writable_scalar => 0x400 | Parse_writable ; use constant OFF_PARSED => 0 ; use constant OFF_TYPE => 1 ; use constant OFF_DEFAULT => 2 ; use constant OFF_FIXED => 3 ; use constant OFF_FIRST_ONLY => 4 ; use constant OFF_STICKY => 5 ; sub ParseParameters { my $level = shift || 0 ; my $sub = (caller($level + 1))[3] ; local $Carp::CarpLevel = 1 ; return $_[1] if @_ == 2 && defined $_[1] && UNIVERSAL::isa($_[1], "IO::Compress::Base::Parameters"); my $p = new IO::Compress::Base::Parameters() ; $p->parse(@_) or croak "$sub: $p->{Error}" ; return $p; } #package IO::Compress::Base::Parameters; use strict; use warnings; use Carp; sub IO::Compress::Base::Parameters::new { my $class = shift ; my $obj = { Error => '', Got => {}, } ; #return bless $obj, ref($class) || $class || __PACKAGE__ ; return bless $obj, 'IO::Compress::Base::Parameters' ; } sub IO::Compress::Base::Parameters::setError { my $self = shift 
; my $error = shift ; my $retval = @_ ? shift : undef ; $self->{Error} = $error ; return $retval; } #sub getError #{ # my $self = shift ; # return $self->{Error} ; #} sub IO::Compress::Base::Parameters::parse { my $self = shift ; my $default = shift ; my $got = $self->{Got} ; my $firstTime = keys %{ $got } == 0 ; my $other; my (@Bad) ; my @entered = () ; # Allow the options to be passed as a hash reference or # as the complete hash. if (@_ == 0) { @entered = () ; } elsif (@_ == 1) { my $href = $_[0] ; return $self->setError("Expected even number of parameters, got 1") if ! defined $href or ! ref $href or ref $href ne "HASH" ; foreach my $key (keys %$href) { push @entered, $key ; push @entered, \$href->{$key} ; } } else { my $count = @_; return $self->setError("Expected even number of parameters, got $count") if $count % 2 != 0 ; for my $i (0.. $count / 2 - 1) { if ($_[2 * $i] eq '__xxx__') { $other = $_[2 * $i + 1] ; } else { push @entered, $_[2 * $i] ; push @entered, \$_[2 * $i + 1] ; } } } while (my ($key, $v) = each %$default) { croak "need 4 params [@$v]" if @$v != 4 ; my ($first_only, $sticky, $type, $value) = @$v ; my $x ; $self->_checkType($key, \$value, $type, 0, \$x) or return undef ; $key = lc $key; if ($firstTime || ! $sticky) { $x = [] if $type & Parse_multiple; $got->{$key} = [0, $type, $value, $x, $first_only, $sticky] ; } $got->{$key}[OFF_PARSED] = 0 ; } my %parsed = (); if ($other) { for my $key (keys %$default) { my $canonkey = lc $key; if ($other->parsed($canonkey)) { my $value = $other->value($canonkey); #print "SET '$canonkey' to $value [$$value]\n"; ++ $parsed{$canonkey}; $got->{$canonkey}[OFF_PARSED] = 1; $got->{$canonkey}[OFF_DEFAULT] = $value; $got->{$canonkey}[OFF_FIXED] = $value; } } } for my $i (0.. @entered / 2 - 1) { my $key = $entered[2* $i] ; my $value = $entered[2* $i+1] ; #print "Key [$key] Value [$value]" ; #print defined $$value ? 
"[$$value]\n" : "[undef]\n"; $key =~ s/^-// ; my $canonkey = lc $key; if ($got->{$canonkey} && ($firstTime || ! $got->{$canonkey}[OFF_FIRST_ONLY] )) { my $type = $got->{$canonkey}[OFF_TYPE] ; my $parsed = $parsed{$canonkey}; ++ $parsed{$canonkey}; return $self->setError("Muliple instances of '$key' found") if $parsed && ($type & Parse_multiple) == 0 ; my $s ; $self->_checkType($key, $value, $type, 1, \$s) or return undef ; $value = $$value ; if ($type & Parse_multiple) { $got->{$canonkey}[OFF_PARSED] = 1; push @{ $got->{$canonkey}[OFF_FIXED] }, $s ; } else { $got->{$canonkey} = [1, $type, $value, $s] ; } } else { push (@Bad, $key) } } if (@Bad) { my ($bad) = join(", ", @Bad) ; return $self->setError("unknown key value(s) $bad") ; } return 1; } sub IO::Compress::Base::Parameters::_checkType { my $self = shift ; my $key = shift ; my $value = shift ; my $type = shift ; my $validate = shift ; my $output = shift; #local $Carp::CarpLevel = $level ; #print "PARSE $type $key $value $validate $sub\n" ; if ($type & Parse_writable_scalar) { return $self->setError("Parameter '$key' not writable") if $validate && readonly $$value ; if (ref $$value) { return $self->setError("Parameter '$key' not a scalar reference") if $validate && ref $$value ne 'SCALAR' ; $$output = $$value ; } else { return $self->setError("Parameter '$key' not a scalar") if $validate && ref $value ne 'SCALAR' ; $$output = $value ; } return 1; } # if ($type & Parse_store_ref) # { # #$value = $$value # # if ref ${ $value } ; # # $$output = $value ; # return 1; # } $value = $$value ; if ($type & Parse_any) { $$output = $value ; return 1; } elsif ($type & Parse_unsigned) { return $self->setError("Parameter '$key' must be an unsigned int, got 'undef'") if $validate && ! defined $value ; return $self->setError("Parameter '$key' must be an unsigned int, got '$value'") if $validate && $value !~ /^\d+$/; $$output = defined $value ? 
$value : 0 ; return 1; } elsif ($type & Parse_signed) { return $self->setError("Parameter '$key' must be a signed int, got 'undef'") if $validate && ! defined $value ; return $self->setError("Parameter '$key' must be a signed int, got '$value'") if $validate && $value !~ /^-?\d+$/; $$output = defined $value ? $value : 0 ; return 1 ; } elsif ($type & Parse_boolean) { return $self->setError("Parameter '$key' must be an int, got '$value'") if $validate && defined $value && $value !~ /^\d*$/; $$output = defined $value ? $value != 0 : 0 ; return 1; } elsif ($type & Parse_string) { $$output = defined $value ? $value : "" ; return 1; } $$output = $value ; return 1; } sub IO::Compress::Base::Parameters::parsed { my $self = shift ; my $name = shift ; return $self->{Got}{lc $name}[OFF_PARSED] ; } sub IO::Compress::Base::Parameters::value { my $self = shift ; my $name = shift ; if (@_) { $self->{Got}{lc $name}[OFF_PARSED] = 1; $self->{Got}{lc $name}[OFF_DEFAULT] = $_[0] ; $self->{Got}{lc $name}[OFF_FIXED] = $_[0] ; } return $self->{Got}{lc $name}[OFF_FIXED] ; } sub IO::Compress::Base::Parameters::valueOrDefault { my $self = shift ; my $name = shift ; my $default = shift ; my $value = $self->{Got}{lc $name}[OFF_DEFAULT] ; return $value if defined $value ; return $default ; } sub IO::Compress::Base::Parameters::wantValue { my $self = shift ; my $name = shift ; return defined $self->{Got}{lc $name}[OFF_DEFAULT] ; } sub IO::Compress::Base::Parameters::clone { my $self = shift ; my $obj = { }; my %got ; while (my ($k, $v) = each %{ $self->{Got} }) { $got{$k} = [ @$v ]; } $obj->{Error} = $self->{Error}; $obj->{Got} = \%got ; return bless $obj, 'IO::Compress::Base::Parameters' ; } package U64; use constant MAX32 => 0xFFFFFFFF ; use constant HI_1 => MAX32 + 1 ; use constant LOW => 0 ; use constant HIGH => 1; sub new { my $class = shift ; my $high = 0 ; my $low = 0 ; if (@_ == 2) { $high = shift ; $low = shift ; } elsif (@_ == 1) { $low = shift ; } bless [$low, $high], $class; } sub 
newUnpack_V64 { my $string = shift; my ($low, $hi) = unpack "V V", $string ; bless [ $low, $hi ], "U64"; } sub newUnpack_V32 { my $string = shift; my $low = unpack "V", $string ; bless [ $low, 0 ], "U64"; } sub reset { my $self = shift; $self->[HIGH] = $self->[LOW] = 0; } sub clone { my $self = shift; bless [ @$self ], ref $self ; } sub getHigh { my $self = shift; return $self->[HIGH]; } sub getLow { my $self = shift; return $self->[LOW]; } sub get32bit { my $self = shift; return $self->[LOW]; } sub get64bit { my $self = shift; # Not using << here because the result will still be # a 32-bit value on systems where int size is 32-bits return $self->[HIGH] * HI_1 + $self->[LOW]; } sub add { my $self = shift; my $value = shift; if (ref $value eq 'U64') { $self->[HIGH] += $value->[HIGH] ; $value = $value->[LOW]; } my $available = MAX32 - $self->[LOW] ; if ($value > $available) { ++ $self->[HIGH] ; $self->[LOW] = $value - $available - 1; } else { $self->[LOW] += $value ; } } sub equal { my $self = shift; my $other = shift; return $self->[LOW] == $other->[LOW] && $self->[HIGH] == $other->[HIGH] ; } sub is64bit { my $self = shift; return $self->[HIGH] > 0 ; } sub getPacked_V64 { my $self = shift; return pack "V V", @$self ; } sub getPacked_V32 { my $self = shift; return pack "V", $self->[LOW] ; } sub pack_V64 { my $low = shift; return pack "V V", $low, 0; } package IO::Compress::Base::Common; 1; ject. See the L section for more details. =head1 Importing No symbolic constants are required by this IO::Compress::Bzip2 at present. =over 5 =item :all Imports C and C<$Bzip2Error>. Same as doing this use IO::Compress::Bzip2 qw(bzip2 $Bzip2Error) ; =back =head1 EXAMPLES =head2 Apache::GZip Revisited See L =head2 Working with Net::FTP See L =head1 SEE ALSO L, L, L, L, L, L, L, L, L, L, L, L, L, L L L, L, L, L The primary site for the bzip2 program is F. See the module L =head1 AUTHOR This module was written by Paul Marquess, F. =head1 MODIFICATION HISTORY See the Changes file. 
=head1 COPYRIGHT AND LICENSE Copyright (c) 2005-2008 Paul Marquess. All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. / .$ ..0 Constants.pmpackage IO::Compress::Bzip2 ; use strict ; use warnings; use bytes; require Exporter ; use IO::Compress::Base 2.033 ; use IO::Compress::Base::Common 2.033 qw(createSelfTiedObject); use IO::Compress::Adapter::Bzip2 2.033 ; our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $Bzip2Error); $VERSION = '2.033'; $Bzip2Error = ''; @ISA = qw(Exporter IO::Compress::Base); @EXPORT_OK = qw( $Bzip2Error bzip2 ) ; %EXPORT_TAGS = %IO::Compress::Base::EXPORT_TAGS ; push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ; Exporter::export_ok_tags('all'); sub new { my $class = shift ; my $obj = createSelfTiedObject($class, \$Bzip2Error); return $obj->_create(undef, @_); } sub bzip2 { my $obj = createSelfTiedObject(undef, \$Bzip2Error); $obj->_def(@_); } sub mkHeader { my $self = shift ; return ''; } sub getExtraParams { my $self = shift ; use IO::Compress::Base::Common 2.033 qw(:Parse); return ( 'BlockSize100K' => [0, 1, Parse_unsigned, 1], 'WorkFactor' => [0, 1, Parse_unsigned, 0], 'Verbosity' => [0, 1, Parse_boolean, 0], ); } sub ckParams { my $self = shift ; my $got = shift; # check that BlockSize100K is a number between 1 & 9 if ($got->parsed('BlockSize100K')) { my $value = $got->value('BlockSize100K'); return $self->saveErrorString(undef, "Parameter 'BlockSize100K' not between 1 and 9, got $value") unless defined $value && $value >= 1 && $value <= 9; } # check that WorkFactor between 0 & 250 if ($got->parsed('WorkFactor')) { my $value = $got->value('WorkFactor'); return $self->saveErrorString(undef, "Parameter 'WorkFactor' not between 0 and 250, got $value") unless $value >= 0 && $value <= 250; } return 1 ; } sub mkComp { my $self = shift ; my $got = shift ; my $BlockSize100K = $got->value('BlockSize100K'); my $WorkFactor = $got->value('WorkFactor'); my $Verbosity = 
$got->value('Verbosity'); my ($obj, $errstr, $errno) = IO::Compress::Adapter::Bzip2::mkCompObject( $BlockSize100K, $WorkFactor, $Verbosity); return $self->saveErrorString(undef, $errstr, $errno) if ! defined $obj; return $obj; } sub mkTrailer { my $self = shift ; return ''; } sub mkFinalTrailer { return ''; } #sub newHeader #{ # my $self = shift ; # return ''; #} sub getInverseClass { return ('IO::Uncompress::Bunzip2'); } sub getFileInfo { my $self = shift ; my $params = shift; my $file = shift ; } 1; __END__ =head1 NAME IO::Compress::Bzip2 - Write bzip2 files/buffers =head1 SYNOPSIS use IO::Compress::Bzip2 qw(bzip2 $Bzip2Error) ; my $status = bzip2 $input => $output [,OPTS] or die "bzip2 failed: $Bzip2Error\n"; my $z = new IO::Compress::Bzip2 $output [,OPTS] or die "bzip2 failed: $Bzip2Error\n"; $z->print($string); $z->printf($format, $string); $z->write($string); $z->syswrite($string [, $length, $offset]); $z->flush(); $z->tell(); $z->eof(); $z->seek($position, $whence); $z->binmode(); $z->fileno(); $z->opened(); $z->autoflush(); $z->input_line_number(); $z->newStream( [OPTS] ); $z->close() ; $Bzip2Error ; # IO::File mode print $z $string; printf $z $format, $string; tell $z eof $z seek $z, $position, $whence binmode $z fileno $z close $z ; =head1 DESCRIPTION This module provides a Perl interface that allows writing bzip2 compressed data to files or buffer. For reading bzip2 files/buffers, see the companion module L. =head1 Functional Interface A top-level function, C, is provided to carry out "one-shot" compression between buffers and/or files. For finer control over the compression process, see the L section. use IO::Compress::Bzip2 qw(bzip2 $Bzip2Error) ; bzip2 $input => $output [,OPTS] or die "bzip2 failed: $Bzip2Error\n"; The functional interface needs Perl5.005 or better. =head2 bzip2 $input => $output [, OPTS] C expects at least two parameters, C<$input> and C<$output>. 
=head3 The C<$input> parameter The parameter, C<$input>, is used to define the source of the uncompressed data. It can take one of the following forms: =over 5 =item A filename If the C<$input> parameter is a simple scalar, it is assumed to be a filename. This file will be opened for reading and the input data will be read from it. =item A filehandle If the C<$input> parameter is a filehandle, the input data will be read from it. The string '-' can be used as an alias for standard input. =item A scalar reference If C<$input> is a scalar reference, the input data will be read from C<$$input>. =item An array reference If C<$input> is an array reference, each element in the array must be a filename. The input data will be read from each file in turn. The complete array will be walked to ensure that it only contains valid filenames before any data is compressed. =item An Input FileGlob string If C<$input> is a string that is delimited by the characters "<" and ">" C will assume that it is an I. The input is the list of files that match the fileglob. If the fileglob does not match any files ... See L for more details. =back If the C<$input> parameter is any other type, C will be returned. =head3 The C<$output> parameter The parameter C<$output> is used to control the destination of the compressed data. This parameter can take one of these forms. =over 5 =item A filename If the C<$output> parameter is a simple scalar, it is assumed to be a filename. This file will be opened for writing and the compressed data will be written to it. =item A filehandle If the C<$output> parameter is a filehandle, the compressed data will be written to it. The string '-' can be used as an alias for standard output. =item A scalar reference If C<$output> is a scalar reference, the compressed data will be stored in C<$$output>. =item An Array Reference If C<$output> is an array reference, the compressed data will be pushed onto the array. 
=item An Output FileGlob If C<$output> is a string that is delimited by the characters "<" and ">" C will assume that it is an I. The output is the list of files that match the fileglob. When C<$output> is an fileglob string, C<$input> must also be a fileglob string. Anything else is an error. =back If the C<$output> parameter is any other type, C will be returned. =head2 Notes When C<$input> maps to multiple files/buffers and C<$output> is a single file/buffer the input files/buffers will be stored in C<$output> as a concatenated series of compressed data streams. =head2 Optional Parameters Unless specified below, the optional parameters for C, C, are the same as those used with the OO interface defined in the L section below. =over 5 =item C<< AutoClose => 0|1 >> This option applies to any input or output data streams to C that are filehandles. If C is specified, and the value is true, it will result in all input and/or output filehandles being closed once C has completed. This parameter defaults to 0. =item C<< BinModeIn => 0|1 >> When reading from a file or filehandle, set C before reading. Defaults to 0. =item C<< Append => 0|1 >> TODO =back =head2 Examples To read the contents of the file C and write the compressed data to the file C. use strict ; use warnings ; use IO::Compress::Bzip2 qw(bzip2 $Bzip2Error) ; my $input = "file1.txt"; bzip2 $input => "$input.bz2" or die "bzip2 failed: $Bzip2Error\n"; To read from an existing Perl filehandle, C<$input>, and write the compressed data to a buffer, C<$buffer>. 
use strict ; use warnings ; use IO::Compress::Bzip2 qw(bzip2 $Bzip2Error) ; use IO::File ; my $input = new IO::File "<file1.txt" or die "Cannot open 'file1.txt': $!\n" ; my $buffer ; bzip2 $input => \$buffer or die "bzip2 failed: $Bzip2Error\n"; To compress all files in the directory "/my/home" that match "*.txt" and store the compressed data in the same directory use strict ; use warnings ; use IO::Compress::Bzip2 qw(bzip2 $Bzip2Error) ; bzip2 '</my/home/*.txt>' => '<*.bz2>' or die "bzip2 failed: $Bzip2Error\n"; and if you want to compress each file one at a time, this will do the trick use strict ; use warnings ; use IO::Compress::Bzip2 qw(bzip2 $Bzip2Error) ; for my $input ( glob "/my/home/*.txt" ) { my $output = "$input.bz2" ; bzip2 $input => $output or die "Error compressing '$input': $Bzip2Error\n"; } =head1 OO Interface =head2 Constructor The format of the constructor for C<IO::Compress::Bzip2> is shown below my $z = new IO::Compress::Bzip2 $output [,OPTS] or die "IO::Compress::Bzip2 failed: $Bzip2Error\n"; It returns an C<IO::Compress::Bzip2> object on success and undef on failure. The variable C<$Bzip2Error> will contain an error message on failure. If you are running Perl 5.005 or better the object, C<$z>, returned from IO::Compress::Bzip2 can be used exactly like an L<IO::File|IO::File> filehandle. This means that all normal output file operations can be carried out with C<$z>. For example, to write to a compressed file/buffer you can use either of these forms $z->print("hello world\n"); print $z "hello world\n"; The mandatory parameter C<$output> is used to control the destination of the compressed data. This parameter can take one of these forms. =over 5 =item A filename If the C<$output> parameter is a simple scalar, it is assumed to be a filename. This file will be opened for writing and the compressed data will be written to it. =item A filehandle If the C<$output> parameter is a filehandle, the compressed data will be written to it. The string '-' can be used as an alias for standard output. =item A scalar reference If C<$output> is a scalar reference, the compressed data will be stored in C<$$output>.
=back If the C<$output> parameter is any other type, C::new will return undef. =head2 Constructor Options C is any combination of the following options: =over 5 =item C<< AutoClose => 0|1 >> This option is only valid when the C<$output> parameter is a filehandle. If specified, and the value is true, it will result in the C<$output> being closed once either the C method is called or the C object is destroyed. This parameter defaults to 0. =item C<< Append => 0|1 >> Opens C<$output> in append mode. The behaviour of this option is dependent on the type of C<$output>. =over 5 =item * A Buffer If C<$output> is a buffer and C is enabled, all compressed data will be append to the end if C<$output>. Otherwise C<$output> will be cleared before any data is written to it. =item * A Filename If C<$output> is a filename and C is enabled, the file will be opened in append mode. Otherwise the contents of the file, if any, will be truncated before any compressed data is written to it. =item * A Filehandle If C<$output> is a filehandle, the file pointer will be positioned to the end of the file via a call to C before any compressed data is written to it. Otherwise the file pointer will not be moved. =back This parameter defaults to 0. =item C<< BlockSize100K => number >> Specify the number of 100K blocks bzip2 uses during compression. Valid values are from 1 to 9, where 9 is best compression. The default is 1. =item C<< WorkFactor => number >> Specifies how much effort bzip2 should take before resorting to a slower fallback compression algorithm. Valid values range from 0 to 250, where 0 means use the default value 30. The default is 0. =item C<< Strict => 0|1 >> This is a placeholder option. =back =head2 Examples TODO =head1 Methods =head2 print Usage is $z->print($data) print $z $data Compresses and outputs the contents of the C<$data> parameter. This has the same behaviour as the C built-in. Returns true if successful. 
=head2 printf Usage is $z->printf($format, $data) printf $z $format, $data Compresses and outputs the contents of the C<$data> parameter. Returns true if successful. =head2 syswrite Usage is $z->syswrite $data $z->syswrite $data, $length $z->syswrite $data, $length, $offset Compresses and outputs the contents of the C<$data> parameter. Returns the number of uncompressed bytes written, or C<undef> if unsuccessful. =head2 write Usage is $z->write $data $z->write $data, $length $z->write $data, $length, $offset Compresses and outputs the contents of the C<$data> parameter. Returns the number of uncompressed bytes written, or C<undef> if unsuccessful. =head2 flush Usage is $z->flush; Flushes any pending compressed data to the output file/buffer. TODO Returns true on success. =head2 tell Usage is $z->tell() tell $z Returns the uncompressed file offset. =head2 eof Usage is $z->eof(); eof($z); Returns true if the C<close> method has been called. =head2 seek $z->seek($position, $whence); seek($z, $position, $whence); Provides a sub-set of the C<seek> functionality, with the restriction that it is only legal to seek forward in the output file/buffer. It is a fatal error to attempt to seek backward. Empty parts of the file/buffer will have NULL (0x00) bytes written to them. The C<$whence> parameter takes one of the usual values, namely SEEK_SET, SEEK_CUR or SEEK_END. Returns 1 on success, 0 on failure. =head2 binmode Usage is $z->binmode binmode $z ; This is a noop provided for completeness. =head2 opened $z->opened() Returns true if the object currently refers to an opened file/buffer. =head2 autoflush my $prev = $z->autoflush() my $prev = $z->autoflush(EXPR) If the C<$z> object is associated with a file or a filehandle, this method returns the current autoflush setting for the underlying filehandle. If C<EXPR> is present, and is non-zero, it will enable flushing after every write/print operation. If C<$z> is associated with a buffer, this method has no effect and always returns C<undef>.
B<Note> that the special variable C<$|> B<cannot> be used to set or retrieve the autoflush setting. =head2 input_line_number $z->input_line_number() $z->input_line_number(EXPR) This method always returns C<undef> when compressing. =head2 fileno $z->fileno() fileno($z) If the C<$z> object is associated with a file or a filehandle, C<fileno> will return the underlying file descriptor. Once the C<close> method is called C<fileno> will return C<undef>. If the C<$z> object is associated with a buffer, this method will return C<undef>. =head2 close $z->close() ; close $z ; Flushes any pending compressed data and then closes the output file/buffer. For most versions of Perl this method will be automatically invoked if the IO::Compress::Bzip2 object is destroyed (either explicitly or by the variable with the reference to the object going out of scope). The exceptions are Perl versions 5.005 through 5.00504 and 5.8.0. In these cases, the C<close> method will be called automatically, but not until global destruction of all live objects when the program is terminating. Therefore, if you want your scripts to be able to run on all versions of Perl, you should call C<close> explicitly and not rely on automatic closing. Returns true on success, otherwise 0. If the C<AutoClose> option has been enabled when the IO::Compress::Bzip2 object was created, and the object is associated with a file, the underlying file will also be closed. =head2 newStream([OPTS]) Usage is $z->newStream( [OPTS] ) Closes the current compressed data stream and starts a new one.
=pod

OPTS consists of any of the options that are available when creating
the C<$z> ob

=cut

package IO::Compress::Deflate ;

# Writer for zlib-wrapped (RFC 1950) deflate streams.  The class derives
# from IO::Compress::RawDeflate (see @ISA below) and supplies the pieces
# specific to this container: the two-byte header and the Adler-32 trailer.

use strict ;
use warnings;
use bytes;

require Exporter ;

use IO::Compress::RawDeflate 2.033 ;

use Compress::Raw::Zlib 2.033 ;
use IO::Compress::Zlib::Constants 2.033 ;
use IO::Compress::Base::Common 2.033 qw(createSelfTiedObject);

our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $DeflateError);

$VERSION = '2.033';
$DeflateError = '';

@ISA = qw(Exporter IO::Compress::RawDeflate);
@EXPORT_OK = qw( $DeflateError deflate ) ;
%EXPORT_TAGS = %IO::Compress::RawDeflate::DEFLATE_CONSTANTS ;
push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ;
Exporter::export_ok_tags('all');

# Constructor.  Builds a self-tied object for $class whose error slot is
# $DeflateError, then forwards the remaining arguments to _create (not
# defined in this package; presumably inherited).
sub new
{
    my $class = shift ;

    my $obj = createSelfTiedObject($class, \$DeflateError);
    return $obj->_create(undef, @_);
}

# Functional ("one-shot") entry point.  Builds an object with no class and
# forwards all arguments to _def (not defined in this package).
sub deflate
{
    my $obj = createSelfTiedObject(undef, \$DeflateError);
    return $obj->_def(@_);
}

# bitmask(INTO, VALUE, OFFSET, MASK)
# Returns INTO with (VALUE & MASK), shifted left by OFFSET bits, OR-ed in.
sub bitmask($$$$)
{
    my $into   = shift ;
    my $value  = shift ;
    my $offset = shift ;
    my $mask   = shift ;

    return $into | (($value & $mask) << $offset ) ;
}

# mkDeflateHdr(METHOD, CINFO, LEVEL [, FDICT_ADLER])
# Returns the packed header: one CMF byte and one FLG byte, followed by
# FDICT_ADLER as a 32-bit big-endian value when FDICT_ADLER is defined
# (in which case the FDICT flag is also set).
sub mkDeflateHdr($$$;$)
{
    my $method      = shift ;
    my $cinfo       = shift;
    my $level       = shift;
    my $fdict_adler = shift ;

    my $cmf = 0;
    my $flg = 0;
    my $fdict = 0;
    $fdict = 1 if defined $fdict_adler;

    $cmf = bitmask($cmf, $method, ZLIB_CMF_CM_OFFSET,    ZLIB_CMF_CM_BITS);
    $cmf = bitmask($cmf, $cinfo,  ZLIB_CMF_CINFO_OFFSET, ZLIB_CMF_CINFO_BITS);

    $flg = bitmask($flg, $fdict,  ZLIB_FLG_FDICT_OFFSET, ZLIB_FLG_FDICT_BITS);
    $flg = bitmask($flg, $level,  ZLIB_FLG_LEVEL_OFFSET, ZLIB_FLG_LEVEL_BITS);

    # Check bits: the amount needed to bring CMF*256 + FLG up to the next
    # multiple of 31 (assumes the FCHECK field is the low-order part of FLG
    # -- see IO::Compress::Zlib::Constants).
    my $fcheck = 31 - ($cmf * 256 + $flg) % 31 ;
    $flg = bitmask($flg, $fcheck, ZLIB_FLG_FCHECK_OFFSET, ZLIB_FLG_FCHECK_BITS);

    my $hdr = pack("CC", $cmf, $flg) ;
    $hdr .= pack("N", $fdict_adler) if $fdict ;

    return $hdr;
}

# mkHeader(PARAM)
# Derives the header "level" flag from the 'Level' and 'Strategy' values
# held in PARAM and returns the header produced by mkDeflateHdr.
sub mkHeader
{
    my $self  = shift ;
    my $param = shift ;

    my $level    = $param->value('Level');
    my $strategy = $param->value('Strategy');

    my $lflag ;
    $level = 6
        if $level == Z_DEFAULT_COMPRESSION ;

    if (ZLIB_VERNUM >= 0x1210)
    {
        if ($strategy >= Z_HUFFMAN_ONLY || $level < 2)
         {  $lflag = ZLIB_FLG_LEVEL_FASTEST }
        elsif ($level < 6)
         {  $lflag = ZLIB_FLG_LEVEL_FAST }
        elsif ($level == 6)
         {  $lflag = ZLIB_FLG_LEVEL_DEFAULT }
        else
         {  $lflag = ZLIB_FLG_LEVEL_SLOWEST }
    }
    else
    {
        # Older zlib: derive the flag arithmetically, capped at 3.
        $lflag = ($level - 1) >> 1 ;
        $lflag = 3 if $lflag > 3 ;
    }

    #my $wbits = (MAX_WBITS - 8) << 4 ;
    # Hard-coded CINFO value (7 denotes a 32K window in RFC 1950).
    my $wbits = 7;
    return mkDeflateHdr(ZLIB_CMF_CM_DEFLATED, $wbits, $lflag);
}

# ckParams(GOT)
# Forces the 'ADLER32' parameter on and reports success.
sub ckParams
{
    my $self = shift ;
    my $got  = shift;

    $got->value('ADLER32' => 1);
    return 1 ;
}

# Returns the stream trailer: the Adler-32 reported by the object stored
# under {Compress}, packed as a 32-bit big-endian value.
sub mkTrailer
{
    my $self = shift ;
    return pack("N", *$self->{Compress}->adler32()) ;
}

# This format has no final trailer; always returns the empty string.
sub mkFinalTrailer
{
    return '';
}

#sub newHeader
#{
#    my $self = shift ;
#    return *$self->{Header};
#}

# Returns whatever getZlibParams() returns; no extra parameters are added.
sub getExtraParams
{
    my $self = shift ;
    return $self->getZlibParams();
}

# Returns the name of the matching uncompression class and a reference to
# that class's error variable.
sub getInverseClass
{
    return ('IO::Uncompress::Inflate',
                \$IO::Uncompress::Inflate::InflateError);
}

# getFileInfo(PARAMS, FILE)
# Takes its arguments off @_ and does nothing else with them.
sub getFileInfo
{
    my $self   = shift ;
    my $params = shift;
    my $file   = shift ;
}

1;

__END__

=head1 NAME

IO::Compress::Deflate - Write RFC 1950 files/buffers

=head1 SYNOPSIS

    use IO::Compress::Deflate qw(deflate $DeflateError) ;

    my $status = deflate $input => $output [,OPTS]
        or die "deflate failed: $DeflateError\n";

    my $z = new IO::Compress::Deflate $output [,OPTS]
        or die "deflate failed: $DeflateError\n";

    $z->print($string);
    $z->printf($format, $string);
    $z->write($string);
    $z->syswrite($string [, $length, $offset]);
    $z->flush();
    $z->tell();
    $z->eof();
    $z->seek($position, $whence);
    $z->binmode();
    $z->fileno();
    $z->opened();
    $z->autoflush();
    $z->input_line_number();
    $z->newStream( [OPTS] );

    $z->deflateParams();

    $z->close() ;

    $DeflateError ;

    # IO::File mode

    print $z $string;
    printf $z $format, $string;
    tell $z
    eof $z
    seek $z, $position, $whence
    binmode $z
    fileno $z
    close $z ;

=head1 DESCRIPTION

This module provides a Perl interface that allows writing compressed
data to files or buffer as defined in RFC 1950.

For reading RFC 1950 files/buffers, see the companion module
L<IO::Uncompress::Inflate|IO::Uncompress::Inflate>.

=head1 Functional Interface A top-level function, C, is provided to carry out "one-shot" compression between buffers and/or files. For finer control over the compression process, see the L section. use IO::Compress::Deflate qw(deflate $DeflateError) ; deflate $input => $output [,OPTS] or die "deflate failed: $DeflateError\n"; The functional interface needs Perl5.005 or better. =head2 deflate $input => $output [, OPTS] C expects at least two parameters, C<$input> and C<$output>. =head3 The C<$input> parameter The parameter, C<$input>, is used to define the source of the uncompressed data. It can take one of the following forms: =over 5 =item A filename If the C<$input> parameter is a simple scalar, it is assumed to be a filename. This file will be opened for reading and the input data will be read from it. =item A filehandle If the C<$input> parameter is a filehandle, the input data will be read from it. The string '-' can be used as an alias for standard input. =item A scalar reference If C<$input> is a scalar reference, the input data will be read from C<$$input>. =item An array reference If C<$input> is an array reference, each element in the array must be a filename. The input data will be read from each file in turn. The complete array will be walked to ensure that it only contains valid filenames before any data is compressed. =item An Input FileGlob string If C<$input> is a string that is delimited by the characters "<" and ">" C will assume that it is an I. The input is the list of files that match the fileglob. See L for more details. =back If the C<$input> parameter is any other type, C will be returned. =head3 The C<$output> parameter The parameter C<$output> is used to control the destination of the compressed data. This parameter can take one of these forms. =over 5 =item A filename If the C<$output> parameter is a simple scalar, it is assumed to be a filename. This file will be opened for writing and the compressed data will be written to it. 
=item A filehandle If the C<$output> parameter is a filehandle, the compressed data will be written to it. The string '-' can be used as an alias for standard output. =item A scalar reference If C<$output> is a scalar reference, the compressed data will be stored in C<$$output>. =item An Array Reference If C<$output> is an array reference, the compressed data will be pushed onto the array. =item An Output FileGlob If C<$output> is a string that is delimited by the characters "<" and ">" C will assume that it is an I. The output is the list of files that match the fileglob. When C<$output> is an fileglob string, C<$input> must also be a fileglob string. Anything else is an error. See L for more details. =back If the C<$output> parameter is any other type, C will be returned. =head2 Notes When C<$input> maps to multiple files/buffers and C<$output> is a single file/buffer the input files/buffers will be stored in C<$output> as a concatenated series of compressed data streams. =head2 Optional Parameters Unless specified below, the optional parameters for C, C, are the same as those used with the OO interface defined in the L section below. =over 5 =item C<< AutoClose => 0|1 >> This option applies to any input or output data streams to C that are filehandles. If C is specified, and the value is true, it will result in all input and/or output filehandles being closed once C has completed. This parameter defaults to 0. =item C<< BinModeIn => 0|1 >> When reading from a file or filehandle, set C before reading. Defaults to 0. =item C<< Append => 0|1 >> The behaviour of this option is dependent on the type of output data stream. =over 5 =item * A Buffer If C is enabled, all compressed data will be append to the end of the output buffer. Otherwise the output buffer will be cleared before any compressed data is written to it. =item * A Filename If C is enabled, the file will be opened in append mode. 
Otherwise the contents of the file, if any, will be truncated before any compressed data is written to it. =item * A Filehandle If C is enabled, the filehandle will be positioned to the end of the file via a call to C before any compressed data is written to it. Otherwise the file pointer will not be moved. =back When C is specified, and set to true, it will I all compressed data to the output data stream. So when the output is a filehandle it will carry out a seek to the eof before writing any compressed data. If the output is a filename, it will be opened for appending. If the output is a buffer, all compressed data will be appended to the existing buffer. Conversely when C is not specified, or it is present and is set to false, it will operate as follows. When the output is a filename, it will truncate the contents of the file before writing any compressed data. If the output is a filehandle its position will not be changed. If the output is a buffer, it will be wiped before any compressed data is output. Defaults to 0. =back =head2 Examples To read the contents of the file C and write the compressed data to the file C. use strict ; use warnings ; use IO::Compress::Deflate qw(deflate $DeflateError) ; my $input = "file1.txt"; deflate $input => "$input.1950" or die "deflate failed: $DeflateError\n"; To read from an existing Perl filehandle, C<$input>, and write the compressed data to a buffer, C<$buffer>. 
use strict ; use warnings ; use IO::Compress::Deflate qw(deflate $DeflateError) ; use IO::File ; my $input = new IO::File "<file1.txt" or die "Cannot open 'file1.txt': $!\n" ; my $buffer ; deflate $input => \$buffer or die "deflate failed: $DeflateError\n"; To compress all files in the directory "/my/home" that match "*.txt" and store the compressed data in the same directory use strict ; use warnings ; use IO::Compress::Deflate qw(deflate $DeflateError) ; deflate '</my/home/*.txt>' => '<*.1950>' or die "deflate failed: $DeflateError\n"; and if you want to compress each file one at a time, this will do the trick use strict ; use warnings ; use IO::Compress::Deflate qw(deflate $DeflateError) ; for my $input ( glob "/my/home/*.txt" ) { my $output = "$input.1950" ; deflate $input => $output or die "Error compressing '$input': $DeflateError\n"; } =head1 OO Interface =head2 Constructor The format of the constructor for C<IO::Compress::Deflate> is shown below my $z = new IO::Compress::Deflate $output [,OPTS] or die "IO::Compress::Deflate failed: $DeflateError\n"; It returns an C<IO::Compress::Deflate> object on success and undef on failure. The variable C<$DeflateError> will contain an error message on failure. If you are running Perl 5.005 or better the object, C<$z>, returned from IO::Compress::Deflate can be used exactly like an L<IO::File|IO::File> filehandle. This means that all normal output file operations can be carried out with C<$z>. For example, to write to a compressed file/buffer you can use either of these forms $z->print("hello world\n"); print $z "hello world\n"; The mandatory parameter C<$output> is used to control the destination of the compressed data. This parameter can take one of these forms. =over 5 =item A filename If the C<$output> parameter is a simple scalar, it is assumed to be a filename. This file will be opened for writing and the compressed data will be written to it. =item A filehandle If the C<$output> parameter is a filehandle, the compressed data will be written to it. The string '-' can be used as an alias for standard output.
=item A scalar reference If C<$output> is a scalar reference, the compressed data will be stored in C<$$output>. =back If the C<$output> parameter is any other type, C::new will return undef. =head2 Constructor Options C is any combination of the following options: =over 5 =item C<< AutoClose => 0|1 >> This option is only valid when the C<$output> parameter is a filehandle. If specified, and the value is true, it will result in the C<$output> being closed once either the C method is called or the C object is destroyed. This parameter defaults to 0. =item C<< Append => 0|1 >> Opens C<$output> in append mode. The behaviour of this option is dependent on the type of C<$output>. =over 5 =item * A Buffer If C<$output> is a buffer and C is enabled, all compressed data will be append to the end of C<$output>. Otherwise C<$output> will be cleared before any data is written to it. =item * A Filename If C<$output> is a filename and C is enabled, the file will be opened in append mode. Otherwise the contents of the file, if any, will be truncated before any compressed data is written to it. =item * A Filehandle If C<$output> is a filehandle, the file pointer will be positioned to the end of the file via a call to C before any compressed data is written to it. Otherwise the file pointer will not be moved. =back This parameter defaults to 0. =item C<< Merge => 0|1 >> This option is used to compress input data and append it to an existing compressed data stream in C<$output>. The end result is a single compressed data stream stored in C<$output>. It is a fatal error to attempt to use this option when C<$output> is not an RFC 1950 data stream. There are a number of other limitations with the C option: =over 5 =item 1 This module needs to have been built with zlib 1.2.1 or better to work. A fatal error will be thrown if C is used with an older version of zlib. =item 2 If C<$output> is a file or a filehandle, it must be seekable. =back This parameter defaults to 0. 
=item -Level Defines the compression level used by zlib. The value should either be a number between 0 and 9 (0 means no compression and 9 is maximum compression), or one of the symbolic constants defined below. Z_NO_COMPRESSION Z_BEST_SPEED Z_BEST_COMPRESSION Z_DEFAULT_COMPRESSION The default is Z_DEFAULT_COMPRESSION. Note, these constants are not imported by C by default. use IO::Compress::Deflate qw(:strategy); use IO::Compress::Deflate qw(:constants); use IO::Compress::Deflate qw(:all); =item -Strategy Defines the strategy used to tune the compression. Use one of the symbolic constants defined below. Z_FILTERED Z_HUFFMAN_ONLY Z_RLE Z_FIXED Z_DEFAULT_STRATEGY The default is Z_DEFAULT_STRATEGY. =item C<< Strict => 0|1 >> This is a placeholder option. =back =head2 Examples TODO =head1 Methods =head2 print Usage is $z->print($data) print $z $data Compresses and outputs the contents of the C<$data> parameter. This has the same behaviour as the C built-in. Returns true if successful. =head2 printf Usage is $z->printf($format, $data) printf $z $format, $data Compresses and outputs the contents of the C<$data> parameter. Returns true if successful. =head2 syswrite Usage is $z->syswrite $data $z->syswrite $data, $length $z->syswrite $data, $length, $offset Compresses and outputs the contents of the C<$data> parameter. Returns the number of uncompressed bytes written, or C if unsuccessful. =head2 write Usage is $z->write $data $z->write $data, $length $z->write $data, $length, $offset Compresses and outputs the contents of the C<$data> parameter. Returns the number of uncompressed bytes written, or C if unsuccessful. =head2 flush Usage is $z->flush; $z->flush($flush_type); Flushes any pending compressed data to the output file/buffer. This method takes an optional parameter, C<$flush_type>, that controls how the flushing will be carried out. By default the C<$flush_type> used is C. Other valid values for C<$flush_type> are C, C, C and C. 
It is strongly recommended that you only set the C<flush_type> parameter if you fully understand the implications of what it does - overuse of C<flush> can seriously degrade the level of compression achieved. See the C<zlib> documentation for details. Returns true on success. =head2 tell Usage is $z->tell() tell $z Returns the uncompressed file offset. =head2 eof Usage is $z->eof(); eof($z); Returns true if the C<close> method has been called. =head2 seek $z->seek($position, $whence); seek($z, $position, $whence); Provides a sub-set of the C<seek> functionality, with the restriction that it is only legal to seek forward in the output file/buffer. It is a fatal error to attempt to seek backward. Empty parts of the file/buffer will have NULL (0x00) bytes written to them. The C<$whence> parameter takes one of the usual values, namely SEEK_SET, SEEK_CUR or SEEK_END. Returns 1 on success, 0 on failure. =head2 binmode Usage is $z->binmode binmode $z ; This is a noop provided for completeness. =head2 opened $z->opened() Returns true if the object currently refers to an opened file/buffer. =head2 autoflush my $prev = $z->autoflush() my $prev = $z->autoflush(EXPR) If the C<$z> object is associated with a file or a filehandle, this method returns the current autoflush setting for the underlying filehandle. If C<EXPR> is present, and is non-zero, it will enable flushing after every write/print operation. If C<$z> is associated with a buffer, this method has no effect and always returns C<undef>. B<Note> that the special variable C<$|> B<cannot> be used to set or retrieve the autoflush setting. =head2 input_line_number $z->input_line_number() $z->input_line_number(EXPR) This method always returns C<undef> when compressing. =head2 fileno $z->fileno() fileno($z) If the C<$z> object is associated with a file or a filehandle, C<fileno> will return the underlying file descriptor. Once the C<close> method is called C<fileno> will return C<undef>. If the C<$z> object is associated with a buffer, this method will return C<undef>.
=head2 close $z->close() ; close $z ; Flushes any pending compressed data and then closes the output file/buffer. For most versions of Perl this method will be automatically invoked if the IO::Compress::Deflate object is destroyed (either explicitly or by the variable with the reference to the object going out of scope). The exceptions are Perl versions 5.005 through 5.00504 and 5.8.0. In these cases, the C method will be called automatically, but not until global destruction of all live objects when the program is terminating. Therefore, if you want your scripts to be able to run on all versions of Perl, you should call C explicitly and not rely on automatic closing. Returns true on success, otherwise 0. If the C option has been enabled when the IO::Compress::Deflate object was created, and the object is associated with a file, the underlying file will also be closed. =head2 newStream([OPTS]) Usage is $z->newStream( [OPTS] ) Closes the current compressed data stream and starts a new one. OPTS consists of any of the the options that are available when creating the C<$z> object. See the L section for more details. =head2 deflateParams Usage is $z->deflateParams TODO =head1 Importing A number of symbolic constants are required by some methods in C. None are imported by default. =over 5 =item :all Imports C, C<$DeflateError> and all symbolic constants that can be used by C. Same as doing this use IO::Compress::Deflate qw(deflate $DeflateError :constants) ; =item :constants Import all symbolic constants. Same as doing this use IO::Compress::Deflate qw(:flush :level :strategy) ; =item :flush These symbolic constants are used by the C method. Z_NO_FLUSH Z_PARTIAL_FLUSH Z_SYNC_FLUSH Z_FULL_FLUSH Z_FINISH Z_BLOCK =item :level These symbolic constants are used by the C option in the constructor. Z_NO_COMPRESSION Z_BEST_SPEED Z_BEST_COMPRESSION Z_DEFAULT_COMPRESSION =item :strategy These symbolic constants are used by the C option in the constructor. 
Z_FILTERED Z_HUFFMAN_ONLY Z_RLE Z_FIXED Z_DEFAULT_STRATEGY =back =head1 EXAMPLES =head2 Apache::GZip Revisited See L =head2 Working with Net::FTP See L =head1 SEE ALSO L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L L L, L, L, L For RFC 1950, 1951 and 1952 see F, F and F The I compression library was written by Jean-loup Gailly F and Mark Adler F. The primary site for the I compression library is F. The primary site for gzip is F. =head1 AUTHOR This module was written by Paul Marquess, F. =head1 MODIFICATION HISTORY See the Changes file. =head1 COPYRIGHT AND LICENSE Copyright (c) 2005-2011 Paul Marquess. All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L L L, L, L, L For RFC 1950, 1951 and 1952 see F, F and F The I compression library was written by Jean-loup Gailly F and Mark Adler F. The primary site for the I compression library is F. The primary site for gzip is F. =head1 AUTHOR This module was written by Paul Marquess, F. =head1 MODIFICATION HISTORY See the Changes file. =head1 COPYRIGHT AND LICENSE Copyright (c) 2005-2011 Paul Marquess. All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. 
package IO::Compress::Gzip ;

require 5.004 ;

use strict ;
use warnings;
use bytes;

use IO::Compress::RawDeflate 2.033 ;

use Compress::Raw::Zlib  2.033 ;
use IO::Compress::Base::Common  2.033 qw(:Status :Parse createSelfTiedObject);
use IO::Compress::Gzip::Constants 2.033 ;
use IO::Compress::Zlib::Extra 2.033 ;

BEGIN
{
    # noUTF8 downgrades a string in place when utf8::downgrade is
    # available, and is a no-op otherwise.
    if (defined &utf8::downgrade )
      { *noUTF8 = \&utf8::downgrade }
    else
      { *noUTF8 = sub {} }
}

require Exporter ;

our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $GzipError);

$VERSION = '2.033';
$GzipError = '' ;

@ISA    = qw(Exporter IO::Compress::RawDeflate);
@EXPORT_OK = qw( $GzipError gzip ) ;
%EXPORT_TAGS = %IO::Compress::RawDeflate::DEFLATE_CONSTANTS ;
push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ;
Exporter::export_ok_tags('all');

# Constructor for the OO interface.
# Creates a self-tied object whose error variable is $GzipError and hands
# the remaining arguments to _create. Returns whatever _create returns.
sub new
{
    my $class = shift ;

    my $obj = createSelfTiedObject($class, \$GzipError);

    return $obj->_create(undef, @_);
}

# Functional ("one-shot") interface.
# Creates a class-less self-tied object bound to $GzipError and delegates
# all arguments to _def. Returns whatever _def returns.
sub gzip
{
    my $obj = createSelfTiedObject(undef, \$GzipError);
    return $obj->_def(@_);
}

#sub newHeader
#{
#    my $self = shift ;
#    #return GZIP_MINIMUM_HEADER ;
#    return $self->mkHeader(*$self->{Got});
#}

# Returns the list of option specifications accepted by this class: the
# zlib parameters from getZlibParams followed by the gzip header fields.
sub getExtraParams
{
    my $self = shift ;

    return (
            # zlib behaviour
            $self->getZlibParams(),

            # Gzip header fields
            'Minimal'   => [0, 1, Parse_boolean,   0],
            'Comment'   => [0, 1, Parse_any,       undef],
            'Name'      => [0, 1, Parse_any,       undef],
            'Time'      => [0, 1, Parse_any,       undef],
            'TextFlag'  => [0, 1, Parse_boolean,   0],
            'HeaderCRC' => [0, 1, Parse_boolean,   0],
            'OS_Code'   => [0, 1, Parse_unsigned,  $Compress::Raw::Zlib::gzip_os_code],
            'ExtraField'=> [0, 1, Parse_any,       undef],
            'ExtraFlags'=> [0, 1, Parse_any,       undef],
        );
}

# Validates and normalises the parsed options in $got.
# Returns 1 on success. On a validation failure it returns the result of
# saveErrorString(undef, ...), recording the error message.
sub ckParams
{
    my $self = shift ;
    my $got = shift ;

    # gzip always needs crc32
    $got->value('CRC32' => 1);

    # Nothing else is checked when merging.
    return 1
        if $got->value('Merge') ;

    my $strict = $got->value('Strict') ;

    {
        if (! $got->parsed('Time') ) {
            # Modification time defaults to now.
            $got->value('Time' => time) ;
        }

        # Check that the Name & Comment don't have embedded NULLs
        # Also check that they only contain ISO 8859-1 chars.
        if ($got->parsed('Name') && defined $got->value('Name')) {
            my $name = $got->value('Name');

            return $self->saveErrorString(undef, "Null Character found in Name",
                                                Z_DATA_ERROR)
                if $strict && $name =~ /\x00/ ;

            return $self->saveErrorString(undef, "Non ISO 8859-1 Character found in Name",
                                                Z_DATA_ERROR)
                if $strict && $name =~ /$GZIP_FNAME_INVALID_CHAR_RE/o ;
        }

        if ($got->parsed('Comment') && defined $got->value('Comment')) {
            my $comment = $got->value('Comment');

            return $self->saveErrorString(undef, "Null Character found in Comment",
                                                Z_DATA_ERROR)
                if $strict && $comment =~ /\x00/ ;

            return $self->saveErrorString(undef, "Non ISO 8859-1 Character found in Comment",
                                                Z_DATA_ERROR)
                if $strict && $comment =~ /$GZIP_FCOMMENT_INVALID_CHAR_RE/o;
        }

        if ($got->parsed('OS_Code') ) {
            my $value = $got->value('OS_Code');

            # The OS code occupies a single byte in the header.
            return $self->saveErrorString(undef, "OS_Code must be between 0 and 255, got '$value'")
                if $value < 0 || $value > 255 ;

        }

        # gzip only supports Deflate at present
        $got->value('Method' => Z_DEFLATED) ;

        # Derive the extra-flags byte from the compression level unless the
        # caller supplied ExtraFlags explicitly.
        if ( ! $got->parsed('ExtraFlags')) {
            $got->value('ExtraFlags' => 2)
                if $got->value('Level') == Z_BEST_COMPRESSION ;
            $got->value('ExtraFlags' => 4)
                if $got->value('Level') == Z_BEST_SPEED ;
        }

        my $data = $got->value('ExtraField') ;
        if (defined $data) {
            # parseExtraField receives $data itself, and the result is
            # stored back below.
            my $bad = IO::Compress::Zlib::Extra::parseExtraField($data, $strict, 1) ;
            return $self->saveErrorString(undef, "Error with ExtraField Parameter: $bad", Z_DATA_ERROR)
                if $bad ;

            $got->value('ExtraField', $data) ;
        }
    }

    return 1;
}

# Builds the trailer: the CRC32 reported by the compressor followed by the
# 32-bit uncompressed size, each packed as a little-endian 32-bit value.
sub mkTrailer
{
    my $self = shift ;
    return pack("V V", *$self->{Compress}->crc32(),
                       *$self->{UnCompSize}->get32bit());
}

# Returns the name of the companion uncompression class together with a
# reference to that class's error variable.
sub getInverseClass
{
    return ('IO::Uncompress::Gunzip',
                \$IO::Uncompress::Gunzip::GunzipError);
}

# Fills in the Name and Time parameters from $filename (the name itself and
# element 9 of stat, the modification time) for whichever of the two the
# caller did not supply.
sub getFileInfo
{
    my $self = shift ;
    my $params = shift;
    my $filename = shift ;

    my $defaultTime = (stat($filename))[9] ;

    $params->value('Name' => $filename)
        if ! $params->parsed('Name') ;

    $params->value('Time' => $defaultTime)
        if ! $params->parsed('Time') ;
}

# Builds and returns the gzip header for the options held in $param.
#
# When the Minimal option is set, GZIP_MINIMUM_HEADER is returned as-is.
# Otherwise the header is the 10 fixed bytes followed by the optional
# ExtraField, Name, Comment and header CRC fields, in that order.
sub mkHeader
{
    my $self = shift ;
    my $param = shift ;

    # short-circuit if a minimal header is requested.
    return GZIP_MINIMUM_HEADER
        if $param->value('Minimal') ;

    # METHOD
    my $method = $param->valueOrDefault('Method', GZIP_CM_DEFLATED) ;

    # FLAGS
    my $flags       = GZIP_FLG_DEFAULT ;
    $flags |= GZIP_FLG_FTEXT    if $param->value('TextFlag') ;
    $flags |= GZIP_FLG_FHCRC    if $param->value('HeaderCRC') ;
    $flags |= GZIP_FLG_FEXTRA   if $param->wantValue('ExtraField') ;
    $flags |= GZIP_FLG_FNAME    if $param->wantValue('Name') ;
    $flags |= GZIP_FLG_FCOMMENT if $param->wantValue('Comment') ;

    # MTIME
    my $time = $param->valueOrDefault('Time', GZIP_MTIME_DEFAULT) ;

    # EXTRA FLAGS
    my $extra_flags = $param->valueOrDefault('ExtraFlags', GZIP_XFL_DEFAULT);

    # OS CODE
    my $os_code = $param->valueOrDefault('OS_Code', GZIP_OS_DEFAULT) ;

    my $out = pack("C4 V C C",
            GZIP_ID1,       # ID1
            GZIP_ID2,       # ID2
            $method,        # Compression Method
            $flags,         # Flags
            $time,          # Modification Time
            $extra_flags,   # Extra Flags
            $os_code,       # Operating System Code
            ) ;

    # EXTRA
    if ($flags & GZIP_FLG_FEXTRA) {
        my $extra = $param->value('ExtraField') ;
        $out .= pack("v", length $extra) . $extra ;
    }

    # NAME
    if ($flags & GZIP_FLG_FNAME) {
        my $name = $param->value('Name') ;

        # The field is NUL-terminated, so drop everything from the first
        # embedded NUL onwards. /s and \z make the truncation reach the real
        # end of the string even when a newline follows the NUL.
        $name =~ s/\x00.*\z//s ;

        # After truncation the name cannot end in NUL, so always terminate.
        $out .= $name . GZIP_NULL_BYTE ;
    }

    # COMMENT
    if ($flags & GZIP_FLG_FCOMMENT) {
        my $comment = $param->value('Comment') ;

        # Same truncate-then-terminate handling as for the name.
        $comment =~ s/\x00.*\z//s ;

        $out .= $comment . GZIP_NULL_BYTE ;
    }

    # HEADER CRC
    # RFC 1952: CRC16 is the two least significant bytes of the CRC32 of
    # every header byte that precedes it, so mask with 0xFFFF.
    # NOTE(review): a reader that verifies only the low byte will reject
    # these headers -- confirm the companion reader masks with 0xFFFF too.
    $out .= pack("v", crc32($out) & 0xFFFF ) if $param->value('HeaderCRC') ;

    noUTF8($out);

    return $out ;
}

# gzip has no data after the per-stream trailer, so this is always empty.
sub mkFinalTrailer
{
    return '';
}

1;

__END__

=head1 NAME

IO::Compress::Gzip - Write RFC 1952 files/buffers

=head1 SYNOPSIS

    use IO::Compress::Gzip qw(gzip $GzipError) ;

    my $status = gzip $input => $output [,OPTS]
        or die "gzip failed: $GzipError\n";

    my $z = IO::Compress::Gzip->new($output [,OPTS])
        or die "gzip failed: $GzipError\n";

    $z->print($string);
    $z->printf($format, $string);
    $z->write($string);
    $z->syswrite($string [, $length, $offset]);
    $z->flush();
    $z->tell();
    $z->eof();
    $z->seek($position, $whence);
    $z->binmode();
    $z->fileno();
    $z->opened();
    $z->autoflush();
    $z->input_line_number();
    $z->newStream( [OPTS] );

    $z->deflateParams();

    $z->close() ;

    $GzipError ;

    # IO::File mode

    print $z $string;
    printf $z $format, $string;
    tell $z
    eof $z
    seek $z, $position, $whence
    binmode $z
    fileno $z
    close $z ;

=head1 DESCRIPTION

This module provides a Perl interface that allows writing compressed
data to files or buffer as defined in RFC 1952.

All the gzip headers defined in RFC 1952 can be created using
this module.

For reading RFC 1952 files/buffers, see the companion module
L<IO::Uncompress::Gunzip|IO::Uncompress::Gunzip>.

=head1 Functional Interface

A top-level function, C<gzip>, is provided to carry out
"one-shot" compression between buffers and/or files. For finer
control over the compression process, see the L</"OO Interface">
section.

    use IO::Compress::Gzip qw(gzip $GzipError) ;

    gzip $input => $output [,OPTS]
        or die "gzip failed: $GzipError\n";

The functional interface needs Perl5.005 or better.

=head2 gzip $input => $output [, OPTS]

C<gzip> expects at least two parameters, C<$input> and C<$output>.

=head3 The C<$input> parameter

The parameter, C<$input>, is used to define the source of
the uncompressed data.

It can take one of the following forms:

=over 5

=item A filename

If the C<$input> parameter is a simple scalar, it is assumed to be a
filename.
This file will be opened for reading and the input data
will be read from it.

=item A filehandle

If the C<$input> parameter is a filehandle, the input data will be
read from it.
The string '-' can be used as an alias for standard input.

=item A scalar reference

If C<$input> is a scalar reference, the input data will be read
from C<$$input>.

=item An array reference

If C<$input> is an array reference, each element in the array must be a
filename.

The input data will be read from each file in turn.

The complete array will be walked to ensure that it only
contains valid filenames before any data is compressed.

=item An Input FileGlob string

If C<$input> is a string that is delimited by the characters "<" and ">"
C<gzip> will assume that it is an I<input fileglob string>. The
input is the list of files that match the fileglob.

See L<File::GlobMapper|File::GlobMapper> for more details.

=back

If the C<$input> parameter is any other type, C<undef> will be returned.

In addition, if C<$input> is a simple filename, the default values for
the C<Name> and C<Time>