diff --git a/Cargo.lock b/Cargo.lock index 391fa06..f02c246 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,30 +1,30 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 4 +version = 3 [[package]] name = "aho-corasick" -version = "0.7.18" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" dependencies = [ "memchr", ] [[package]] name = "ansi_term" -version = "0.11.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" +checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" dependencies = [ "winapi", ] [[package]] name = "anyhow" -version = "1.0.40" +version = "1.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28b2cd92db5cbd74e8e5028f7e27dd7aa3090e89e4f2a197cc7c8dfb69c7063b" +checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" [[package]] name = "atty" @@ -39,21 +39,27 @@ dependencies = [ [[package]] name = "autocfg" -version = "1.0.1" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "bitflags" +version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "1.2.1" +version = "2.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" +checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" [[package]] name = "cfg-if" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" [[package]] name = "ciff" @@ -75,75 +81,101 @@ dependencies = [ [[package]] name = "clap" -version = "2.33.3" +version = "2.34.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002" +checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" dependencies = [ "ansi_term", "atty", - "bitflags", + "bitflags 1.3.2", "strsim", "textwrap", - "unicode-width", + "unicode-width 0.1.14", "vec_map", ] [[package]] name = "console" -version = "0.14.1" +version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3993e6445baa160675931ec041a5e03ca84b9c6e32a056150d3aa2bdda0a1f45" +checksum = "2e09ced7ebbccb63b4c65413d821f2e00ce54c5ca4514ddc6b3c892fdbcbc69d" dependencies = [ "encode_unicode", - "lazy_static", "libc", - "regex", - "terminal_size", - "unicode-width", - "winapi", + "once_cell", + "unicode-width 0.2.1", + "windows-sys 0.60.2", ] [[package]] name = "encode_unicode" -version = "0.3.6" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" +checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" [[package]] name = "env_logger" -version = "0.8.3" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17392a012ea30ef05a610aa97dfb49496e71c9f676b27879922ea5bdf60d9d3f" +checksum = "a19187fea3ac7e84da7dacf48de0c45d63c6a76f9490dae389aead16c243fce3" dependencies = [ "log", "regex", ] +[[package]] +name = "errno" +version = "0.3.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "778e2ac28f6c47af28e4907f13ffd1e1ddbd400980a9abd7c8df189bf578a5ad" +dependencies = [ + "libc", + "windows-sys 0.60.2", +] + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "getrandom" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.11.1+wasi-snapshot-preview1", +] + [[package]] name = "getrandom" -version = "0.2.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c9495705279e7140bf035dde1f6e750c162df8b625267cd52cc44e0b156732c8" +checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" dependencies = [ "cfg-if", "libc", - "wasi", + "r-efi", + "wasi 0.14.2+wasi-0.2.4", ] [[package]] name = "heck" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87cbf45460356b7deeb5e3415b5563308c0a9b057c85e12b06ad551f98d0a6ac" +checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" dependencies = [ "unicode-segmentation", ] [[package]] name = "hermit-abi" -version = "0.1.18" +version = "0.1.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "322f4de77956e22ed0e5032c359a0f1273f1f7f0d79bfa3b8ffbc730d7fbcc5c" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" dependencies = [ "libc", ] @@ -162,36 +194,39 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.14" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" [[package]] name = "lazy_static" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "libc" -version = "0.2.92" +version = "0.2.174" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56d855069fafbb9b344c0f962150cd2c1187975cb1c22c1522c240d8c4986714" +checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" + +[[package]] +name = "linux-raw-sys" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" [[package]] name = "log" -version = "0.4.14" +version = "0.4.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" -dependencies = [ - "cfg-if", -] +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" [[package]] name = "memchr" -version = "2.5.0" +version = "2.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" [[package]] name = "memmap" @@ -205,9 +240,9 @@ dependencies = [ [[package]] name = "num-traits" -version = "0.2.14" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", ] @@ -219,10 +254,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "17b02fc0ff9a9e4b35b3342880f48e896ebf69f2967921fe8646bf5b7125956a" [[package]] -name = "ppv-lite86" -version = "0.2.10" +name = "once_cell" +version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "proc-macro-error" @@ -233,7 +268,7 @@ dependencies = [ "proc-macro-error-attr", "proc-macro2", "quote", - "syn 1.0.68", + "syn 1.0.109", "version_check", ] @@ -250,9 +285,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.93" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" dependencies = [ "unicode-ident", ] @@ -295,78 +330,65 @@ dependencies = [ [[package]] name = "quickcheck_macros" -version = "1.0.0" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b22a693222d716a9587786f37ac3f6b4faedb5b80c23914e7303ff5a1d8016e9" +checksum = "f71ee38b42f8459a88d3362be6f9b841ad2d5421844f61eb1c59c11bff3ac14a" dependencies = [ "proc-macro2", "quote", - "syn 1.0.68", + "syn 2.0.104", ] [[package]] name = "quote" -version = "1.0.38" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" dependencies = [ "proc-macro2", ] [[package]] -name = "rand" -version = "0.8.3" +name = "r-efi" +version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ef9e7e66b4468674bfcb0c81af8b7fa0bb154fa9f28eb840da5c447baeb8d7e" -dependencies = [ - "libc", - "rand_chacha", - "rand_core", - "rand_hc", -] +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" [[package]] -name = "rand_chacha" -version = "0.3.0" +name = "rand" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e12735cf05c9e10bf21534da50a147b924d555dc7a547c42e6bb2d5b6017ae0d" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ - "ppv-lite86", "rand_core", ] [[package]] name = "rand_core" -version = "0.6.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34cf66eb183df1c5876e2dcf6b13d57340741e8dc255b48e40a26de954d06ae7" -dependencies = [ - "getrandom", -] - -[[package]] -name = "rand_hc" -version = "0.3.0" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3190ef7066a446f2e7f42e239d161e905420ccab01eb967c9eb27d21b2322a73" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "rand_core", + "getrandom 0.2.16", ] [[package]] -name = "redox_syscall" -version = "0.2.5" +name = "regex" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94341e4e44e24f6b591b59e47a8a027df12e008d73fd5672dbea9cc22f4507d9" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" dependencies = [ - "bitflags", + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", ] [[package]] -name = "regex" -version = "1.6.0" +name = "regex-automata" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c4eb3267174b8c6c2f654116623910a0fef09c4753f8dd83db29c48a0df988b" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" dependencies = [ "aho-corasick", "memchr", @@ -375,50 +397,54 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.6.27" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] -name = "remove_dir_all" -version = "0.5.3" +name = "rustix" +version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" +checksum = "11181fbabf243db407ef8df94a6ce0b2f9a733bd8be4ad02b4eda9602296cac8" dependencies = [ - "winapi", + "bitflags 2.9.1", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.60.2", ] [[package]] name = "ryu" -version = "1.0.19" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ea1a2d0a644769cc99faa24c3ad26b379b786fe7c36fd3c546254801650e6dd" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" [[package]] name = "serde" -version = "1.0.218" +version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8dfc9d19bdbf6d17e22319da49161d5d0108e4188e8b680aef6299eed22df60" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.218" +version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f09503e191f4e797cb8aac08e9a4a4695c5edf6a2e70e376d961ddd5c969f82b" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.104", ] [[package]] name = "serde_json" -version = "1.0.139" +version = "1.0.142" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44f86c3acccc9c65b153fe1b85a3be07fe5515274ec9f0653b4a0875731c72a6" +checksum = "030fedb782600dcbd6f02d479bf0d817ac3bb40d644745b769d6a96bc3afc5a7" dependencies = [ "itoa", "memchr", @@ -434,9 +460,9 @@ checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" [[package]] name = "structopt" -version = "0.3.21" +version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5277acd7ee46e63e5168a80734c9f6ee81b1367a7d8772a2d765df2a3705d28c" +checksum = "0c6b5c64445ba8094a6ab0c3cd2ad323e07171012d9c98b0b15651daf1787a10" dependencies = [ "clap", "lazy_static", @@ -445,33 +471,33 @@ dependencies = [ [[package]] name = "structopt-derive" -version = "0.4.14" +version = "0.4.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ba9cdfda491b814720b6b06e0cac513d922fc407582032e8706e9f137976f90" +checksum = "dcb5ae327f9cc13b68763b5749770cb9e048a99bd9dfdfa58d0cf05d5f64afe0" dependencies = [ "heck", "proc-macro-error", "proc-macro2", "quote", - "syn 1.0.68", + "syn 1.0.109", ] [[package]] name = "syn" -version = "1.0.68" +version = "1.0.109" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ce15dd3ed8aa2f8eeac4716d6ef5ab58b6b9256db41d7e1a0224c2788e8fd87" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" dependencies = [ "proc-macro2", "quote", - "unicode-xid", + "unicode-ident", ] [[package]] name = "syn" -version = "2.0.98" +version = "2.0.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36147f1a48ae0ec2b5b3bc5b537d267457555a10dc06f3dbc8cb11ba3006d3b1" +checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" dependencies = [ "proc-macro2", "quote", @@ -480,26 +506,15 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.2.0" +version = "3.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dac1c663cfc93810f88aed9b8941d48cabf856a1b111c29a40439018d870eb22" +checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1" dependencies = [ - "cfg-if", - "libc", - "rand", - "redox_syscall", - "remove_dir_all", - "winapi", -] - -[[package]] -name = "terminal_size" -version = "0.1.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86ca8ced750734db02076f44132d802af0b33b09942331f4459dde8636fd2406" -dependencies = [ - "libc", - "winapi", + "fastrand", + "getrandom 0.3.3", + "once_cell", + "rustix", + "windows-sys 0.59.0", ] [[package]] @@ -508,32 +523,32 @@ version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" dependencies = [ - "unicode-width", + "unicode-width 0.1.14", ] [[package]] name = "unicode-ident" -version = "1.0.17" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00e2473a93778eb0bad35909dff6a10d28e63f792f16ed15e404fca9d5eeedbe" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" [[package]] name = "unicode-segmentation" -version = "1.7.1" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb0d2e7be6ae3a5fa87eed5fb451aff96f2573d2694942e40543ae0bbe19c796" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" [[package]] name = "unicode-width" -version = "0.1.8" +version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" [[package]] -name = "unicode-xid" +name = "unicode-width" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564" +checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c" [[package]] name = "vec_map" @@ -543,15 +558,24 @@ checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" [[package]] name = "version_check" -version = "0.9.3" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" [[package]] name = "wasi" -version = "0.10.2+wasi-snapshot-preview1" +version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasi" +version = "0.14.2+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" +dependencies = [ + "wit-bindgen-rt", +] [[package]] name = "winapi" @@ -574,3 +598,165 @@ name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-link" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.3", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm 0.52.6", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.53.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5fe6031c4041849d7c496a8ded650796e7b6ecc19df1a431c1a363342e5dc91" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm 0.53.0", + "windows_aarch64_msvc 0.53.0", + "windows_i686_gnu 0.53.0", + "windows_i686_gnullvm 0.53.0", + "windows_i686_msvc 0.53.0", + "windows_x86_64_gnu 0.53.0", + "windows_x86_64_gnullvm 0.53.0", + "windows_x86_64_msvc 0.53.0", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_i686_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" + +[[package]] +name = "wit-bindgen-rt" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" +dependencies = [ + "bitflags 2.9.1", +] diff --git a/Cargo.toml b/Cargo.toml index 2b01f19..5cc670f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,6 +23,11 @@ path = "src/pisa2ciff.rs" name = "jsonl2ciff" path = "src/jsonl2ciff.rs" +[[bin]] +name = "ciff2jsonl" +path = "src/ciff2jsonl.rs" + + [dependencies] protobuf = "2.28.0" structopt = "0.3" diff --git a/src/ciff2jsonl.rs b/src/ciff2jsonl.rs new file mode 100644 index 0000000..1c955d5 --- /dev/null +++ b/src/ciff2jsonl.rs @@ -0,0 +1,30 @@ +mod proto; +pub use proto::{DocRecord, Header, Posting, PostingsList}; + +use ciff::CiffToJsonl; +use std::path::PathBuf; +use structopt::StructOpt; + +#[derive(Debug, StructOpt)] +#[structopt( + name = "ciff2jsonl", + about = "Convert a Common Index Format [v1] file into a jsonl file" +)] +struct Args { + #[structopt(short, long, help = "Path to CIFF file")] + input: PathBuf, + #[structopt(short, long, help = "Output jsonl file")] + output: PathBuf, +} + +fn main() { + let args = Args::from_args(); + + let mut converter = CiffToJsonl::default(); + converter.input_path(args.input).output_path(args.output); + + if let Err(error) = converter.convert() { + eprintln!("ERROR: {error}"); + std::process::exit(1); + } +} diff --git a/src/jsonl2ciff.rs b/src/jsonl2ciff.rs index be24a26..ec0b3b0 100644 --- a/src/jsonl2ciff.rs +++ b/src/jsonl2ciff.rs @@ -7,8 +7,8 @@ use structopt::StructOpt; #[derive(Debug, StructOpt)] #[structopt( - name = "pisa2ciff", - about = "Convert a PISA index to a Common Index Format [v1]" + name = "jsonl2ciff", + about = "Convert a jsonl file into the Common Index Format [v1]" )] struct Args { #[structopt(short, long, help = "Path to jsonl file")] diff --git a/src/lib.rs b/src/lib.rs index 4705ada..f1037cc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -57,7 +57,7 @@ use indicatif::{ProgressBar, ProgressStyle}; use memmap::Mmap; use num_traits::ToPrimitive; use protobuf::{CodedInputStream, CodedOutputStream}; -use serde::Deserialize; +use serde::{Deserialize, Serialize}; use std::borrow::Borrow; use std::collections::HashMap; use std::convert::TryFrom; @@ -880,7 +880,7 @@ where } } -/// PISA to CIFF converter. +/// JSONL to CIFF converter. #[derive(Debug, Default, Clone)] pub struct JsonlToCiff { input: Option, @@ -1145,6 +1145,129 @@ impl JsonlToCiff { } } +/// CIFF to JSONL converter. +#[derive(Debug, Default, Clone)] +pub struct CiffToJsonl { + input: Option, + output: Option, +} + +// Note that the keys "id" and "vector" are currently commonly used when sharing +// json files for learned sparse retrieval systems. As such, we follow this +// same naming convention +#[derive(Serialize)] +struct JsonRecord { + id: String, + vector: HashMap, +} + +impl CiffToJsonl { + /// Set the path of the CIFF file. Required. + pub fn input_path>(&mut self, path: P) -> &mut Self { + self.input = Some(path.into()); + self + } + + /// Set the output JSONL file path. Required. + pub fn output_path>(&mut self, path: P) -> &mut Self { + self.output = Some(path.into()); + self + } + + /// Performs the conversion from CIFF to JSONL. + /// + /// # Errors + /// + /// If the input / output is invalid or any I/O / parsing error occurs, + /// returns an error. + pub fn convert(&self) -> Result<()> { + let input_path = self + .input + .as_ref() + .ok_or_else(|| anyhow!("No CIFF input path was set"))?; + let output_path = self + .output + .as_ref() + .ok_or_else(|| anyhow!("No JSON output path was set"))?; + + // Open the CIFF file for processing + let mut input_file = + File::open(input_path).with_context(|| format!("Cannot open {input_path:?}"))?; + let mut reader = CodedInputStream::new(&mut input_file); + + let header = Header::from_stream(&mut reader)?; + println!("{header}"); + + // We'll map "docid" -> (term, tf). + // Because CIFF uses integer tf, we convert scores to i32. + let mut fwd_idx: HashMap> = HashMap::new(); + + // Map from an internal integer docid to a string "collection" docid. + let mut docid_map: HashMap = HashMap::new(); + + // 1. Map the postings back into a forward index + let progress = ProgressBar::new(u64::from(header.num_postings_lists)); + progress.set_style(pb_style()); + progress.set_draw_delta(10); + for _ in 0..header.num_postings_lists { + let postings_list = reader.read_message::()?; + let term = postings_list.get_term(); + let postings = postings_list.get_postings(); + + let mut docid = 0_u32; + for p in postings.iter() { + let delta = u32::try_from(p.get_docid()).expect("Negative ID"); + docid += delta; + let impact = u32::try_from(p.get_tf()).expect("Negative TF"); + fwd_idx + .entry(docid) + .or_default() + .push((term.to_string(), impact)); + } + + progress.inc(1); + } + progress.finish(); + + // 2. Read textual docids + let progress = ProgressBar::new(u64::from(header.num_documents)); + progress.set_style(pb_style()); + progress.set_draw_delta(10); + // build the mapping from internal to external identifier + for _ in 0..header.num_documents { + let doc_record = reader.read_message::()?; + + let docid: u32 = doc_record + .get_docid() + .to_u32() + .ok_or_else(|| anyhow!("Cannot cast docid to u32: {}", doc_record.get_docid()))?; + + let trecid = doc_record.get_collection_docid(); + docid_map.insert(docid, trecid.to_string()); + progress.inc(1); + } + progress.finish(); + + // 3. Prepare and write json objects for each document + let outfile = File::create(output_path)?; + let mut writer = BufWriter::new(outfile); + let progress = ProgressBar::new(u64::from(header.num_documents)); + progress.set_style(pb_style()); + progress.set_draw_delta(10); + for (int_id, docvec) in fwd_idx { + let vector: HashMap = docvec.into_iter().collect(); + let id = docid_map.get(&int_id).expect("Docid not found").to_string(); + let record = JsonRecord { id, vector }; + let json_line = serde_json::to_string(&record).unwrap(); + writeln!(writer, "{json_line}")?; + progress.inc(1); + } + progress.finish(); + + Ok(()) + } +} + #[cfg(test)] mod test { use super::*;