From d491497adac7aa853191c04828eff323d0fd7023 Mon Sep 17 00:00:00 2001 From: root Date: Mon, 5 Sep 2016 14:37:54 +0300 Subject: [PATCH] First commit --- COPYING | 674 ++++++++++++++++++++++++++ Makefile.am | 3 + README.md | 50 ++ autogen.sh | 6 + configure.ac | 245 ++++++++++ etc/extfilter.ini | 60 +++ etc/systemd/extfilter.service | 12 + etc/systemd/install_service.sh | 6 + include/AhoCorasickPlus.h | 81 ++++ include/Makefile.am | 2 + include/actypes.h | 143 ++++++ include/ahocorasick.h | 90 ++++ include/config.h | 74 +++ include/config.h.in | 73 +++ include/config.h.in~ | 73 +++ include/main.h | 87 ++++ include/ndpiwrapper.h | 62 +++ include/node.h | 74 +++ include/patr.h | 43 ++ include/patricia.h | 155 ++++++ include/qdpi.h | 26 + include/sender.h | 67 +++ include/sendertask.h | 116 +++++ include/stamp-h1 | 1 + include/statistictask.h | 38 ++ include/stats.h | 24 + include/worker.h | 95 ++++ src/AhoCorasickPlus.cpp | 111 +++++ src/Makefile.am | 9 + src/ahocorasick.cpp | 461 ++++++++++++++++++ src/main.cpp | 624 ++++++++++++++++++++++++ src/node.cpp | 254 ++++++++++ src/patr.cpp | 128 +++++ src/patricia.c | 833 +++++++++++++++++++++++++++++++++ src/qdpi.cpp | 96 ++++ src/sender.cpp | 264 +++++++++++ src/sendertask.cpp | 63 +++ src/statistictask.cpp | 160 +++++++ src/worker.cpp | 401 ++++++++++++++++ 39 files changed, 5784 insertions(+) create mode 100644 COPYING create mode 100644 Makefile.am create mode 100644 README.md create mode 100755 autogen.sh create mode 100644 configure.ac create mode 100644 etc/extfilter.ini create mode 100644 etc/systemd/extfilter.service create mode 100755 etc/systemd/install_service.sh create mode 100644 include/AhoCorasickPlus.h create mode 100644 include/Makefile.am create mode 100644 include/actypes.h create mode 100644 include/ahocorasick.h create mode 100644 include/config.h create mode 100644 include/config.h.in create mode 100644 include/config.h.in~ create mode 100644 include/main.h create mode 100644 include/ndpiwrapper.h 
create mode 100644 include/node.h create mode 100644 include/patr.h create mode 100644 include/patricia.h create mode 100644 include/qdpi.h create mode 100644 include/sender.h create mode 100644 include/sendertask.h create mode 100644 include/stamp-h1 create mode 100644 include/statistictask.h create mode 100644 include/stats.h create mode 100644 include/worker.h create mode 100644 src/AhoCorasickPlus.cpp create mode 100644 src/Makefile.am create mode 100644 src/ahocorasick.cpp create mode 100644 src/main.cpp create mode 100644 src/node.cpp create mode 100644 src/patr.cpp create mode 100644 src/patricia.c create mode 100644 src/qdpi.cpp create mode 100644 src/sender.cpp create mode 100644 src/sendertask.cpp create mode 100644 src/statistictask.cpp create mode 100644 src/worker.cpp diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..94a9ed0 --- /dev/null +++ b/COPYING @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/> + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. 
Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. 
If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. 
Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. 
+ + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. 
Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. 
+ + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. 
+ + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. 
If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. 
+ + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. 
+ + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the 
material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. 
+ + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. 
If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. 
+ + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + <program> Copyright (C) <year> <name of author> + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +<http://www.gnu.org/licenses/>. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +<http://www.gnu.org/philosophy/why-not-lgpl.html>.
diff --git a/Makefile.am b/Makefile.am new file mode 100644 index 0000000..46e4450 --- /dev/null +++ b/Makefile.am @@ -0,0 +1,3 @@ +AUTOMAKE_OPTIONS = foreign +SUBDIRS = src include +EXTRA_DIST = COPYING etc diff --git a/README.md b/README.md new file mode 100644 index 0000000..dcf0b3f --- /dev/null +++ b/README.md @@ -0,0 +1,50 @@ +extFilter +=========== +Программа для блокирования сайтов из списка РКН с использованием DPDK и nDPI. + +Функционал +---------- +Программа осуществляет блокировку сайтов путем анализа зеркалированного трафика от пользователей. +В случае нахождения вызываемого абонентом HTTP ресурса в списке блокировки, пользователю отсылается редирект на специальную страницу или сбрасывается соединение. +Блокировка HTTPS ресурсов осуществляется на основе имени сервера в client hello запросе от пользователя или ip адреса сервера, если имени сервера нет в client hello запросе. +В случае нахождения вызываемого абонентом HTTPS ресурса в списке блокировки, соединение с данным ресурсом будет сброшено. + +Требования +---------- +Для сборки программы необходимы следующие библиотеки и программы: + + Poco >= 1.6 + nDPI = git dev (устанавливается автоматически) + PcapPlusPlus = git master (устанавливается автоматически) + DPDK = 16.07 + git + +Сборка +------ +- [Установить DPDK](http://dpdk.org/doc/quick-start) +- Сгенерировать configure +```bash +./autogen.sh +``` +- Запустить configure +```bash +./configure --with-dpdk_target=<target> --with-dpdk_home=<dir> +``` +- Скомпилировать программу +```bash +make +``` + +Настройка DPDK +-------------- +Для работы DPDK необходимо настроить huge-pages и подключить необходимые сетевые адаптеры в DPDK. + +Запуск +------ +Параметры работы программы задаются в конфигурационном файле. +Для запуска программы необходимо указать путь к конфигурационному .ini файлу (--config-file в командной строке).
Для запуска в режиме daemon необходимо указать ключи --daemon и --pidfile=/path/to/file.pid + +Файлы списков блокировки +------------------------ +Файлы с данными для блокировки (домены, url и т.д.) должны быть в формате [nfqfilter](https://github.com/max197616/nfqfilter). + diff --git a/autogen.sh b/autogen.sh new file mode 100755 index 0000000..2a58d0b --- /dev/null +++ b/autogen.sh @@ -0,0 +1,6 @@ +#!/bin/sh + +rm -f configure + +autoreconf -ivf + diff --git a/configure.ac b/configure.ac new file mode 100644 index 0000000..eb3a199 --- /dev/null +++ b/configure.ac @@ -0,0 +1,245 @@ +# -*- Autoconf -*- +# Process this file with autoconf to produce a configure script. + +AC_PREREQ([2.69]) +AC_INIT(extFilter, 0.1, max1976@mail.ru) + +PPP_HOME=./PcapPlusPlus +DPDK_HOME= +DPDK_TARGET= + +NDPI_HOME=./nDPI +NDPI_GIT_VERSION=dev + +AM_INIT_AUTOMAKE() + +m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES(no)]) + +AC_CONFIG_SRCDIR([src/main.cpp]) +AC_CONFIG_HEADERS([include/config.h]) + +AC_LANG([C++]) +AC_LANG_PUSH([C++]) + +# store current user given compiler flags to avoid default setup via AC_PROG_CXX +OLD_CXXFLAGS=$CXXFLAGS +OLD_CFLAGS=$CFLAGS + +# Checks for programs. 
+AC_PROG_CXX +AC_PROG_CC + +CXXFLAGS=$OLD_CXXFLAGS +CFLAGS=$OLD_CFLAGS + +CFLAGS="$CFLAGS --pedantic -Wall -O2" + +AC_ARG_ENABLE(debug, + AS_HELP_STRING( + [--enable-debug], + [enable debugging, default: no]), + [case "${enableval}" in + yes) debug=true ;; + no) debug=false ;; + *) AC_MSG_ERROR([bad value ${enableval} for --enable-debug]) ;; + esac], + [debug=false]) + +AC_MSG_CHECKING([for debug enabled]) + + +if test x"$debug" = x"true"; then + CXXFLAGS="$CXXFLAGS -std=c++0x -O0 -g -Wall -pthread" +else + CXXFLAGS="$CXXFLAGS -std=c++0x -O2 -pthread" +fi + +AC_COMPILE_IFELSE([AC_LANG_SOURCE( + [[template + struct check + { + static_assert(sizeof(int) <= sizeof(T), "not big enough"); + }; + + typedef check> right_angle_brackets; + + int a; + decltype(a) b; + + typedef check check_type; + check_type c; + check_type&& cr = static_cast(c);]])],, + AC_MSG_FAILURE(['$CXX $CXXFLAGS' does not accept ISO C++11])) + + + + +# Checks for libraries. + +# Checks for header files. +AC_CHECK_HEADERS([netinet/in.h stdint.h]) + +# Checks for typedefs, structures, and compiler characteristics. +AC_CHECK_HEADER_STDBOOL + +# Checks for library functions. 
+AC_FUNC_ERROR_AT_LINE +AC_CHECK_FUNCS([strerror]) + +# Check for methods in library and check for header files +AC_CHECK_HEADERS([Poco/Foundation.h Poco/Net/HTTPCookie.h Poco/Util/Timer.h], + [], + AC_MSG_ERROR([Poco include files not found.]) +) + +AC_CHECK_LIB([PocoFoundation],[main],[HAVE_POCOFOUNDATION=1],AC_MSG_ERROR([PocoFoundation library not found.])) +if test "$HAVE_POCOFOUNDATION"; then + save_libs="${LIBS}" + LIBS="-lPocoFoundation" + AC_LINK_IFELSE( + [AC_LANG_PROGRAM( + [#include "Poco/UnicodeConverter.h"], + [std::wstring wstr; Poco::UnicodeConverter::toUTF16("hello", wstr);] + )], + [LIBS="$LIBS $save_libs"], + [AC_MSG_ERROR([linking with PocoFoundation failed.])] + ) +fi + +AC_CHECK_LIB([PocoUtil],[main],[HAVE_POCOUTIL=1],AC_MSG_ERROR([PocoUtil library not found.])) + if test "$HAVE_POCOUTIL"; then + save_libs="${LIBS}" + LIBS="-lPocoUtil" + AC_LINK_IFELSE( + [AC_LANG_PROGRAM( + [#include "Poco/Util/Option.h"], + [Poco::Util::Option();] + )], + [LIBS="$LIBS $save_libs"], + [AC_MSG_ERROR([linking with PocoUtil failed.])] + ) +fi + +AC_CHECK_LIB([PocoNet],[main],[HAVE_POCONET=1],AC_MSG_ERROR([PocoNet library not found.])) +if test "$HAVE_POCONET"; then + save_libs="${LIBS}" + LIBS="-lPocoNet" + AC_LINK_IFELSE( + [AC_LANG_PROGRAM( + [#include "Poco/Net/HTTPClientSession.h"], + [Poco::Net::HTTPClientSession();] + )], + [LIBS="$LIBS $save_libs"], + [AC_MSG_ERROR([linking with PocoNET failed.])] + ) +fi + +AC_ARG_WITH( + [ppp_home], + AS_HELP_STRING( + [--with-ppp_home=DIR], + [Path to PcapPlusPlus library] + ), + [PPP_HOME="$withval"] +) + +dnl PcapPlusPlus checks... 
+AC_MSG_CHECKING(for PcapPlusPlus $PPP_HOME) +if test -d "$PPP_HOME" ; then : + AC_MSG_RESULT(found in $PPP_HOME) +else + AC_MSG_RESULT(not found) + AC_MSG_NOTICE(Getting PPP from git) + git clone -b master https://github.com/max197616/PcapPlusPlus.git $PPP_HOME; cd $PPP_HOME; cd - +# git clone -b master https://github.com/seladb/PcapPlusPlus.git $PPP_HOME; cd $PPP_HOME; cd - + AC_MSG_CHECKING(for PPP $PPP_HOME) + if test -d "$PPP_HOME" ; then : + AC_MSG_RESULT(found in $PPP_HOME) + else + AC_MSG_ERROR(Install PcapPlusPlus manually!) + fi +fi +dnl Parse and validate the DPDK options before building PcapPlusPlus: its configure step below needs DPDK_HOME and DPDK_TARGET. +AC_ARG_WITH( + [dpdk_home], + AS_HELP_STRING( + [--with-dpdk_home=DIR], + [Path to DPDK library] + ), + [DPDK_HOME="$withval"] +) + +AC_ARG_WITH( + [dpdk_target], + AS_HELP_STRING( + [--with-dpdk_target=target], + [DPDK target] + ), + [DPDK_TARGET="$withval"] +) + +if test -z "$DPDK_TARGET"; then + AC_MSG_ERROR([dpdk_target must be set]) +fi + +if test -z "$DPDK_HOME"; then + AC_MSG_ERROR([dpdk_home must be set]) +fi + +PPP_LIB=$PPP_HOME/Dist/libCommon++.a +AC_MSG_CHECKING(for $PPP_LIB) +if test -f "$PPP_LIB" ; then : + AC_MSG_RESULT(yes) +else + AC_MSG_RESULT([not found, compiling...]) + cd $PPP_HOME; ./configure-linux.sh --dpdk --dpdk-home $DPDK_HOME --dpdk-target $DPDK_TARGET; make; cd - +fi + +AC_SUBST(DPDK_LIB,$DPDK_HOME/$DPDK_TARGET/lib) +AC_SUBST(PPP_LIB,$PPP_HOME/Dist) +AC_SUBST(PPP_HEADER,$PPP_LIB/header) + +dnl nDPI checks...
+AC_MSG_CHECKING(for nDPI $NDPI_HOME) +if test -d "$NDPI_HOME" ; then : + AC_MSG_RESULT(found in $NDPI_HOME) +else + AC_MSG_RESULT(not found) + AC_MSG_NOTICE(Getting nDPI from git) + git clone -b $NDPI_GIT_VERSION https://github.com/ntop/nDPI.git $NDPI_HOME; cd $NDPI_HOME; cd - + AC_MSG_CHECKING(for nDPI $NDPI_HOME) + if test -d "$NDPI_HOME" ; then : + AC_MSG_RESULT(found in $NDPI_HOME) + else + AC_MSG_ERROR(Install nDPI 1.7-stable: git clone -b $NDPI_GIT_VERSION https://github.com/ntop/nDPI.git $NDPI_HOME; cd $NDPI_HOME; patch -p1 < ../nDPI-1.7-bugfixes.patch ; ./autogen.sh; make; cd - ) + fi +fi + +NDPI_LIB=$NDPI_HOME/src/lib/.libs/libndpi.a +AC_MSG_CHECKING(for $NDPI_LIB) +if test -f "$NDPI_LIB" ; then : + +else + AC_MSG_RESULT([not found, compiling...]) + cd $NDPI_HOME; ./autogen.sh; make; cd - +fi +AC_MSG_RESULT(yes) + +save_flags="${CXXFLAGS}" +CXXFLAGS="${CXXFLAGS} -I${NDPI_HOME}/src/include" +AC_MSG_CHECKING(for compiling with nDPI) +save_libs="${LIBS}" +LIBS="$NDPI_LIB" +AC_LINK_IFELSE( + [AC_LANG_PROGRAM( + [#include "ndpi_api.h"], + [ndpi_revision();] + )], + [LIBS="$save_libs"], + [AC_MSG_ERROR([Linking with nDPI failed!])] +) +AC_MSG_RESULT(yes) +CXXFLAGS="$save_flags" + + +AC_OUTPUT(Makefile src/Makefile include/Makefile) diff --git a/etc/extfilter.ini b/etc/extfilter.ini new file mode 100644 index 0000000..2374ebb --- /dev/null +++ b/etc/extfilter.ini @@ -0,0 +1,60 @@ +; Переводить имя хоста в строчные буквы. +lower_host = true + +domainlist = /usr/local/etc/extfilter/domains +urllist = /usr/local/etc/extfilter/urls +ssllist = /usr/local/etc/extfilter/ssl_host + +; Файл с портами для nDPI. +protocols = /usr/local/etc/extfilter/protos + +; Список ip адресов/сетей для блокировки ssl если нет server_name в ssl hello пакете. Загружается если block_undetected_ssl = true. +sslips = /usr/local/etc/extfilter/ssl_ips + +; если false, то будет послан rst пакет вместо редиректа.
Default: false +http_redirect = true + +redirect_url = http://notify.tushino.com/?reason=5& + +; HTTP код ответа. default: 302 Moved Temporarily +http_code = 302 Found + +; Что добавлять в redirect_url, line - строка из файла url, url - запрещенный url, none - ничего +url_additional_info=line + + +; посылать tcp rst в сторону сервера от имени клиента. Default: false +rst_to_server = false + +; Default: 0 - disable +statistic_interval = 300 + +; Default: false +match_url_exactly = false + +; Default: false +block_undetected_ssl = false + +; dpdk порт(ы), где анализировать трафик +dpdk_ports = 0 + +; количество входящих очередей +rx_queues = 1 + +; размер пула mbuf. Default: 4095 +;mbuf_pool_size = 4095 + +; Какие ядра использовать. Default: все ядра, кроме management. +; core_mask = 7 + +[logging] +loggers.root.level = information +;loggers.root.level = debug +loggers.root.channel = fileChannel +channels.fileChannel.class = FileChannel +channels.fileChannel.path = /var/log/extFilter.log +channels.fileChannel.rotation = 1 M +channels.fileChannel.archive = timestamp +channels.fileChannel.formatter.class = PatternFormatter +channels.fileChannel.formatter.pattern = %Y-%m-%d %H:%M:%S.%i [%P] %p %s - %t +channels.fileChannel.formatter.times = local diff --git a/etc/systemd/extfilter.service b/etc/systemd/extfilter.service new file mode 100644 index 0000000..8c98623 --- /dev/null +++ b/etc/systemd/extfilter.service @@ -0,0 +1,12 @@ +[Unit] +Description=extFilter is a daemon for filtering traffic using DPDK +Requires=network.target +After=network.target + +[Service] +Type=forking +ExecStart=/usr/local/bin/extFilter --daemon --pidfile=/var/run/extFilter.pid --config-file /usr/local/etc/extfilter.ini +PIDFile=/var/run/extFilter.pid + +[Install] +WantedBy=multi-user.target diff --git a/etc/systemd/install_service.sh b/etc/systemd/install_service.sh new file mode 100755 index 0000000..b228501 --- /dev/null +++ b/etc/systemd/install_service.sh @@ -0,0 +1,6 @@ +#!/bin/sh + +cp 
extfilter.service /etc/systemd/system/ +systemctl daemon-reload +#systemctl enable extfilter +#systemctl start extfilter diff --git a/include/AhoCorasickPlus.h b/include/AhoCorasickPlus.h new file mode 100644 index 0000000..7fd1d59 --- /dev/null +++ b/include/AhoCorasickPlus.h @@ -0,0 +1,81 @@ +/* + * AhoCorasickPlus.h: This is the header file for a sample + * C++ wrapper for Aho-Corasick C library + * + * This file is part of multifast. + * + Copyright 2010-2013 Kamiar Kanani + + multifast is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + multifast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with multifast. If not, see . 
+*/ + +#ifndef AHOCORASICKPPW_H_ +#define AHOCORASICKPPW_H_ + +#include +using std::string; +#include +using std::queue; + +#include "actypes.h" + +// forward declaration +struct AC_AUTOMATA; +struct AC_TEXT; + + +class AhoCorasickPlus +{ +public: + + enum EnumReturnStatus + { + RETURNSTATUS_SUCCESS = 0, // No error occurred + RETURNSTATUS_DUPLICATE_PATTERN, // Duplicate patterns + RETURNSTATUS_LONG_PATTERN, // Pattern length is bigger than AC_PATTRN_MAX_LENGTH + RETURNSTATUS_ZERO_PATTERN, // Empty pattern (zero length) + RETURNSTATUS_AUTOMATA_CLOSED, // Automata is closed + RETURNSTATUS_FAILED, // General unknown failure + }; + + typedef unsigned int PatternId; + + struct Match + { + unsigned int position; + PatternId id; + AC_PATTERN_t pattern; + }; + +public: + + AhoCorasickPlus(); + ~AhoCorasickPlus(); + + EnumReturnStatus addPattern (const std::string &pattern, PatternId id); + EnumReturnStatus addPattern (const char pattern[], PatternId id); // zero ending string + void finalize (); + + void search (std::string& text, bool keep); + bool findNext (Match& match); + +private: + AC_AUTOMATA *m_automata; + AC_TEXT *m_acText; + std::queue m_matchQueue; // if multiple matches occur in a single position + // we save them here and return one by one + // for simplicity +}; + +#endif /* AHOCORASICKPPW_H_ */ diff --git a/include/Makefile.am b/include/Makefile.am new file mode 100644 index 0000000..b7c8550 --- /dev/null +++ b/include/Makefile.am @@ -0,0 +1,2 @@ + +noinst_HEADERS = main.h worker.h AhoCorasickPlus.h actypes.h ahocorasick.h patr.h patricia.h node.h statistictask.h ndpiwrapper.h qdpi.h sender.h sendertask.h stats.h diff --git a/include/actypes.h b/include/actypes.h new file mode 100644 index 0000000..cefcfbf --- /dev/null +++ b/include/actypes.h @@ -0,0 +1,143 @@ +/* + * actypes.h: Includes basic data types of ahocorasick library + * This file is part of multifast. 
+ * + Copyright 2010-2013 Kamiar Kanani + + multifast is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + multifast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with multifast. If not, see . +*/ + +#ifndef _AC_TYPES_H_ +#define _AC_TYPES_H_ + +#ifdef __cplusplus +/*extern "C" {*/ +#endif + +/* AC_ALPHABET_t: + * defines the alphabet type. + * Actually defining AC_ALPHABET_t as a char work as well, but sometimes we deal + * with streams of other basic types e.g. integers or enumerators. + * Although they consists of string of bytes (chars), but using their specific + * types as AC_ALPHABET_t will lead to a better performance. so instead of + * working with strings of chars, we assume that we are working with strings of + * AC_ALPHABET_t and leave it optional for other users to define their + * own alphabets. +**/ +typedef char AC_ALPHABET_t; + +/* AC_REP_t: + * Provides a more readable representative for a pattern. + * because patterns themselves are not always suitable for displaying + * (e.g. hex patterns), we offer this type to improve intelligibility + * of output. Sometimes it can be also useful, when you are + * retrieving patterns from a database, to maintain their identifiers in the + * automata for further reference. we provisioned two possible types as a + * union. you can add your desired type in it. 
+**/ +typedef union AC_REP +{ + const char * stringy; /* null-terminated string */ + unsigned long number; +} AC_REP_t; + +/* AC_PATTERN_t: + * This is the pattern type that must be fed into AC automata. + * the 'astring' field is not null-terminated, because it can contain zero + * value bytes. the 'length' field determines the number of AC_ALPHABET_t it + * carries. the 'rep' field is described in AC_REP_t. despite + * 'astring', 'rep' can have duplicate values for different given + * AC_PATTERN_t. it is an optional field and you can just fill it with 0. + * CAUTION: + * Not always the 'astring' points to the correct position in memory. + * it is the responsibility of your program to maintain a permanent allocation + * for astring field. +**/ +typedef struct AC_PATTERN +{ + const AC_ALPHABET_t * astring; /* String of alphabets */ + unsigned int length; /* Length of pattern */ + AC_REP_t rep; /* Representative string (optional) */ +} AC_PATTERN_t; + +/* AC_TEXT_t: + * The input text type that is fed to ac_automata_search() to be searched. + * it is similar to AC_PATTERN_t. actually we could use AC_PATTERN_t as input + * text, but for the purpose of being more readable, we defined this new type. +**/ +typedef struct AC_TEXT +{ + const AC_ALPHABET_t * astring; /* String of alphabets */ + unsigned int length; /* Length of string */ +} AC_TEXT_t; + +/* AC_MATCH_t: + * Provides the structure for reporting a match in the text. + * a match occurs when the automata reaches a final node. any final + * node can match one or more pattern at a position in a text. the + * 'patterns' field holds these matched patterns. obviously these + * matched patterns have same end-position in the text. there is a relationship + * between matched patterns: the shorter one is a factor (tail) of the longer + * one. the 'position' maintains the end position of matched patterns. the + * start position of patterns could be found by knowing their 'length' in + * AC_PATTERN_t. e.g. 
suppose "recent" and "cent" are matched at + * position 40 in the text, then the start position of them are 34 and 36 + * respectively. finally the field 'match_num' maintains the number of + * matched patterns. +**/ +typedef struct AC_MATCH +{ + AC_PATTERN_t * patterns; /* Array of matched pattern */ + long position; /* The end position of matching pattern(s) in the text */ + unsigned int match_num; /* Number of matched patterns */ +} AC_MATCH_t; + +/* AC_STATUS_t: + * Return status of an AC function +**/ +typedef enum AC_STATUS +{ + ACERR_SUCCESS = 0, /* No error occurred */ + ACERR_DUPLICATE_PATTERN, /* Duplicate patterns */ + ACERR_LONG_PATTERN, /* Pattern length is longer than AC_PATTRN_MAX_LENGTH */ + ACERR_ZERO_PATTERN, /* Empty pattern (zero length) */ + ACERR_AUTOMATA_CLOSED /* Automata is closed. after calling + * ac_automata_finalize() you can not add new + * patterns to the automata. */ +} AC_STATUS_t; + +/* AC_MATCH_CALBACK_t: + * This is the call-back function to report match back to the caller. + * when a match is find, the automata will reach you using this function and sends + * you a pointer to AC_MATCH_t. using that pointer you can handle + * matches. you can send parameters to the call-back function when you call + * ac_automata_search(). at call-back, the automata will sent you those + * parameters as the second parameter (void *) of AC_MATCH_CALBACK_t. inside + * the call-back function you can cast it to whatever you want. + * If you return 0 from AC_MATCH_CALBACK_t function to the automata, it will + * continue searching, otherwise it will return from ac_automata_search() + * to your calling function. 
+**/ +typedef int (*AC_MATCH_CALBACK_f)(AC_MATCH_t *, void *); + +/* AC_PATTRN_MAX_LENGTH: + * Maximum acceptable pattern length in AC_PATTERN_t.length +**/ +#define AC_PATTRN_MAX_LENGTH 1024 + +#ifdef __cplusplus +/*}*/ +#endif + +#endif diff --git a/include/ahocorasick.h b/include/ahocorasick.h new file mode 100644 index 0000000..b1cc1ce --- /dev/null +++ b/include/ahocorasick.h @@ -0,0 +1,90 @@ +/* + * ahocorasick.h: the main ahocorasick header file. + * This file is part of multifast. + * + Copyright 2010-2013 Kamiar Kanani + + multifast is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + multifast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with multifast. If not, see . +*/ + +#ifndef _AUTOMATA_H_ +#define _AUTOMATA_H_ + +#include "actypes.h" + +#ifdef __cplusplus +/*extern "C" {*/ +#endif + +struct AC_NODE; + +typedef struct AC_AUTOMATA +{ + /* The root of the Aho-Corasick trie */ + struct AC_NODE * root; + + /* maintain all nodes pointers. it will be used to access or release + * all nodes. */ + struct AC_NODE ** all_nodes; + + unsigned int all_nodes_num; /* Number of all nodes in the automata */ + unsigned int all_nodes_max; /* Current max allocated memory for *all_nodes */ + + /* this flag indicates that if automata is finalized by + * ac_automata_finalize() or not. 1 means finalized and 0 + * means not finalized (is open). after finalizing automata you can not + * add pattern to automata anymore. 
*/ + unsigned short automata_open; + + /* It is possible to feed a large input to the automata chunk by chunk to + * be searched using ac_automata_search(). in fact by default automata + * thinks that all chunks are related unless you do ac_automata_reset(). + * followings are variables that keep track of searching state. */ + struct AC_NODE * current_node; /* Pointer to current node while searching */ + unsigned long base_position; /* Represents the position of current chunk + * related to whole input text */ + + /* The input text. + * used only when it is working in settext/findnext mode */ + AC_TEXT_t * text; + + /* The lase searched position in the chunk. + * used only when it is working in settext/findnext mode */ + unsigned long position; + + /* Statistic Variables */ + + /* Total patterns in the automata */ + unsigned long total_patterns; + +} AC_AUTOMATA_t; + + +AC_AUTOMATA_t * ac_automata_init (void); +AC_STATUS_t ac_automata_add (AC_AUTOMATA_t * thiz, AC_PATTERN_t * str); +void ac_automata_finalize (AC_AUTOMATA_t * thiz); +int ac_automata_search (AC_AUTOMATA_t * thiz, AC_TEXT_t * text, int keep, AC_MATCH_CALBACK_f callback, void * param); + +void ac_automata_settext (AC_AUTOMATA_t * thiz, AC_TEXT_t * text, int keep); +AC_MATCH_t * ac_automata_findnext (AC_AUTOMATA_t * thiz); + +void ac_automata_release (AC_AUTOMATA_t * thiz); +void ac_automata_display (AC_AUTOMATA_t * thiz, char repcast); + + +#ifdef __cplusplus +/*}*/ +#endif + +#endif diff --git a/include/config.h b/include/config.h new file mode 100644 index 0000000..f0d363e --- /dev/null +++ b/include/config.h @@ -0,0 +1,74 @@ +/* include/config.h. Generated from config.h.in by configure. */ +/* include/config.h.in. Generated from configure.ac by autoheader. */ + +/* Define to 1 if you have the header file. */ +#define HAVE_INTTYPES_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_MEMORY_H 1 + +/* Define to 1 if you have the header file. 
*/ +#define HAVE_NETINET_IN_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_POCO_FOUNDATION_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_POCO_NET_HTTPCOOKIE_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_POCO_UTIL_TIMER_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDINT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDLIB_H 1 + +/* Define to 1 if you have the `strerror' function. */ +#define HAVE_STRERROR 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STRINGS_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STRING_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_STAT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_TYPES_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_UNISTD_H 1 + +/* Define to 1 if the system has the type `_Bool'. */ +/* #undef HAVE__BOOL */ + +/* Name of package */ +#define PACKAGE "extfilter" + +/* Define to the address where bug reports for this package should be sent. */ +#define PACKAGE_BUGREPORT "max1976@mail.ru" + +/* Define to the full name of this package. */ +#define PACKAGE_NAME "extFilter" + +/* Define to the full name and version of this package. */ +#define PACKAGE_STRING "extFilter 0.1" + +/* Define to the one symbol short name of this package. */ +#define PACKAGE_TARNAME "extfilter" + +/* Define to the home page for this package. */ +#define PACKAGE_URL "" + +/* Define to the version of this package. */ +#define PACKAGE_VERSION "0.1" + +/* Define to 1 if you have the ANSI C header files. */ +#define STDC_HEADERS 1 + +/* Version number of package */ +#define VERSION "0.1" diff --git a/include/config.h.in b/include/config.h.in new file mode 100644 index 0000000..9eb3e1f --- /dev/null +++ b/include/config.h.in @@ -0,0 +1,73 @@ +/* include/config.h.in. Generated from configure.ac by autoheader. 
*/ + +/* Define to 1 if you have the header file. */ +#undef HAVE_INTTYPES_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_MEMORY_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_NETINET_IN_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_POCO_FOUNDATION_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_POCO_NET_HTTPCOOKIE_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_POCO_UTIL_TIMER_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STDINT_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STDLIB_H + +/* Define to 1 if you have the `strerror' function. */ +#undef HAVE_STRERROR + +/* Define to 1 if you have the header file. */ +#undef HAVE_STRINGS_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STRING_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_STAT_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_TYPES_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_UNISTD_H + +/* Define to 1 if the system has the type `_Bool'. */ +#undef HAVE__BOOL + +/* Name of package */ +#undef PACKAGE + +/* Define to the address where bug reports for this package should be sent. */ +#undef PACKAGE_BUGREPORT + +/* Define to the full name of this package. */ +#undef PACKAGE_NAME + +/* Define to the full name and version of this package. */ +#undef PACKAGE_STRING + +/* Define to the one symbol short name of this package. */ +#undef PACKAGE_TARNAME + +/* Define to the home page for this package. */ +#undef PACKAGE_URL + +/* Define to the version of this package. */ +#undef PACKAGE_VERSION + +/* Define to 1 if you have the ANSI C header files. */ +#undef STDC_HEADERS + +/* Version number of package */ +#undef VERSION diff --git a/include/config.h.in~ b/include/config.h.in~ new file mode 100644 index 0000000..9eb3e1f --- /dev/null +++ b/include/config.h.in~ @@ -0,0 +1,73 @@ +/* include/config.h.in. 
Generated from configure.ac by autoheader. */ + +/* Define to 1 if you have the header file. */ +#undef HAVE_INTTYPES_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_MEMORY_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_NETINET_IN_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_POCO_FOUNDATION_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_POCO_NET_HTTPCOOKIE_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_POCO_UTIL_TIMER_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STDINT_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STDLIB_H + +/* Define to 1 if you have the `strerror' function. */ +#undef HAVE_STRERROR + +/* Define to 1 if you have the header file. */ +#undef HAVE_STRINGS_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STRING_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_STAT_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_TYPES_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_UNISTD_H + +/* Define to 1 if the system has the type `_Bool'. */ +#undef HAVE__BOOL + +/* Name of package */ +#undef PACKAGE + +/* Define to the address where bug reports for this package should be sent. */ +#undef PACKAGE_BUGREPORT + +/* Define to the full name of this package. */ +#undef PACKAGE_NAME + +/* Define to the full name and version of this package. */ +#undef PACKAGE_STRING + +/* Define to the one symbol short name of this package. */ +#undef PACKAGE_TARNAME + +/* Define to the home page for this package. */ +#undef PACKAGE_URL + +/* Define to the version of this package. */ +#undef PACKAGE_VERSION + +/* Define to 1 if you have the ANSI C header files. 
*/ +#undef STDC_HEADERS + +/* Version number of package */ +#undef VERSION diff --git a/include/main.h b/include/main.h new file mode 100644 index 0000000..b30dcf4 --- /dev/null +++ b/include/main.h @@ -0,0 +1,87 @@ +#pragma once + +#include +#include +#include +#include "worker.h" +#include "sender.h" + +#define DEFAULT_MBUF_POOL_SIZE 4095 + + + +class AhoCorasickPlus; +class Patricia; + +class extFilter: public Poco::Util::ServerApplication +{ + +public: + extFilter(); + ~extFilter(); + + void initialize(Application& self); + void uninitialize(); + void defineOptions(Poco::Util::OptionSet& options); + void handleOption(const std::string& name,const std::string& value); + void handleHelp(const std::string& name,const std::string& value); + void displayHelp(); + + /// Print DPDK ports + void printDPDKPorts(const std::string& name,const std::string& value); + int main(const ArgVec& args); + + /** + Load domains for blocking. + **/ + void loadDomains(std::string &fn, AhoCorasickPlus *_dm_atm,DomainsMatchType *_dm_map); + + /** + Load URLs for blocking. + **/ + void loadURLs(std::string &fn, AhoCorasickPlus *dm_atm); + + /** + Load IP SSL for blocking. + **/ + void loadSSLIP(const std::string &fn, Patricia *patricia); + + /** + Load IP:port for blocking. 
+ **/ + void loadHosts(std::string &fn,IPPortMap *ippm); + + pcpp::CoreMask _coreMaskToUse; + uint32_t _BufPoolSize = DEFAULT_MBUF_POOL_SIZE; + std::vector _dpdkPortVec; + + // nDPI structures + static struct ndpi_detection_module_struct* my_ndpi_struct; + static u_int32_t ndpi_size_flow_struct; + static u_int32_t ndpi_size_id_struct; + static u_int32_t current_ndpi_memory; + static u_int32_t max_ndpi_memory; + +private: + bool _helpRequested; + bool _listDPDKPorts; + int _nbRxQueues; + + std::string _urlsFile; + std::string _domainsFile; + std::string _sslIpsFile; + std::string _sslFile; + std::string _hostsFile; + + bool _lower_host; + bool _match_url_exactly; + bool _block_undetected_ssl; + bool _http_redirect; + + int _statistic_interval; + enum ADD_P_TYPES _add_p_type; + struct CSender::params _sender_params; +}; + + + diff --git a/include/ndpiwrapper.h b/include/ndpiwrapper.h new file mode 100644 index 0000000..756afe1 --- /dev/null +++ b/include/ndpiwrapper.h @@ -0,0 +1,62 @@ +/* +* +* Copyright (C) Max +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+* +*/ + +#ifndef __NDPI_WRAPPER_H +#define __NDPI_WRAPPER_H + +#include "main.h" +#include + +class nDPIWrapper /* Owns one nDPI flow buffer and two nDPI id buffers (source and destination); all three are released in the destructor. */ +{ +public: + nDPIWrapper() /* Zero-allocates the three buffers with calloc, sized by the static extFilter::ndpi_size_* values. NOTE(review): the calloc results are not checked for NULL here. */ + { + src = (struct ndpi_id_struct*)calloc(1,extFilter::ndpi_size_id_struct); + dst = (struct ndpi_id_struct*)calloc(1,extFilter::ndpi_size_id_struct); + flow = (struct ndpi_flow_struct *)calloc(1,extFilter::ndpi_size_flow_struct); + } + ~nDPIWrapper() /* NOTE(review): the class declares no copy constructor or copy assignment, so a copy would release the same buffers twice - confirm instances are never copied. */ + { + ndpi_free_flow(flow); + if(src) + free(src); + if(dst) + free(dst); + } + + inline struct ndpi_flow_struct *get_flow() + { + return flow; + } + inline struct ndpi_id_struct *get_src() + { + return src; + } + inline struct ndpi_id_struct *get_dst() + { + return dst; + } +private: + struct ndpi_id_struct *src; + struct ndpi_id_struct *dst; + struct ndpi_flow_struct *flow; +}; + +#endif diff --git a/include/node.h b/include/node.h new file mode 100644 index 0000000..c2a5c76 --- /dev/null +++ b/include/node.h @@ -0,0 +1,74 @@ +/* + * node.h: automata node header file + * This file is part of multifast. + * + Copyright 2010-2013 Kamiar Kanani + + multifast is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + multifast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with multifast. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#ifndef _NODE_H_ +#define _NODE_H_ + +#include "actypes.h" + +#ifdef __cplusplus +/*extern "C" {*/ +#endif + +/* Forward Declaration */ +struct edge; + +/* automata node */ +typedef struct AC_NODE +{ + int id; /* Node ID : for debugging purpose */ + short int final; /* 0: no ; 1: yes, it is a final node */ + struct AC_NODE * failure_node; /* The failure node of this node */ + unsigned short depth; /* depth: distance between this node and the root */ + + /* Matched patterns */ + AC_PATTERN_t * matched_patterns; /* Array of matched patterns */ + unsigned short matched_patterns_num; /* Number of matched patterns at this node */ + unsigned short matched_patterns_max; /* Max capacity of allocated memory for matched_patterns */ + + /* Outgoing Edges */ + struct edge * outgoing; /* Array of outgoing edges */ + unsigned short outgoing_degree; /* Number of outgoing edges */ + unsigned short outgoing_max; /* Max capacity of allocated memory for outgoing */ +} AC_NODE_t; + +/* The Edge of the Node */ +struct edge +{ + AC_ALPHABET_t alpha; /* Edge alpha */ + AC_NODE_t * next; /* Target of the edge */ +}; + + +AC_NODE_t * node_create (void); +AC_NODE_t * node_create_next (AC_NODE_t * thiz, AC_ALPHABET_t alpha); +void node_register_matchstr (AC_NODE_t * thiz, AC_PATTERN_t * str); +void node_register_outgoing (AC_NODE_t * thiz, AC_NODE_t * next, AC_ALPHABET_t alpha); +AC_NODE_t * node_find_next (AC_NODE_t * thiz, AC_ALPHABET_t alpha); +AC_NODE_t * node_findbs_next (AC_NODE_t * thiz, AC_ALPHABET_t alpha); +void node_release (AC_NODE_t * thiz); +void node_assign_id (AC_NODE_t * thiz); +void node_sort_edges (AC_NODE_t * thiz); + +#ifdef __cplusplus +/*}*/ +#endif + +#endif diff --git a/include/patr.h b/include/patr.h new file mode 100644 index 0000000..296d175 --- /dev/null +++ b/include/patr.h @@ -0,0 +1,43 @@ +/* +* +* Copyright (C) Max +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as 
published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +* +*/ + +#ifndef __PATR_H +#define __PATR_H + +#include +#include +#include "patricia.h" + +class Patricia +{ +public: + Patricia(); + ~Patricia(); + + patricia_node_t *make_and_lookup(std::string &addr); + /// Search by address only + patricia_node_t *try_search_exact_ip(Poco::Net::IPAddress &address); + void print_all_nodes(); +private: + bool fill_prefix(int family, void *dest, int bitlen, prefix_t &prefix); + patricia_tree_t *tree_ipv4; + patricia_tree_t *tree_ipv6; +}; + +#endif diff --git a/include/patricia.h b/include/patricia.h new file mode 100644 index 0000000..f4db2fc --- /dev/null +++ b/include/patricia.h @@ -0,0 +1,155 @@ +/* + * $Id: patricia.h,v 1.6 2005/12/07 20:53:01 dplonka Exp $ + * Dave Plonka + * + * This product includes software developed by the University of Michigan, + * Merit Network, Inc., and their contributors. + * + * This file had been called "radix.h" in the MRT sources. + * + * I renamed it to "patricia.h" since it's not an implementation of a general + * radix trie. Also, pulled in various requirements from "mrt.h" and added + * some other things it could be used as a standalone API. 
+ */ + +#ifndef _PATRICIA_H +#define _PATRICIA_H + +#ifdef __cplusplus +extern "C" { +#endif + +#define HAVE_IPV6 + +/* typedef unsigned int u_int; */ +typedef void (*void_fn_t)(); +/* { from defs.h */ +#define prefix_touchar(prefix) ((u_char *)&(prefix)->add.sin) +#define MAXLINE 1024 +#define BIT_TEST(f, b) ((f) & (b)) +/* } */ + +#define addroute make_and_lookup + +#include /* for u_* definitions (on FreeBSD 5) */ + +#include /* for EAFNOSUPPORT */ +#ifndef EAFNOSUPPORT +# define EAFNOSUPPORT WSAEAFNOSUPPORT /* fallback used only when the header above did not provide EAFNOSUPPORT */ +# include +#else +# include /* for struct in_addr */ +#endif + +#include /* for AF_INET */ + +/* { from mrt.h */ + +typedef struct _prefix4_t { + u_short family; /* AF_INET | AF_INET6 */ + u_short bitlen; /* same as mask? */ + int ref_count; /* reference count */ + struct in_addr sin; +} prefix4_t; + +typedef struct _prefix_t { + u_short family; /* AF_INET | AF_INET6 */ + u_short bitlen; /* same as mask? */ + int ref_count; /* reference count */ + union { + struct in_addr sin; +#ifdef HAVE_IPV6 + struct in6_addr sin6; +#endif /* IPV6 */ + } add; +} prefix_t; + +/* } */ + +typedef struct _patricia_node_t { + u_int bit; /* flag if this node used */ + prefix_t *prefix; /* who we are in patricia tree */ + struct _patricia_node_t *l, *r; /* left and right children */ + struct _patricia_node_t *parent;/* may be used */ + void *data; /* pointer to data */ + void *user1; /* pointer to usr data (ex. 
route flap info) */ +} patricia_node_t; + +typedef struct _patricia_tree_t { + patricia_node_t *head; + u_int maxbits; /* for IP, 32 bit addresses */ + int num_active_node; /* for debug purpose */ +} patricia_tree_t; + + +patricia_node_t *patricia_search_exact (patricia_tree_t *patricia, prefix_t *prefix); +patricia_node_t *patricia_search_best (patricia_tree_t *patricia, prefix_t *prefix); +patricia_node_t * patricia_search_best2 (patricia_tree_t *patricia, prefix_t *prefix, + int inclusive); +patricia_node_t *patricia_lookup (patricia_tree_t *patricia, prefix_t *prefix); +void patricia_remove (patricia_tree_t *patricia, patricia_node_t *node); +patricia_tree_t *New_Patricia (int maxbits); +void Clear_Patricia (patricia_tree_t *patricia, void_fn_t func); +void Destroy_Patricia (patricia_tree_t *patricia, void_fn_t func); + +void patricia_process (patricia_tree_t *patricia, void_fn_t func); + +char *prefix_toa (prefix_t * prefix); + +prefix_t *New_Prefix (int family, void *dest, int bitlen); +void Deref_Prefix (prefix_t * prefix); +size_t patricia_walk_inorder(patricia_node_t *node, void_fn_t func); + + +#define PATRICIA_MAXBITS (sizeof(struct in6_addr) * 8) +#define PATRICIA_NBIT(x) (0x80 >> ((x) & 0x07)) /* mask selecting bit (x) inside its byte, most significant bit first; the shift count is kept in 0..7 so it can never reach the width of int */ +#define PATRICIA_NBYTE(x) ((x) >> 3) + +#define PATRICIA_DATA_GET(node, type) (type *)((node)->data) +#define PATRICIA_DATA_SET(node, value) ((node)->data = (void *)(value)) + +#define PATRICIA_WALK(Xhead, Xnode) \ + do { \ + patricia_node_t *Xstack[PATRICIA_MAXBITS+1]; \ + patricia_node_t **Xsp = Xstack; \ + patricia_node_t *Xrn = (Xhead); \ + while ((Xnode = Xrn)) { \ + if (Xnode->prefix) + +#define PATRICIA_WALK_ALL(Xhead, Xnode) \ +do { \ + patricia_node_t *Xstack[PATRICIA_MAXBITS+1]; \ + patricia_node_t **Xsp = Xstack; \ + patricia_node_t *Xrn = (Xhead); \ + while ((Xnode = Xrn)) { \ + if (1) + +#define PATRICIA_WALK_BREAK { \ + if (Xsp != Xstack) { \ + Xrn = *(--Xsp); \ + } else { \ + Xrn = (patricia_node_t *) 0; \ + } \ + continue; } + +#define 
PATRICIA_WALK_END \ + if (Xrn->l) { \ + if (Xrn->r) { \ + *Xsp++ = Xrn->r; \ + } \ + Xrn = Xrn->l; \ + } else if (Xrn->r) { \ + Xrn = Xrn->r; \ + } else if (Xsp != Xstack) { \ + Xrn = *(--Xsp); \ + } else { \ + Xrn = (patricia_node_t *) 0; \ + } \ + } \ + } while (0) + +#ifdef __cplusplus +} +#endif + +#endif /* _PATRICIA_H */ diff --git a/include/qdpi.h b/include/qdpi.h new file mode 100644 index 0000000..44e65e0 --- /dev/null +++ b/include/qdpi.h @@ -0,0 +1,26 @@ +/* +* +* Copyright (C) Max +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +* +*/ +#ifndef __QDPI_H +#define __QDPI_H + +#include + +struct ndpi_detection_module_struct* init_ndpi(); + +#endif diff --git a/include/sender.h b/include/sender.h new file mode 100644 index 0000000..f3c9768 --- /dev/null +++ b/include/sender.h @@ -0,0 +1,67 @@ +/* +* +* Copyright (C) Max +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. 
+* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +* +*/ +#ifndef __SENDER_H +#define __SENDER_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +class CSender { +public: + struct params + { + std::string redirect_url; + std::string code; + bool send_rst_to_server; + int ttl; + int ip6_hops; + + params() : code("302 Moved Temporarily"), send_rst_to_server(false), ttl(250), ip6_hops(250) { } + }; + CSender( std::string url ); + CSender(struct params &prm); + ~CSender(); + void Redirect(int user_port, int dst_port, Poco::Net::IPAddress &src_ip, Poco::Net::IPAddress &dst_ip, uint32_t acknum, uint32_t seqnum, int f_psh, std::string &additional_param); + void sendPacket(Poco::Net::IPAddress &ip_from, Poco::Net::IPAddress &ip_to, int port_from, int port_to, uint32_t acknum, uint32_t seqnum, std::string &dt, int f_reset, int f_psh); + void SendRST(int user_port, int dst_port, Poco::Net::IPAddress &user_ip, Poco::Net::IPAddress &dst_ip, uint32_t acknum, uint32_t seqnum, int f_psh); +private: + unsigned short csum(unsigned short *ptr, int nbytes); + int s; + int s6; + std::string rHeader; + Poco::Logger& _logger; + struct params _parameters; +}; + + +#endif diff --git a/include/sendertask.h b/include/sendertask.h new file mode 100644 index 0000000..52b5ec2 --- /dev/null +++ b/include/sendertask.h @@ -0,0 +1,116 @@ +/* +* +* Copyright (C) Max +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +* +*/ +#ifndef __SENDER_TASK_H +#define __SENDER_TASK_H + +#include +#include +#include +#include +#include +#include +#include + +#include "sender.h" + +class RedirectNotification: public Poco::Notification + // The notification sent to worker threads. +{ +public: + typedef Poco::AutoPtr Ptr; + + RedirectNotification(int user_port, int dst_port, Poco::Net::IPAddress *user_ip, Poco::Net::IPAddress *dst_ip, uint32_t acknum, uint32_t seqnum, int f_psh, std::string &additional_param, bool is_rst=false): + _user_port(user_port), + _dst_port(dst_port), + _user_ip(*user_ip), + _dst_ip(*dst_ip), + _acknum(acknum), + _seqnum(seqnum), + _f_psh(f_psh), + _additional_param(additional_param), + _is_rst(is_rst) + { + } + int user_port() + { + return _user_port; + } + int dst_port() + { + return _dst_port; + } + Poco::Net::IPAddress &user_ip() + { + return _user_ip; + } + Poco::Net::IPAddress &dst_ip() + { + return _dst_ip; + } + u_int32_t acknum() + { + return _acknum; + } + u_int32_t seqnum() + { + return _seqnum; + } + int f_psh() + { + return _f_psh; + } + std::string &additional_param() + { + return _additional_param; + } + bool is_rst() + { + return _is_rst; + } +private: + int _user_port; + int _dst_port; + Poco::Net::IPAddress _user_ip; + Poco::Net::IPAddress _dst_ip; + uint32_t _acknum; + uint32_t _seqnum; + int _f_psh; + std::string _additional_param; + bool _is_rst; +}; + +/// Данная задача отсылает редирект заданному клиенту +class SenderTask: public Poco::Task +{ +public: + SenderTask(struct CSender::params &prm); + ~SenderTask(); + + void runTask(); + + // очередь, куда необходимо писать отправные данные... 
+ static Poco::NotificationQueue queue; + +private: + CSender *sender; + static Poco::FastMutex _mutex; + Poco::Logger& _logger; +}; + +#endif diff --git a/include/stamp-h1 b/include/stamp-h1 new file mode 100644 index 0000000..b330768 --- /dev/null +++ b/include/stamp-h1 @@ -0,0 +1 @@ +timestamp for include/config.h diff --git a/include/statistictask.h b/include/statistictask.h new file mode 100644 index 0000000..76144e8 --- /dev/null +++ b/include/statistictask.h @@ -0,0 +1,38 @@ +/* +* +* Copyright (C) Max +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +* +*/ + +#pragma once +#include +#include +#include + +class StatisticTask: public Poco::Task +{ +public: + StatisticTask(int sec, std::vector& workerThreadVector); + void runTask(); + void OutStatistic(); + +private: + // interval in seconds between outputs of the memory consumption info. 
0 - do not output + int _sec; + std::vector& workerThreadVec; +}; + + diff --git a/include/stats.h b/include/stats.h new file mode 100644 index 0000000..75fb5a6 --- /dev/null +++ b/include/stats.h @@ -0,0 +1,24 @@ +#pragma once /* NOTE(review): this header uses uint64_t but includes nothing itself; it relies on the including file having already pulled in a header that defines it. */ + +class ThreadStats +{ +public: + uint64_t redirected_domains; + uint64_t redirected_urls; + uint64_t sended_rst; + uint64_t ip_packets; + uint64_t total_bytes; + uint64_t matched_ssl; + uint64_t matched_ssl_ip; + uint64_t matched_ip_port; + uint64_t total_packets; + uint64_t analyzed_packets; + uint64_t matched_domains; + uint64_t matched_urls; + + ThreadStats() : redirected_domains(0), redirected_urls(0), sended_rst(0), ip_packets(0), total_bytes(0), matched_ssl(0), matched_ssl_ip(0), matched_ip_port(0),total_packets(0), analyzed_packets(0), matched_domains(0), matched_urls(0) {} + + void clear() { redirected_domains = 0; redirected_urls = 0; sended_rst = 0; ip_packets = 0; total_bytes = 0; matched_ssl = 0; matched_ssl_ip = 0; matched_ip_port = 0; total_packets = 0; analyzed_packets = 0; matched_domains = 0; matched_urls =0; } + + +}; diff --git a/include/worker.h b/include/worker.h new file mode 100644 index 0000000..35484cc --- /dev/null +++ b/include/worker.h @@ -0,0 +1,95 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "AhoCorasickPlus.h" +#include "patr.h" +#include "stats.h" + +typedef Poco::HashMap DomainsMatchType; + +typedef std::map > InputDataConfig; + +typedef std::map> IPPortMap; + +enum ADD_P_TYPES { A_TYPE_NONE, A_TYPE_ID, A_TYPE_URL }; + +/** + * Contains all the configuration needed for the worker thread including: + * - Which DPDK ports and which RX queues to receive packet from + * - Whether to send matched packets to TX DPDK port and/or save them to a pcap file + */ +struct WorkerConfig +{ + uint32_t CoreId; + InputDataConfig InDataCfg; + AhoCorasickPlus *atm; + Poco::FastMutex atmLock; // for loading URLs 
+ AhoCorasickPlus *atmDomains; + DomainsMatchType domainsMatchType; + Poco::FastMutex atmDomainsLock; // for loading domains + AhoCorasickPlus *atmSSLDomains; + DomainsMatchType SSLdomainsMatchType; + Poco::FastMutex atmSSLDomainsLock; // for loading domains + Patricia *sslIPs; // ip addresses for blocking + IPPortMap *ipportMap; + Poco::FastMutex ipportMapLock; + + bool match_url_exactly; + bool lower_host; + bool block_undetected_ssl; + bool http_redirect; + std::string PathToWritePackets; + enum ADD_P_TYPES add_p_type; + WorkerConfig() : CoreId(MAX_NUM_OF_CORES+1), atm(NULL), atmDomains(NULL), atmSSLDomains(NULL), sslIPs(NULL), ipportMap(NULL), match_url_exactly(false),lower_host(false),block_undetected_ssl(false),http_redirect(true),add_p_type(A_TYPE_NONE) { } +}; + + +class WorkerThread : public pcpp::DpdkWorkerThread +{ +private: + WorkerConfig& m_WorkerConfig; + bool m_Stop; + uint32_t m_CoreId; +// std::map m_FlowTable; + Poco::Logger& _logger; + ThreadStats m_ThreadStats; + bool analyzePacket(pcpp::Packet &parsedPacket); +public: + WorkerThread(const std::string& name, WorkerConfig& workerConfig) : + m_WorkerConfig(workerConfig), m_Stop(true), m_CoreId(MAX_NUM_OF_CORES+1), + _logger(Poco::Logger::get(name)) + { + } + + virtual ~WorkerThread() {} + + bool run(uint32_t coreId); + + void stop() + { + // assign the stop flag which will cause the main loop to end + m_Stop = true; + } + + uint32_t getCoreId() + { + return m_CoreId; + } + + const ThreadStats& getStats() + { + return m_ThreadStats; + } + +}; \ No newline at end of file diff --git a/src/AhoCorasickPlus.cpp b/src/AhoCorasickPlus.cpp new file mode 100644 index 0000000..3a21eb4 --- /dev/null +++ b/src/AhoCorasickPlus.cpp @@ -0,0 +1,111 @@ +/* + * AhoCorasickPlus.cpp: This is the implementation file for a sample + * C++ wrapper for Aho-Corasick C library + * + * This file is part of multifast. 
+ * + Copyright 2010-2013 Kamiar Kanani + + multifast is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + multifast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with multifast. If not, see . +*/ + +#include "ahocorasick.h" +#include "AhoCorasickPlus.h" + +AhoCorasickPlus::AhoCorasickPlus () +{ + m_automata = ac_automata_init (); + m_acText = new AC_TEXT_t; +} + +AhoCorasickPlus::~AhoCorasickPlus () +{ + ac_automata_release (m_automata); + delete m_acText; +} + +AhoCorasickPlus::EnumReturnStatus AhoCorasickPlus::addPattern (const std::string &pattern, PatternId id) +{ + EnumReturnStatus rv = RETURNSTATUS_FAILED; + + AC_PATTERN_t tmp_patt; + tmp_patt.astring = (AC_ALPHABET_t*) pattern.c_str(); + tmp_patt.length = pattern.size(); + tmp_patt.rep.number = id; + + AC_STATUS_t status = ac_automata_add (m_automata, &tmp_patt); + + switch (status) + { + case ACERR_SUCCESS: rv = RETURNSTATUS_SUCCESS; break; + case ACERR_DUPLICATE_PATTERN: rv = RETURNSTATUS_DUPLICATE_PATTERN; break; + case ACERR_LONG_PATTERN: rv = RETURNSTATUS_LONG_PATTERN; break; + case ACERR_ZERO_PATTERN: rv = RETURNSTATUS_ZERO_PATTERN; break; + case ACERR_AUTOMATA_CLOSED: rv = RETURNSTATUS_AUTOMATA_CLOSED; break; + } + return rv; +} + +AhoCorasickPlus::EnumReturnStatus AhoCorasickPlus::addPattern (const char pattern[], PatternId id) +{ + std::string tmpString = pattern; + return addPattern (tmpString, id); +} + +void AhoCorasickPlus::finalize () +{ + ac_automata_finalize (m_automata); +} + +void AhoCorasickPlus::search (std::string& text, bool keep) 
+{ + m_acText->astring = text.c_str(); + m_acText->length = text.size(); + ac_automata_settext (m_automata, m_acText, (int)keep); +} + +bool AhoCorasickPlus::findNext (Match& match) +{ + if (m_matchQueue.size()>0) + { + match = m_matchQueue.front(); + m_matchQueue.pop(); + return true; + } + + AC_MATCH_t * matchp; + + if ((matchp = ac_automata_findnext (m_automata))) + { + Match singleMatch; + singleMatch.position = matchp->position; + + for (unsigned int j=0; j < matchp->match_num; j++) + { + singleMatch.id = matchp->patterns[j].rep.number; + singleMatch.pattern = matchp->patterns[j]; + // we ignore tmp_patt.astring it may have been invalidated + m_matchQueue.push(singleMatch); + } + } + + if (m_matchQueue.size()>0) + { + match = m_matchQueue.front(); + m_matchQueue.pop(); + return true; + } + + return false; +} diff --git a/src/Makefile.am b/src/Makefile.am new file mode 100644 index 0000000..a5023b3 --- /dev/null +++ b/src/Makefile.am @@ -0,0 +1,9 @@ +AM_CPPFLAGS = -I$(top_srcdir)/$(PPP_HEADER) -I$(top_srcdir)/nDPI/src/include +LDADD =-lpcap -L$(top_srcdir)/$(PPP_LIB) -lPcap++ -lPacket++ -lCommon++ \ +-L $(DPDK_LIB) -lrt -lm -ldl $(top_srcdir)/nDPI/src/lib/.libs/libndpi.a + +bin_PROGRAMS = extFilter + +extFilter_LDFLAGS = -Wl,--whole-archive -lrte_pmd_bond -lrte_pmd_vmxnet3_uio -lrte_pmd_virtio -lrte_pmd_enic -lrte_pmd_i40e -lrte_pmd_fm10k -lrte_pmd_ixgbe -lrte_pmd_e1000 -lrte_pmd_ring -lrte_pmd_af_packet -lethdev -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring -lrte_kvargs -lrte_hash -lrte_cmdline -Wl,--no-whole-archive + +extFilter_SOURCES = main.cpp worker.cpp AhoCorasickPlus.cpp ahocorasick.cpp node.cpp patricia.c patr.cpp qdpi.cpp sender.cpp sendertask.cpp statistictask.cpp diff --git a/src/ahocorasick.cpp b/src/ahocorasick.cpp new file mode 100644 index 0000000..99eb860 --- /dev/null +++ b/src/ahocorasick.cpp @@ -0,0 +1,461 @@ +/* + * ahocorasick.cpp: implementation of ahocorasick library's functions + * This file is part of multifast. 
+ * + Copyright 2010-2013 Kamiar Kanani + + multifast is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + multifast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with multifast. If not, see . +*/ + +#include +#include +#include +#include + +#include "node.h" +#include "ahocorasick.h" + +/* Allocation step for automata.all_nodes */ +#define REALLOC_CHUNK_ALLNODES 200 + +/* Private function prototype */ +static void ac_automata_register_nodeptr + (AC_AUTOMATA_t * thiz, AC_NODE_t * node); +static void ac_automata_union_matchstrs + (AC_NODE_t * node); +static void ac_automata_set_failure + (AC_AUTOMATA_t * thiz, AC_NODE_t * node, AC_ALPHABET_t * alphas); +static void ac_automata_traverse_setfailure + (AC_AUTOMATA_t * thiz, AC_NODE_t * node, AC_ALPHABET_t * alphas); +static void ac_automata_reset (AC_AUTOMATA_t * thiz); + + +/****************************************************************************** + * FUNCTION: ac_automata_init + * Initialize automata; allocate memories and set initial values + * PARAMS: +******************************************************************************/ +AC_AUTOMATA_t * ac_automata_init () +{ + AC_AUTOMATA_t * thiz = (AC_AUTOMATA_t *)malloc(sizeof(AC_AUTOMATA_t)); + memset (thiz, 0, sizeof(AC_AUTOMATA_t)); + thiz->root = node_create (); + thiz->all_nodes_max = REALLOC_CHUNK_ALLNODES; + thiz->all_nodes = (AC_NODE_t **) malloc (thiz->all_nodes_max*sizeof(AC_NODE_t *)); + ac_automata_register_nodeptr (thiz, thiz->root); + ac_automata_reset (thiz); + thiz->total_patterns = 0; + 
+ thiz->automata_open = 1; + return thiz; +} + +/****************************************************************************** + * FUNCTION: ac_automata_add + * Adds pattern to the automata. + * PARAMS: + * AC_AUTOMATA_t * thiz: the pointer to the automata + * AC_PATTERN_t * patt: the pointer to added pattern + * RETURN VALUE: AC_STATUS_t + * the return value indicates the success or failure of adding action +******************************************************************************/ +AC_STATUS_t ac_automata_add (AC_AUTOMATA_t * thiz, AC_PATTERN_t * patt) +{ + unsigned int i; + AC_NODE_t * n = thiz->root; + AC_NODE_t * next; + AC_ALPHABET_t alpha; + + if(!thiz->automata_open) + return ACERR_AUTOMATA_CLOSED; + + if (!patt->length) + return ACERR_ZERO_PATTERN; + + if (patt->length > AC_PATTRN_MAX_LENGTH) + return ACERR_LONG_PATTERN; + + for (i=0; i < patt->length; i++) /* walk the pattern one symbol at a time, following an existing edge or creating the missing node */ + { + alpha = patt->astring[i]; + if ((next = node_find_next(n, alpha))) + { + n = next; + continue; + } + else + { + next = node_create_next(n, alpha); + next->depth = n->depth + 1; + n = next; + ac_automata_register_nodeptr(thiz, n); + } + } + + if(n->final) /* the node reached by the last symbol is already marked final */ + return ACERR_DUPLICATE_PATTERN; + + n->final = 1; + node_register_matchstr(n, patt); + thiz->total_patterns++; + + return ACERR_SUCCESS; +} + +/****************************************************************************** + * FUNCTION: ac_automata_finalize + * Locate the failure node for all nodes and collect all matched pattern for + * every node. it also sorts outgoing edges of node, so binary search could be + * performed on them. after calling this function the automata literally will + * be finalized and you can not add new patterns to the automata. 
+ * PARAMS:
+ * AC_AUTOMATA_t * thiz: the pointer to the automata
+******************************************************************************/
+void ac_automata_finalize (AC_AUTOMATA_t * thiz)
+{
+    /* scratch buffer handed to the traversal together with the root */
+    AC_ALPHABET_t path[AC_PATTRN_MAX_LENGTH];
+    unsigned int idx = 0;
+
+    /* step 1: give every node its failure node */
+    ac_automata_traverse_setfailure (thiz, thiz->root, path);
+
+    /* step 2: per registered node, merge in the patterns reachable through
+     * its failure chain and sort its edges for the binary-search lookup */
+    while (idx < thiz->all_nodes_num)
+    {
+        AC_NODE_t * cur = thiz->all_nodes[idx++];
+        ac_automata_union_matchstrs (cur);
+        node_sort_edges (cur);
+    }
+
+    /* step 3: close the automata; ac_automata_add() refuses patterns now */
+    thiz->automata_open = 0;
+}
+
+/******************************************************************************
+ * FUNCTION: ac_automata_search
+ * Search in the input text using the given automata. on match event it will
+ * call the call-back function. and the call-back function in turn after doing
+ * its job, will return an integer value to ac_automata_search(). 0 value means
+ * continue search, and non-0 value means stop search and return to the caller.
+ * PARAMS:
+ * AC_AUTOMATA_t * thiz: the pointer to the automata
+ * AC_TEXT_t * text: the input text that must be searched
+ * int keep: is the input text the successive chunk of the previous given text
+ * AC_MATCH_CALBACK_f callback: called with every match and with param
+ * void * param: this parameter will be send to call-back function. it is
+ * useful for sending parameter to call-back function from caller function.
+ * RETURN VALUE:
+ * -1: failed; automata is not finalized
+ *  0: success; input text was searched to the end
+ *  1: success; input text was searched partially.
(callback broke the loop)
+******************************************************************************/
+int ac_automata_search (AC_AUTOMATA_t * thiz, AC_TEXT_t * text, int keep,
+        AC_MATCH_CALBACK_f callback, void * param)
+{
+    unsigned long position;
+    AC_NODE_t * current;
+    AC_NODE_t * next;
+    AC_MATCH_t match;
+
+    if (thiz->automata_open)
+        /* you must call ac_automata_finalize() first */
+        return -1;
+
+    /* the callback API does not use the text stored by ac_automata_settext() */
+    thiz->text = 0;
+
+    if (!keep)
+        ac_automata_reset(thiz);
+
+    position = 0;
+    current = thiz->current_node;
+
+    /* This is the main search loop.
+     * it must be as lightweight as possible. */
+    while (position < text->length)
+    {
+        if (!(next = node_findbs_next(current, text->astring[position])))
+        {
+            if(current->failure_node /* we are not in the root node */)
+                current = current->failure_node;
+            else
+                position++;
+        }
+        else
+        {
+            current = next;
+            position++;
+        }
+
+        if (current->final && next)
+        /* We check 'next' to find out if we came here after a alphabet
+         * transition or due to a fail. in second case we should not report
+         * matching because it was reported in previous node */
+        {
+            match.position = position + thiz->base_position;
+            match.match_num = current->matched_patterns_num;
+            match.patterns = current->matched_patterns;
+            /* we found a match! do call-back */
+            if (callback(&match, param))
+                /* note: the status variables below are NOT saved on this path */
+                return 1;
+        }
+    }
+
+    /* save status variables */
+    thiz->current_node = current;
+    thiz->base_position += position;
+    return 0;
+}
+
+/******************************************************************************
+ * FUNCTION: ac_automata_settext
+ * Store the text that subsequent ac_automata_findnext() calls will scan and
+ * rewind the read position to its beginning.
+ * PARAMS:
+ * AC_AUTOMATA_t * thiz: the pointer to the automata
+ * AC_TEXT_t * text: the text to scan; only the pointer is kept, not a copy
+ * int keep: 0 resets the automata state first, non-0 keeps the state of the
+ * previous text
+******************************************************************************/
+void ac_automata_settext (AC_AUTOMATA_t * thiz, AC_TEXT_t * text, int keep)
+{
+    thiz->text = text;
+    if (!keep)
+        ac_automata_reset(thiz);
+    thiz->position = 0;
+}
+
+/******************************************************************************
+ * FUNCTION: ac_automata_findnext
+ * Continue scanning the text given to ac_automata_settext() and stop at the
+ * next match.
+ * PARAMS:
+ * AC_AUTOMATA_t * thiz: the pointer to the automata
+ * RETURN VALUE:
+ * pointer to the match, or 0 when the automata is not finalized, no text was
+ * set, or the end of the text was reached without a further match. the
+ * returned object is a per-thread buffer that is overwritten by the next call
+ * made from the same thread.
+******************************************************************************/
+AC_MATCH_t * ac_automata_findnext (AC_AUTOMATA_t * thiz)
+{
+    unsigned long position;
+    AC_NODE_t * current;
+    AC_NODE_t * next;
+    /* One result buffer per thread. A plain 'static' here was shared by every
+     * thread in the process, so automata used concurrently from different
+     * worker threads could overwrite each other's result. */
+    static thread_local AC_MATCH_t match;
+
+    if (thiz->automata_open)
+        return 0;
+
+    if (!thiz->text)
+        return 0;
+
+    position = thiz->position;
+    current = thiz->current_node;
+    match.match_num = 0;
+
+    /* This is the main search loop.
+     * it must be as lightweight as possible. */
+    while (position < thiz->text->length)
+    {
+        if (!(next = node_findbs_next(current, thiz->text->astring[position])))
+        {
+            if (current->failure_node /* we are not in the root node */)
+                current = current->failure_node;
+            else
+                position++;
+        }
+        else
+        {
+            current = next;
+            position++;
+        }
+
+        if (current->final && next)
+        /* We check 'next' to find out if we came here after a alphabet
+         * transition or due to a fail. in second case we should not report
+         * matching because it was reported in previous node */
+        {
+            match.position = position + thiz->base_position;
+            match.match_num = current->matched_patterns_num;
+            match.patterns = current->matched_patterns;
+            break;
+        }
+    }
+
+    /* save status variables */
+    thiz->current_node = current;
+    thiz->position = position;
+
+    if (!match.match_num)
+        /* if we came here due to reaching to the end of input text
+         * not a loop break
+         */
+        thiz->base_position += position;
+
+    return match.match_num?&match:0;
+}
+
+/******************************************************************************
+ * FUNCTION: ac_automata_reset
+ * reset the automata and make it ready for doing new search on a new text.
+ * when you finished with the input text, you must reset automata state for
+ * new input, otherwise it will not work.
+ * PARAMS:
+ * AC_AUTOMATA_t * thiz: the pointer to the automata
+******************************************************************************/
+void ac_automata_reset (AC_AUTOMATA_t * thiz)
+{
+    thiz->current_node = thiz->root;
+    thiz->base_position = 0;
+}
+
+/******************************************************************************
+ * FUNCTION: ac_automata_release
+ * Release all allocated memories to the automata
+ * PARAMS:
+ * AC_AUTOMATA_t * thiz: the pointer to the automata; a null pointer is ignored
+******************************************************************************/
+void ac_automata_release (AC_AUTOMATA_t * thiz)
+{
+    unsigned int i;
+    AC_NODE_t * n;
+
+    if (!thiz)
+        return;
+
+    /* every node ever created for this automata is listed in all_nodes */
+    for (i=0; i < thiz->all_nodes_num; i++)
+    {
+        n = thiz->all_nodes[i];
+        node_release(n);
+    }
+    free(thiz->all_nodes);
+    free(thiz);
+}
+
+/******************************************************************************
+ * FUNCTION: ac_automata_display
+ * Prints the automata to output in human readable form. it is useful for
+ * debugging purpose.
+ * PARAMS:
+ * AC_AUTOMATA_t * thiz: the pointer to the automata
+ * char repcast: 'n': print AC_REP_t as number, 's': print AC_REP_t as string
+ * (any other value prints the separators only)
+******************************************************************************/
+void ac_automata_display (AC_AUTOMATA_t * thiz, char repcast)
+{
+    unsigned int i, j;
+    AC_NODE_t * n;
+    struct edge * e;
+    AC_PATTERN_t sid;
+
+    printf("---------------------------------\n");
+
+    for (i=0; i<thiz->all_nodes_num; i++)
+    {
+        n = thiz->all_nodes[i];
+        /* a node without failure node is printed as failing to NODE(1) */
+        printf("NODE(%3d)/----fail----> NODE(%3d)\n",
+                n->id, (n->failure_node)?n->failure_node->id:1);
+        for (j=0; j<n->outgoing_degree; j++)
+        {
+            e = &n->outgoing[j];
+            printf(" |----(");
+            /* <ctype.h> functions are only defined for values representable
+             * as unsigned char (or EOF); a negative alpha would be undefined
+             * behaviour. assumes AC_ALPHABET_t is a char-sized type, as the
+             * %c below suggests -- TODO confirm against actypes.h */
+            if(isgraph((unsigned char)e->alpha))
+                printf("%c)---", e->alpha);
+            else
+                printf("0x%x)", e->alpha);
+            printf("--> NODE(%3d)\n", e->next->id);
+        }
+        if (n->matched_patterns_num) {
+            printf("Accepted patterns: {");
+            for (j=0; j<n->matched_patterns_num; j++)
+            {
+                sid = n->matched_patterns[j];
+                if(j) printf(", ");
+                switch (repcast)
+                {
+                case 'n':
+                    printf("%ld", sid.rep.number);
+                    break;
+                case 's':
+                    printf("%s", sid.rep.stringy);
+                    break;
+                }
+            }
+            printf("}\n");
+        }
+        printf("---------------------------------\n");
+    }
+}
+
+/******************************************************************************
+ * FUNCTION: ac_automata_register_nodeptr
+ * Adds the node pointer to all_nodes. the array grows by
+ * REALLOC_CHUNK_ALLNODES entries whenever it is full.
+******************************************************************************/
+static void ac_automata_register_nodeptr (AC_AUTOMATA_t * thiz, AC_NODE_t * node)
+{
+    if(thiz->all_nodes_num >= thiz->all_nodes_max)
+    {
+        thiz->all_nodes_max += REALLOC_CHUNK_ALLNODES;
+        /* NOTE(review): the realloc result is not checked; this function has
+         * no way to report the failure to its callers */
+        thiz->all_nodes = (AC_NODE_t **)realloc
+            (thiz->all_nodes, thiz->all_nodes_max*sizeof(AC_NODE_t *));
+    }
+    thiz->all_nodes[thiz->all_nodes_num++] = node;
+}
+
+/******************************************************************************
+ * FUNCTION: ac_automata_union_matchstrs
+ * Collect accepted patterns of the node.
the accepted patterns consist of the
+ * node's own accepted pattern plus accepted patterns of its failure node.
+******************************************************************************/
+static void ac_automata_union_matchstrs (AC_NODE_t * node)
+{
+    AC_NODE_t * fail;
+    unsigned int k;
+
+    /* follow the failure links until a node without failure node is reached */
+    for (fail = node->failure_node; fail; fail = fail->failure_node)
+    {
+        k = 0;
+        while (k < fail->matched_patterns_num)
+        {
+            node_register_matchstr(node, &(fail->matched_patterns[k]));
+            k++;
+        }
+
+        /* reaching a final node through the chain makes this node final too */
+        if (fail->final)
+            node->final = 1;
+    }
+    /* TODO : sort matched_patterns? is that necessary? I don't think so. */
+}
+
+/******************************************************************************
+ * FUNCTION: ac_automata_set_failure
+ * find failure node for the given node: the node reached from the root by
+ * alphas[i..depth-1] for the smallest i >= 1 for which that walk exists, or
+ * the root when there is none.
+******************************************************************************/
+static void ac_automata_set_failure
+    (AC_AUTOMATA_t * thiz, AC_NODE_t * node, AC_ALPHABET_t * alphas)
+{
+    unsigned int start, pos;
+    AC_NODE_t * cand;
+
+    for (start = 1; start < node->depth; start++)
+    {
+        /* try to spell alphas[start .. depth-1] starting at the root */
+        cand = thiz->root;
+        pos = start;
+        while (pos < node->depth && cand)
+        {
+            cand = node_find_next (cand, alphas[pos]);
+            pos++;
+        }
+
+        if (cand)
+        {
+            node->failure_node = cand;
+            break;
+        }
+    }
+
+    if (!node->failure_node)
+        node->failure_node = thiz->root;
+}
+
+/******************************************************************************
+ * FUNCTION: ac_automata_traverse_setfailure
+ * Traverse all automata nodes using DFS (Depth First Search), meanwhile it set
+ * the failure node for every node it passes through. this function must be
+ * called after adding last pattern to automata. i.e. after calling this you
+ * can not add further pattern to automata.
+******************************************************************************/ +static void ac_automata_traverse_setfailure + (AC_AUTOMATA_t * thiz, AC_NODE_t * node, AC_ALPHABET_t * alphas) +{ + unsigned int i; + AC_NODE_t * next; + + for (i=0; i < node->outgoing_degree; i++) + { + alphas[node->depth] = node->outgoing[i].alpha; + next = node->outgoing[i].next; + + /* At every node look for its failure node */ + ac_automata_set_failure (thiz, next, alphas); + + /* Recursively call itself to traverse all nodes */ + ac_automata_traverse_setfailure (thiz, next, alphas); + } +} diff --git a/src/main.cpp b/src/main.cpp new file mode 100644 index 0000000..3474c9f --- /dev/null +++ b/src/main.cpp @@ -0,0 +1,624 @@ +#include "main.h" +#include "worker.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "AhoCorasickPlus.h" +#include "patr.h" +#include "qdpi.h" +#include "sendertask.h" +#include "statistictask.h" + +struct ndpi_detection_module_struct* extFilter::my_ndpi_struct = NULL; +u_int32_t extFilter::ndpi_size_flow_struct = 0; +u_int32_t extFilter::ndpi_size_id_struct = 0; +u_int32_t extFilter::current_ndpi_memory = 0; +u_int32_t extFilter::max_ndpi_memory = 0; + +extFilter::extFilter(): _helpRequested(false), _listDPDKPorts(false), _nbRxQueues(1) +{ + _coreMaskToUse = pcpp::getCoreMaskForAllMachineCores(); +// Poco::ErrorHandler::set(&_errorHandler); +} + + +extFilter::~extFilter() +{ +} + +/** + * Prepare the configuration for each core. Configuration includes: which DpdkDevices and which RX queues to receive packets from, where to send the matched + * packets, etc. 
+ */ +void prepareCoreConfiguration(std::vector& dpdkDevicesToUse, std::vector& coresToUse, WorkerConfig workerConfigArr[], int workerConfigArrLen, int nbRxQueues) +{ + // create a list of pairs of DpdkDevice and RX queues for all RX queues in all requested devices + int totalNumOfRxQueues = 0; + std::vector > deviceAndRxQVec; + for (std::vector::iterator iter = dpdkDevicesToUse.begin(); iter != dpdkDevicesToUse.end(); iter++) + { + if(nbRxQueues > (*iter)->getTotalNumOfRxQueues()) + nbRxQueues=(*iter)->getTotalNumOfRxQueues(); + for (int rxQueueIndex = 0; rxQueueIndex < nbRxQueues; rxQueueIndex++) + { + std::pair curPair(*iter, rxQueueIndex); + deviceAndRxQVec.push_back(curPair); + } + totalNumOfRxQueues += nbRxQueues; + } + + // calculate how many RX queues each core will read packets from. We divide the total number of RX queues with total number of core + int numOfRxQueuesPerCore = totalNumOfRxQueues / coresToUse.size(); + int rxQueuesRemainder = totalNumOfRxQueues % coresToUse.size(); + + // prepare the configuration for every core: divide the devices and RX queue for each device with the various cores + int i = 0; + std::vector >::iterator pairVecIter = deviceAndRxQVec.begin(); + for (std::vector::iterator iter = coresToUse.begin(); iter != coresToUse.end(); iter++) + { + printf("Using core %d\n", iter->Id); + workerConfigArr[i].CoreId = iter->Id; + + for (int rxQIndex = 0; rxQIndex < numOfRxQueuesPerCore; rxQIndex++) + { + if (pairVecIter == deviceAndRxQVec.end()) + break; + workerConfigArr[i].InDataCfg[pairVecIter->first].push_back(pairVecIter->second); + pairVecIter++; + } + if (rxQueuesRemainder > 0 && (pairVecIter != deviceAndRxQVec.end())) + { + workerConfigArr[i].InDataCfg[pairVecIter->first].push_back(pairVecIter->second); + pairVecIter++; + rxQueuesRemainder--; + } + + // print configuration for core + printf(" Core configuration:\n"); + for (InputDataConfig::iterator iter = workerConfigArr[i].InDataCfg.begin(); iter != 
workerConfigArr[i].InDataCfg.end(); iter++) + { + printf(" DPDK device#%d: ", iter->first->getDeviceId()); + for (std::vector::iterator iter2 = iter->second.begin(); iter2 != iter->second.end(); iter2++) + { + printf("RX-Queue#%d; ", *iter2); + + } + printf("\n"); + } + if (workerConfigArr[i].InDataCfg.size() == 0) + { + printf(" None\n"); + } + i++; + } + +} + +void extFilter::initialize(Application& self) +{ + loadConfiguration(); + ServerApplication::initialize(self); + + // initialize DPDK + if (!pcpp::DpdkDeviceList::initDpdk(_coreMaskToUse, _BufPoolSize)) + { + logger().fatal("Couldn't initialize DPDK!"); + throw Poco::Exception("Couldn't initialize DPDK"); + } + + + _lower_host=config().getBool("lower_host", false); + _match_url_exactly=config().getBool("match_url_exactly", false); + _block_undetected_ssl=config().getBool("block_undetected_ssl", false); + _http_redirect=config().getBool("http_redirect", true); + _statistic_interval=config().getInt("statistic_interval", 0); + _nbRxQueues=config().getInt("rx_queues", 1); + _BufPoolSize=config().getInt("mbuf_pool_size", DEFAULT_MBUF_POOL_SIZE); + _coreMaskToUse=config().getInt("core_mask", pcpp::getCoreMaskForAllMachineCores()); + + _urlsFile=config().getString("urllist",""); + _domainsFile=config().getString("domainlist",""); + _sslIpsFile=config().getString("sslips",""); + _sslFile=config().getString("ssllist",""); + _hostsFile=config().getString("hostlist",""); + + std::string http_code=config().getString("http_code",""); + if(!http_code.empty()) + { + http_code.erase(std::remove(http_code.begin(), http_code.end(), '"'), http_code.end()); + _sender_params.code=http_code; + logger().debug("HTTP code set to %s", http_code); + } + _sender_params.redirect_url=config().getString("redirect_url",""); + _sender_params.send_rst_to_server=config().getBool("rst_to_server",false); + + std::string add_p_type=config().getString("url_additional_info","none"); + std::transform(add_p_type.begin(), add_p_type.end(), 
add_p_type.begin(), ::tolower); + + std::map add_type_s; + add_type_s["none"]=A_TYPE_NONE; + add_type_s["line"]=A_TYPE_ID; + add_type_s["url"]=A_TYPE_URL; + + std::map::iterator it=add_type_s.find(add_p_type); + if(it == add_type_s.end()) + { + throw Poco::Exception("Unknown url_additional_info type '" + add_p_type + "'",404); + } + _add_p_type=it->second; + logger().debug("URL additional info set to %s", add_p_type); + + std::string dpdk_ports=config().getString("dpdk_ports",""); + if(!dpdk_ports.empty()) + { + Poco::StringTokenizer restTokenizer(dpdk_ports, ","); + for(Poco::StringTokenizer::Iterator itr=restTokenizer.begin(); itr!=restTokenizer.end(); ++itr) + { + _dpdkPortVec.push_back(Poco::NumberParser::parse(*itr)); + } + } + + if(_dpdkPortVec.empty()) + { + logger().fatal("DPDK ports not specified!"); + throw Poco::Exception("DPDK ports not specified!"); + } + // todo... ???? + + my_ndpi_struct = init_ndpi(); + + if (my_ndpi_struct == NULL) + { + logger().fatal("Can't initialize nDPI!"); + throw Poco::Exception("Can't initialize nDPI!"); + } + + std::string _protocolsFile=config().getString("protocols",""); + if(!_protocolsFile.empty()) + ndpi_load_protocols_file(my_ndpi_struct, (char *)_protocolsFile.c_str()); + + // Load sizes of main parsing structures + ndpi_size_id_struct = ndpi_detection_get_sizeof_ndpi_id_struct(); + ndpi_size_flow_struct = ndpi_detection_get_sizeof_ndpi_flow_struct(); + + // removing DPDK master core from core mask because DPDK worker threads cannot run on master core + _coreMaskToUse = _coreMaskToUse & ~(pcpp::DpdkDeviceList::getInstance().getDpdkMasterCore().Mask); +} + +void extFilter::uninitialize() +{ + logger().debug("Shutting down"); + ServerApplication::uninitialize(); +} + +void extFilter::defineOptions(Poco::Util::OptionSet& options) +{ + ServerApplication::defineOptions(options); + options.addOption( + Poco::Util::Option("help","h","Display help on command line arguments.") + .required(false) + .repeatable(false) + 
.callback(Poco::Util::OptionCallback(this,&extFilter::handleHelp))); + options.addOption( + Poco::Util::Option("list","l","Print the list of DPDK ports and exit.") + .required(false) + .repeatable(false) + .callback(Poco::Util::OptionCallback(this,&extFilter::printDPDKPorts))); + options.addOption( + Poco::Util::Option("config-file","f","Specify config file to read.") + .required(true) + .repeatable(false) + .argument("FILE")); + options.addOption( + Poco::Util::Option("dpdk-ports","d","A comma-separated list of the DPDK port numbers to receive packets from.") + .required(false) + .repeatable(false) + .argument("PORT_1...")); + options.addOption( + Poco::Util::Option("core-mask","c","Core mask of cores to use. For example: use 7 (binary 0111) to use cores 0,1,2. Default is using all cores except management core.") + .required(false) + .repeatable(false) + .argument("CORE_MASK")); + options.addOption( + Poco::Util::Option("mbuf-pool-size","m","DPDK mBuf pool size to initialize DPDK with. 
Default value is 4095.") + .required(false) + .repeatable(false) + .argument("POOL_SIZE")); +} + +void extFilter::handleOption(const std::string& name,const std::string& value) +{ + ServerApplication::handleOption(name, value); + if(name == "config-file") + { + loadConfiguration(value); + } + if(name == "core-mask") + { + _coreMaskToUse = Poco::NumberParser::parse(value); + } + if(name == "mbuf-pool-size") + { + _BufPoolSize = Poco::NumberParser::parse(value); + } + if(name == "dpdk-ports") + { + Poco::StringTokenizer restTokenizer(value, ","); + for(Poco::StringTokenizer::Iterator itr=restTokenizer.begin(); itr!=restTokenizer.end(); ++itr) + { + _dpdkPortVec.push_back(Poco::NumberParser::parse(*itr)); + } + } +/* if(name == "threads") + { + _cmd_threadsNum = Poco::NumberParser::parse(value); + }*/ +} + +void extFilter::handleHelp(const std::string& name,const std::string& value) +{ + _helpRequested=true; + displayHelp(); + stopOptionsProcessing(); +} + +void extFilter::displayHelp() +{ + Poco::Util::HelpFormatter helpFormatter(options()); + helpFormatter.setCommand(commandName()); + helpFormatter.setUsage("<-c config file> [options]"); + helpFormatter.setHeader("extFilter"); + helpFormatter.format(std::cout); +} + +void extFilter::printDPDKPorts(const std::string& name,const std::string& value) +{ + _listDPDKPorts=true; + stopOptionsProcessing(); + pcpp::CoreMask coreMaskToUse = pcpp::getCoreMaskForAllMachineCores(); + // initialize DPDK + if (!pcpp::DpdkDeviceList::initDpdk(coreMaskToUse, DEFAULT_MBUF_POOL_SIZE)) + { + logger().fatal("Couldn't initialize DPDK!"); + return; + } + + std::cout << "DPDK port list:" << std::endl; + + // go over all available DPDK devices and print info for each one + std::vector deviceList = pcpp::DpdkDeviceList::getInstance().getDpdkDeviceList(); + for (std::vector::iterator iter = deviceList.begin(); iter != deviceList.end(); iter++) + { + pcpp::DpdkDevice* dev = *iter; + printf("\tPort #%d: MAC address='%s'; PCI address='%s'; 
PMD='%s'\n", + dev->getDeviceId(), + dev->getMacAddress().toString().c_str(), + dev->getPciAddress().toString().c_str(), + dev->getPMDName().c_str()); + } + +} + +int extFilter::main(const ArgVec& args) +{ + if(!_helpRequested && !_listDPDKPorts) + { + std::map *ptr=(std::map *)pcpp::DnsLayer::getDNSPortMap(); + ptr->clear(); + + ptr=(std::map *)pcpp::SSLLayer::getSSLPortMap(); + ptr->clear(); + + ptr=(std::map *)pcpp::HttpMessage::getHTTPPortMap(); + ptr->clear(); + + +/* struct sigaction handler; + handler.sa_handler = handleSignal; + handler.sa_flags = 0; + sigemptyset(&handler.sa_mask); + sigaction(SIGHUP, &handler, NULL); + Poco::TaskManager tm; + tm.start(new NFQStatisticTask(_statistic_interval)); + tm.start(new nfqThread(_config)); + tm.start(new SenderTask(_sender_params)); + tm.start(new ReloadTask(this)); +*/ + + // extract core vector from core mask + std::vector coresToUse; + pcpp::createCoreVectorFromCoreMask(_coreMaskToUse, coresToUse); + + // collect the list of DPDK devices + std::vector dpdkDevicesToUse; + + for (std::vector::iterator iter = _dpdkPortVec.begin(); iter != _dpdkPortVec.end(); iter++) + { + pcpp::DpdkDevice* dev = pcpp::DpdkDeviceList::getInstance().getDeviceByPort(*iter); + if (dev == NULL) + { + logger().fatal("DPDK device for port %d doesn't exist", *iter); // XXX check it!!! 
+ return Poco::Util::Application::EXIT_OK; + } + std::cout << "pushing device with port " << *iter << std::endl; + dpdkDevicesToUse.push_back(dev); + } + + // go over all devices and open them + for (std::vector::iterator iter = dpdkDevicesToUse.begin(); iter != dpdkDevicesToUse.end(); iter++) + { + std::cout << "total num of rx queue: " << (*iter)->getTotalNumOfRxQueues() << " total num of tx queues: " << (*iter)->getTotalNumOfTxQueues() << std::endl; + if (!(*iter)->openMultiQueues(_nbRxQueues, 1)) + { + logger().fatal("Couldn't open DPDK device #%d, PMD '%s'", (*iter)->getDeviceId(), (*iter)->getPMDName()); + return Poco::Util::Application::EXIT_OK; + } + } + + + WorkerConfig workerConfigArr[coresToUse.size()]; + prepareCoreConfiguration(dpdkDevicesToUse, coresToUse, workerConfigArr, coresToUse.size(),_nbRxQueues); + + // create worker thread for every core + std::vector workerThreadVec; + int i = 0; + for (std::vector::iterator iter = coresToUse.begin(); iter != coresToUse.end(); iter++) + { + if(!_urlsFile.empty()) + { + workerConfigArr[i].atm = new AhoCorasickPlus(); + loadURLs(_urlsFile, workerConfigArr[i].atm); + workerConfigArr[i].atm->finalize(); + } + if(!_domainsFile.empty()) + { + workerConfigArr[i].atmDomains = new AhoCorasickPlus(); + loadDomains(_domainsFile,workerConfigArr[i].atmDomains,&workerConfigArr[i].domainsMatchType); + workerConfigArr[i].atmDomains->finalize(); + } + if(!_sslIpsFile.empty() && _block_undetected_ssl) + { + workerConfigArr[i].block_undetected_ssl = true; + workerConfigArr[i].sslIPs = new Patricia(); + loadSSLIP(_sslIpsFile, workerConfigArr[i].sslIPs); + } + if(!_sslFile.empty()) + { + workerConfigArr[i].atmSSLDomains = new AhoCorasickPlus(); + loadDomains(_sslFile, workerConfigArr[i].atmSSLDomains,&workerConfigArr[i].SSLdomainsMatchType); + workerConfigArr[i].atmSSLDomains->finalize(); + } + if(!_hostsFile.empty()) + { + workerConfigArr[i].ipportMap = new IPPortMap; + loadHosts(_hostsFile,workerConfigArr[i].ipportMap); + } 
+// workerConfigArr[i].PathToWritePackets = "thread"+std::to_string(i)+".pcap"; + workerConfigArr[i].match_url_exactly = _match_url_exactly; + workerConfigArr[i].lower_host = _lower_host; + workerConfigArr[i].http_redirect = _http_redirect; + workerConfigArr[i].add_p_type = _add_p_type; + std::string workerName("WorkerThread " + std::to_string(i)); + WorkerThread* newWorker = new WorkerThread(workerName, workerConfigArr[i]); + workerThreadVec.push_back(newWorker); + i++; + } + + + Poco::TaskManager tm; + tm.start(new SenderTask(_sender_params)); + + logger().debug("Starting worker threads..."); + // start all worker threads + if (!pcpp::DpdkDeviceList::getInstance().startDpdkWorkerThreads(_coreMaskToUse, workerThreadVec)) + { + logger().fatal("Couldn't start worker threads"); + return Poco::Util::Application::EXIT_OK; + } + tm.start(new StatisticTask(_statistic_interval, workerThreadVec)); + waitForTerminationRequest(); + pcpp::DpdkDeviceList::getInstance().stopDpdkWorkerThreads(); + tm.cancelAll(); + SenderTask::queue.wakeUpAll(); + tm.joinAll(); + // stop worker threads + + for (std::vector::iterator iter = workerThreadVec.begin(); iter != workerThreadVec.end(); iter++) + { + WorkerThread* thread = (WorkerThread*)(*iter); + delete thread; + } + + } + return Poco::Util::Application::EXIT_OK; +} + +void extFilter::loadURLs(std::string &fn, AhoCorasickPlus *dm_atm) +{ + logger().debug("Loading URLS from file %s",fn); + Poco::FileInputStream uf(fn); + if(uf.good()) + { + int lineno=1; + while(!uf.eof()) + { + std::string str; + getline(uf,str); + if(!str.empty()) + { + if(str[0] == '#' || str[0] == ';') + continue; + AhoCorasickPlus::EnumReturnStatus status; + AhoCorasickPlus::PatternId patId = lineno; + std::string url = str; + std::size_t http_pos = url.find("http://"); + if(http_pos == std::string::npos || http_pos > 0) + { + url.insert(0,"http://"); + } + status = dm_atm->addPattern(url, patId); + if (status!=AhoCorasickPlus::RETURNSTATUS_SUCCESS) + { + if(status 
== AhoCorasickPlus::RETURNSTATUS_DUPLICATE_PATTERN) + { + logger().warning("Pattern '%s' already present in the URL database from file %s",str,fn); + } else { + logger().error("Failed to add '%s' from line %d from file %s",str,lineno,fn); + } + } + } + lineno++; + } + } else + throw Poco::OpenFileException(fn); + uf.close(); + logger().debug("Finish loading URLS"); +} + +void extFilter::loadDomains(std::string &fn, AhoCorasickPlus *dm_atm,DomainsMatchType *dm_map) +{ + logger().debug("Loading domains from file %s",fn); + Poco::FileInputStream df(fn); + if(df.good()) + { + int lineno=1; + while(!df.eof()) + { + std::string str; + getline(df,str); + if(!str.empty()) + { + if(str[0] == '#' || str[0] == ';') + continue; + AhoCorasickPlus::EnumReturnStatus status; + AhoCorasickPlus::PatternId patId = lineno; + std::size_t pos = str.find("*."); + bool exact_match=true; + std::string insert=str; + if(pos != std::string::npos) + { + exact_match=false; + insert=str.substr(pos+2,str.length()-2); + } + status = dm_atm->addPattern(insert, patId); + if (status!=AhoCorasickPlus::RETURNSTATUS_SUCCESS) + { + if(status == AhoCorasickPlus::RETURNSTATUS_DUPLICATE_PATTERN) + { + logger().warning("Pattern '%s' already present in the database from file %s",insert,fn); + } else { + logger().error("Failed to add '%s' from line %d from file %s",insert,lineno,fn); + } + } else { + std::pair res=dm_map->insert(DomainsMatchType::ValueType(lineno,exact_match)); + if(res.second) + { +// logger().debug("Inserted domain: '%s' from line %d from file %s",str,lineno,fn); + } else { + logger().debug("Updated domain: '%s' from line %d from file %s",str,lineno,fn); + } + } + } + lineno++; + } + } else + throw Poco::OpenFileException(fn); + df.close(); + logger().debug("Finish loading domains"); +} + +void extFilter::loadSSLIP(const std::string &fn, Patricia *patricia) +{ + logger().debug("Loading SSL ips from file %s",fn); + Poco::FileInputStream hf(fn); + if(hf.good()) + { + int lineno=1; + 
while(!hf.eof()) + { + std::string str; + getline(hf,str); + if(!str.empty()) + { + if(str[0] == '#' || str[0] == ';') + continue; + if(!patricia->make_and_lookup(str)) + { + logger().information("Unable to add IP address %s from line %d to the SSL IPs list", str, lineno); + } + } + lineno++; + } + } else + throw Poco::OpenFileException(fn); + hf.close(); + logger().debug("Finish loading SSL ips"); +} + +void extFilter::loadHosts(std::string &fn,IPPortMap *ippm) +{ + logger().debug("Loading ip:port from file %s",fn); + Poco::FileInputStream hf(fn); + if(hf.good()) + { + int lineno=1; + while(!hf.eof()) + { + std::string str; + getline(hf,str); + if(!str.empty()) + { + if(str[0] == '#' || str[0] == ';') + continue; + std::size_t found=str.find(":"); + std::string ip=str.substr(0, found); + std::string port; + unsigned short porti=0; + if(found != std::string::npos) + { + port=str.substr(found+1,str.length()); + logger().debug("IP is %s port %s",ip,port); + porti=atoi(port.c_str()); + } else { + logger().debug("IP %s without port", ip); + } + Poco::Net::IPAddress ip_addr(ip); + IPPortMap::iterator it=ippm->find(ip_addr); + if(it == ippm->end()) + { + std::set ports; + if(porti) + { + logger().debug("Adding port %s to ip %s", port, ip); + ports.insert(porti); + } + ippm->insert(std::make_pair(ip_addr,ports)); + logger().debug("Inserted ip: %s from line %d", ip, lineno); + } else { + logger().debug("Adding port %s from line %d to ip %s", port,lineno,ip); + it->second.insert(porti); + } + + } + lineno++; + } + } else + throw Poco::OpenFileException(fn); + hf.close(); + logger().debug("Finish ip:port"); +} + + +POCO_SERVER_MAIN(extFilter) diff --git a/src/node.cpp b/src/node.cpp new file mode 100644 index 0000000..8d02025 --- /dev/null +++ b/src/node.cpp @@ -0,0 +1,254 @@ +/* + * node.c: implementation of automata node + * This file is part of multifast. 
+ * + Copyright 2010-2013 Kamiar Kanani + + multifast is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + multifast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with multifast. If not, see . +*/ + +#include +#include +#include +#include "node.h" + +/* reallocation step for AC_NODE_t.matched_patterns */ +#define REALLOC_CHUNK_MATCHSTR 1 + +/* reallocation step for AC_NODE_t.outgoing array */ +#define REALLOC_CHUNK_OUTGOING 1 +/* For different node depth, number of outgoing edges differs considerably + * if you care about preprocessing speed, you can set a higher value for + * reallocation step size to prevent multiple reallocations. 
+ */
+
+/* Private function prototype */
+static void node_init (AC_NODE_t * thiz);
+static int node_edge_compare (const void * l, const void * r);
+static int node_has_matchstr (AC_NODE_t * thiz, AC_PATTERN_t * newstr);
+
+
+/******************************************************************************
+ * FUNCTION: node_create
+ * Create the node: allocate it, initialize it and assign its id.
+ * RETURN VALUE:
+ * the new node, or NULL when the node or one of its arrays can not be
+ * allocated (no id is assigned in that case).
+******************************************************************************/
+struct AC_NODE * node_create(void)
+{
+    AC_NODE_t * thiz;
+    thiz = (AC_NODE_t *) malloc (sizeof(AC_NODE_t));
+    if (!thiz)
+        return NULL;
+
+    node_init(thiz);
+    if (!thiz->outgoing || !thiz->matched_patterns)
+    {
+        /* node_release() only calls free(), which accepts null pointers */
+        node_release(thiz);
+        return NULL;
+    }
+
+    node_assign_id(thiz);
+    return thiz;
+}
+
+/******************************************************************************
+ * FUNCTION: node_init
+ * Initialize node: zero every field, then allocate the edge array and the
+ * matched-pattern array with their initial capacities. the two array pointers
+ * are left NULL when their allocation fails; the caller has to check them.
+******************************************************************************/
+void node_init(AC_NODE_t * thiz)
+{
+    memset(thiz, 0, sizeof(AC_NODE_t));
+
+    thiz->outgoing_max = REALLOC_CHUNK_OUTGOING;
+    thiz->outgoing = (struct edge *) malloc
+            (thiz->outgoing_max*sizeof(struct edge));
+
+    thiz->matched_patterns_max = REALLOC_CHUNK_MATCHSTR;
+    thiz->matched_patterns = (AC_PATTERN_t *) malloc
+            (thiz->matched_patterns_max*sizeof(AC_PATTERN_t));
+}
+
+/******************************************************************************
+ * FUNCTION: node_release
+ * Release node: frees its two arrays and the node itself. nodes that its
+ * edges point to are not released here.
+******************************************************************************/
+void node_release(AC_NODE_t * thiz)
+{
+    free(thiz->matched_patterns);
+    free(thiz->outgoing);
+    free(thiz);
+}
+
+/******************************************************************************
+ * FUNCTION: node_find_next
+ * Find out the next node for a given Alpha to move. this function is used in
+ * the pre-processing stage in which edge array is not sorted. so it uses
+ * linear search.
+******************************************************************************/
+AC_NODE_t * node_find_next(AC_NODE_t * thiz, AC_ALPHABET_t alpha)
+{
+    int i;
+
+    /* Scan every registered edge; returns the target of the first edge
+     * labelled with alpha, or NULL when no such edge exists. */
+    for (i=0; i < thiz->outgoing_degree; i++)
+    {
+        if(thiz->outgoing[i].alpha == alpha)
+            return (thiz->outgoing[i].next);
+    }
+    return NULL;
+}
+
+/******************************************************************************
+ * FUNCTION: node_findbs_next
+ * Find out the next node for a given Alpha. this function is used after the
+ * pre-processing stage in which we sort edges. so it uses Binary Search.
+ * Returns the target node of the edge labelled alpha, or NULL if the node
+ * has no such edge. The result is only meaningful once the edge array has
+ * been sorted by alpha (see node_sort_edges).
+******************************************************************************/
+AC_NODE_t * node_findbs_next (AC_NODE_t * thiz, AC_ALPHABET_t alpha)
+{
+    int min, max, mid;
+    AC_ALPHABET_t amid;
+
+    min = 0;
+    max = thiz->outgoing_degree - 1;
+
+    while (min <= max)
+    {
+        mid = (min+max) >> 1;
+        amid = thiz->outgoing[mid].alpha;
+        if (alpha > amid)
+            min = mid + 1;
+        else if (alpha < amid)
+            max = mid - 1;
+        else
+            return (thiz->outgoing[mid].next);
+    }
+    return NULL;
+}
+
+/******************************************************************************
+ * FUNCTION: node_has_matchstr
+ * Determine if a final node contains a pattern in its accepted pattern list
+ * or not. return values: 1 = it has, 0 = it hasn't
+ * Two patterns are considered the same when they have the same length and
+ * the same symbol at every position.
+******************************************************************************/
+int node_has_matchstr (AC_NODE_t * thiz, AC_PATTERN_t * newstr)
+{
+    int i, j;
+    AC_PATTERN_t * str;
+
+    for (i=0; i < thiz->matched_patterns_num; i++)
+    {
+        str = &thiz->matched_patterns[i];
+
+        if (str->length != newstr->length)
+            continue;
+
+        /* Stop at the first differing symbol. This used to be `continue`,
+         * which only continued this inner loop: j always ran to the end and
+         * every stored pattern of equal length was reported as a match. */
+        for (j=0; j<str->length; j++)
+            if(str->astring[j] != newstr->astring[j])
+                break;
+
+        /* j reached the end only if no symbol differed */
+        if (j == str->length)
+            return 1;
+    }
+    return 0;
+}
+
+/******************************************************************************
+ * FUNCTION: node_create_next
+ * Create the next node for the given alpha.
+ * Returns the newly created node, or NULL when an edge labelled alpha
+ * already leaves this node (nothing is created in that case).
+******************************************************************************/
+AC_NODE_t * node_create_next (AC_NODE_t * thiz, AC_ALPHABET_t alpha)
+{
+    AC_NODE_t * next;
+    next = node_find_next (thiz, alpha);
+    if (next)
+    /* The edge already exists */
+        return NULL;
+    /* Otherwise register new edge */
+    next = node_create ();
+    node_register_outgoing(thiz, next, alpha);
+
+    return next;
+}
+
+/******************************************************************************
+ * FUNCTION: node_register_matchstr
+ * Adds the pattern to the list of accepted pattern.
+ * Nothing happens if node_has_matchstr() reports the pattern as already
+ * present. The entry is a shallow copy: the astring pointer, the length and
+ * rep are copied, the symbols themselves are not duplicated.
+******************************************************************************/
+void node_register_matchstr (AC_NODE_t * thiz, AC_PATTERN_t * str)
+{
+    /* Check if the new pattern already exists in the node list */
+    if (node_has_matchstr(thiz, str))
+        return;
+
+    /* Manage memory: grow the array by one chunk when it is full */
+    if (thiz->matched_patterns_num >= thiz->matched_patterns_max)
+    {
+        thiz->matched_patterns_max += REALLOC_CHUNK_MATCHSTR;
+        thiz->matched_patterns = (AC_PATTERN_t *) realloc
+            (thiz->matched_patterns, thiz->matched_patterns_max*sizeof(AC_PATTERN_t));
+    }
+
+    thiz->matched_patterns[thiz->matched_patterns_num].astring = str->astring;
+    thiz->matched_patterns[thiz->matched_patterns_num].length = str->length;
+    thiz->matched_patterns[thiz->matched_patterns_num].rep = str->rep;
+    thiz->matched_patterns_num++;
+}
+
+/******************************************************************************
+ * FUNCTION: node_register_outgoing
+ * Establish an edge between two nodes
+ * The edge (alpha -> next) is appended to thiz->outgoing; no check is made
+ * here for an existing edge with the same alpha (node_create_next does that
+ * before calling).
+******************************************************************************/
+void node_register_outgoing
+    (AC_NODE_t * thiz, AC_NODE_t * next, AC_ALPHABET_t alpha)
+{
+    /* grow the edge array by one chunk when it is full */
+    if(thiz->outgoing_degree >= thiz->outgoing_max)
+    {
+        thiz->outgoing_max += REALLOC_CHUNK_OUTGOING;
+        thiz->outgoing = (struct edge *) realloc
+            (thiz->outgoing, thiz->outgoing_max*sizeof(struct edge));
+    }
+
+    thiz->outgoing[thiz->outgoing_degree].alpha = alpha;
+    thiz->outgoing[thiz->outgoing_degree++].next = next;
+}
+
+/******************************************************************************
+ * FUNCTION: node_assign_id
+ * assign a unique ID to the node (used for debugging purpose).
+ * IDs come from a function-local static counter that starts at 1 and is
+ * incremented without any synchronisation.
+******************************************************************************/
+void node_assign_id (AC_NODE_t * thiz)
+{
+    static int unique_id = 1;
+    thiz->id = unique_id ++;
+}
+
+/******************************************************************************
+ * FUNCTION: node_edge_compare
+ * Comparison function for qsort. see man qsort.
+ * l and r point to two struct edge elements; they are ordered by alpha.
+******************************************************************************/
+int node_edge_compare (const void * l, const void * r)
+{
+    /* According to man page:
+     * The comparison function must return an integer less than, equal to, or
+     * greater than zero if the first argument is considered to be
+     * respectively less than, equal to, or greater than the second. if two
+     * members compare as equal, their order in the sorted array is undefined.
+     *
+     * NOTE: edge alphabets are expected to be unique in every node, but the
+     * equal case is still reported as 0 so that the comparator stays
+     * consistent (cmp(a,b) and cmp(b,a) must not both be positive).
+    **/
+    const struct edge * left = (const struct edge *)l;
+    const struct edge * right = (const struct edge *)r;
+
+    if (left->alpha > right->alpha)
+        return 1;
+    if (left->alpha < right->alpha)
+        return -1;
+    return 0;
+}
+
+/******************************************************************************
+ * FUNCTION: node_sort_edges
+ * sorts edges alphabets.
+******************************************************************************/ +void node_sort_edges (AC_NODE_t * thiz) +{ + qsort ((void *)thiz->outgoing, thiz->outgoing_degree, sizeof(struct edge), + node_edge_compare); +} diff --git a/src/patr.cpp b/src/patr.cpp new file mode 100644 index 0000000..bb738af --- /dev/null +++ b/src/patr.cpp @@ -0,0 +1,128 @@ +/* +* +* Copyright (C) Max +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+* +*/ + +#include "patricia.h" +#include "patr.h" +#include +#include + + +Patricia::Patricia() +{ + tree_ipv4 = New_Patricia(32); + tree_ipv6 = New_Patricia(128); +} + +Patricia::~Patricia() +{ + Destroy_Patricia(tree_ipv4, nullptr); + Destroy_Patricia(tree_ipv6, nullptr); +} + + +patricia_node_t *Patricia::make_and_lookup(std::string &description) +{ + prefix_t prefix; + patricia_node_t *node; + Poco::Net::IPAddress mask; + Poco::Net::IPAddress address; + if(description.empty()) + return nullptr; + std::size_t slash=description.find('/'); + if(slash != std::string::npos) + { + std::string addr=description.substr(0,slash); + std::string msk_t=description.substr(slash+1,description.size()); + if(Poco::Net::IPAddress::tryParse(addr,address)) + { + int msk = std::stoi(msk_t,nullptr); + if(address.family() == Poco::Net::IPAddress::IPv4) + { + if(msk > 32) + { + return nullptr; + } + Poco::Net::IPAddress msk1(msk, Poco::Net::IPAddress::IPv4); + mask=msk1; + } else { + if(msk > 128) + { + return nullptr; + } + mask=Poco::Net::IPAddress(msk,Poco::Net::IPAddress::IPv6); + } + } else + return nullptr; + } else { + if(Poco::Net::IPAddress::tryParse(description,address)) + { + if(address.family() == Poco::Net::IPAddress::IPv4) + mask=Poco::Net::IPAddress(32,Poco::Net::IPAddress::IPv4); + else + mask=Poco::Net::IPAddress(128,Poco::Net::IPAddress::IPv6); + } else + return nullptr; + } + if(!fill_prefix(address.family() == Poco::Net::IPAddress::IPv4 ? AF_INET : AF_INET6,(void *)address.addr(),mask.prefixLength(),prefix)) + return nullptr; + node = patricia_lookup (address.family() == Poco::Net::IPAddress::IPv4 ? tree_ipv4 : tree_ipv6, &prefix); + return (node); +} + +patricia_node_t *Patricia::try_search_exact_ip(Poco::Net::IPAddress &address) +{ + prefix_t prefix; + patricia_node_t *node; + if(!fill_prefix(address.family() == Poco::Net::IPAddress::IPv4 ? AF_INET : AF_INET6,(void *)address.addr(),address.family() == Poco::Net::IPAddress::IPv4 ? 
32 : 128, prefix)) + return nullptr; + node=patricia_search_exact (address.family() == Poco::Net::IPAddress::IPv4 ? tree_ipv4 : tree_ipv6, &prefix); + return (node); +} + +bool Patricia::fill_prefix(int family, void *dest, int bitlen, prefix_t &prefix) +{ + int default_bitlen = sizeof(struct in_addr) * 8; + if(family == AF_INET6) + { + default_bitlen = sizeof(struct in6_addr) * 8; + memcpy (&prefix.add.sin6, dest, sizeof(struct in6_addr)); + } else if (family == AF_INET) + { + memcpy (&prefix.add.sin, dest, sizeof(struct in_addr)); + } else { + return false; + } + prefix.bitlen = (bitlen >= 0)? bitlen: default_bitlen; + prefix.family = family; + prefix.ref_count = 0; + return true; +} + +void Patricia::print_all_nodes() +{ + patricia_node_t *node; + std::cout << "IPv4 nodes:" << std::endl; + PATRICIA_WALK(tree_ipv4->head, node) { + std::cout << "node: " << prefix_toa(node->prefix) << "/" << node->prefix->bitlen << std::endl; + } PATRICIA_WALK_END; + std::cout << "IPv6 nodes:" << std::endl; + PATRICIA_WALK(tree_ipv6->head, node) { + std::cout << "node: " << prefix_toa(node->prefix) << "/" << node->prefix->bitlen << std::endl; + } PATRICIA_WALK_END; +} diff --git a/src/patricia.c b/src/patricia.c new file mode 100644 index 0000000..477001d --- /dev/null +++ b/src/patricia.c @@ -0,0 +1,833 @@ +/* + * $Id: patricia.c,v 1.7 2005/12/07 20:46:41 dplonka Exp $ + * Dave Plonka + * + * This product includes software developed by the University of Michigan, + * Merit Network, Inc., and their contributors. + * + * This file had been called "radix.c" in the MRT sources. + * + * I renamed it to "patricia.c" since it's not an implementation of a general + * radix trie. Also I pulled in various requirements from "prefix.c" and + * "demo.c" so that it could be used as a standalone API. 
+ */
+
+
+/* the first literal needs its trailing space, otherwise the two pieces are
+ * concatenated as "MeritNetwork" */
+static char copyright[] =
+"This product includes software developed by the University of Michigan, Merit "
+"Network, Inc., and their contributors.";
+
+#include <assert.h> /* assert */
+#include <ctype.h> /* isdigit */
+#include <errno.h> /* errno */
+#include <math.h> /* sin */
+#include <stddef.h> /* NULL */
+#include <stdio.h> /* sprintf, fprintf, stderr */
+#include <stdlib.h> /* free, atol, calloc */
+#include <string.h> /* memcpy, strchr, strlen */
+#include <sys/types.h> /* BSD: for inet_addr */
+#include <sys/socket.h> /* BSD, Linux: for inet_addr */
+#include <netinet/in.h> /* BSD, Linux: for inet_addr */
+#include <arpa/inet.h> /* BSD, Linux, Solaris: for inet_addr */
+
+#include "patricia.h"
+
+#define Delete free
+
+/* { from prefix.c */
+
+/* prefix_tochar
+ * convert prefix information to bytes
+ * returns a pointer to the address bytes stored in the prefix, or NULL when
+ * prefix is NULL
+ */
+u_char *
+prefix_tochar (prefix_t * prefix)
+{
+    if (prefix == NULL)
+        return (NULL);
+
+    return ((u_char *) & prefix->add.sin);
+}
+
+/* comp_with_mask
+ * compare the first `mask` bits of the byte strings addr and dest
+ * returns 1 when they are equal, 0 otherwise
+ */
+int
+comp_with_mask (void *addr, void *dest, u_int mask)
+{
+
+    if ( /* mask/8 == 0 || */ memcmp (addr, dest, mask / 8) == 0) {
+        u_int n = mask / 8;
+        /* bit mask for the leading (mask % 8) bits of the last, partial byte.
+         * Built from an unsigned value: left-shifting the negative constant
+         * -1, as was done before, is undefined behaviour in C. */
+        u_int m = ((~0U) << (8 - (mask % 8)));
+
+        if (mask % 8 == 0 || (((u_char *)addr)[n] & m) == (((u_char *)dest)[n] & m))
+            return (1);
+    }
+    return (0);
+}
+
+
+#define PATRICIA_MAX_THREADS		16
+
+/*
+ * convert prefix information to ascii string with length
+ * thread safe and (almost) re-entrant implementation
+ *
+ * prefix    prefix to print; NULL yields the literal "(Null)"
+ * buff      destination buffer; when NULL one of PATRICIA_MAX_THREADS
+ *           static buffers is used in rotation, so a returned string is
+ *           overwritten after that many further calls. The thread-specific
+ *           variant is compiled out (#if 0), the rotation index is a plain
+ *           static.
+ * with_len  non-zero appends "/<bitlen>"
+ *
+ * returns buff for AF_INET and, only when HAVE_IPV6 is defined, for
+ * AF_INET6; NULL for any other family.
+ */
+char *
+prefix_toa2x (prefix_t *prefix, char *buff, int with_len)
+{
+    if (prefix == NULL)
+        return ("(Null)");
+    assert (prefix->ref_count >= 0);
+    if (buff == NULL) {
+
+        struct buffer {
+            char buffs[PATRICIA_MAX_THREADS][48+5];
+            u_int i;
+        } *buffp;
+
+#    if 0
+        THREAD_SPECIFIC_DATA (struct buffer, buffp, 1);
+#    else
+        { /* for scope only */
+            static struct buffer local_buff;
+            buffp = &local_buff;
+        }
+#    endif
+        if (buffp == NULL) {
+            /* XXX should we report an error? */
+            return (NULL);
+        }
+
+        buff = buffp->buffs[buffp->i++%PATRICIA_MAX_THREADS];
+    }
+    if (prefix->family == AF_INET) {
+        u_char *a;
+        assert (prefix->bitlen <= sizeof(struct in_addr) * 8);
+        a = prefix_touchar (prefix);
+        if (with_len) {
+            sprintf (buff, "%d.%d.%d.%d/%d", a[0], a[1], a[2], a[3],
+                     prefix->bitlen);
+        }
+        else {
+            sprintf (buff, "%d.%d.%d.%d", a[0], a[1], a[2], a[3]);
+        }
+        return (buff);
+    }
+#ifdef HAVE_IPV6
+    else if (prefix->family == AF_INET6) {
+        char *r;
+        r = (char *) inet_ntop (AF_INET6, &prefix->add.sin6, buff, 48 /* a guess value */ );
+        if (r && with_len) {
+            assert (prefix->bitlen <= sizeof(struct in6_addr) * 8);
+            sprintf (buff + strlen (buff), "/%d", prefix->bitlen);
+        }
+        return (buff);
+    }
+#endif /* HAVE_IPV6 */
+    else
+        return (NULL);
+}
+
+/* prefix_toa2
+ * convert prefix information to ascii string
+ * (prefix_toa2x without the "/<bitlen>" suffix)
+ */
+char *
+prefix_toa2 (prefix_t *prefix, char *buff)
+{
+    return (prefix_toa2x (prefix, buff, 0));
+}
+
+/* prefix_toa
+ * same as prefix_toa2 but always writes into one of the rotating static
+ * buffers of prefix_toa2x
+ */
+char *
+prefix_toa (prefix_t * prefix)
+{
+    return (prefix_toa2 (prefix, (char *) NULL));
+}
+
+/* New_Prefix2
+ * fill in a prefix from raw address bytes
+ *
+ * family  AF_INET, or AF_INET6 when HAVE_IPV6 is defined
+ * dest    the address bytes to copy into the prefix
+ * bitlen  prefix length; a negative value selects the full address length
+ * prefix  storage to fill; when NULL the prefix is allocated with calloc
+ *
+ * a prefix allocated here starts with ref_count 1, caller-provided storage
+ * gets ref_count 0 (treated as "static" by Ref_Prefix/Deref_Prefix).
+ * returns the prefix, or NULL for an unsupported family. The calloc result
+ * is not checked.
+ */
+prefix_t *
+New_Prefix2 (int family, void *dest, int bitlen, prefix_t *prefix)
+{
+    int dynamic_allocated = 0;
+    int default_bitlen = sizeof(struct in_addr) * 8;
+
+#ifdef HAVE_IPV6
+    if (family == AF_INET6) {
+        default_bitlen = sizeof(struct in6_addr) * 8;
+        if (prefix == NULL) {
+            prefix = (prefix_t *)calloc(1, sizeof (prefix_t));
+            dynamic_allocated++;
+        }
+        memcpy (&prefix->add.sin6, dest, sizeof(struct in6_addr));
+    }
+    else
+#endif /* HAVE_IPV6 */
+    if (family == AF_INET) {
+        if (prefix == NULL) {
+#ifndef NT
+            prefix = (prefix_t *)calloc(1, sizeof (prefix4_t));
+#else
+            //for some reason, compiler is getting
+            //prefix4_t size incorrect on NT
+            prefix = (prefix_t *)calloc(1, sizeof (prefix_t));
+#endif /* NT */
+
+            dynamic_allocated++;
+        }
+        memcpy (&prefix->add.sin, dest, sizeof(struct in_addr));
+    }
+    else {
+        return (NULL);
+    }
+
+    prefix->bitlen = (bitlen >= 0)? bitlen: default_bitlen;
+    prefix->family = family;
+    prefix->ref_count = 0;
+    if (dynamic_allocated) {
+        prefix->ref_count++;
+    }
+/* fprintf(stderr, "[C %s, %d]\n", prefix_toa (prefix), prefix->ref_count); */
+    return (prefix);
+}
+
+/* New_Prefix
+ * allocate a new prefix (New_Prefix2 with no caller-provided storage)
+ */
+prefix_t *
+New_Prefix (int family, void *dest, int bitlen)
+{
+    return (New_Prefix2 (family, dest, bitlen, NULL));
+}
+
+
+/* Ref_Prefix
+ * take a reference on a prefix
+ * a prefix with ref_count 0 is not shared: a fresh heap copy is returned
+ * instead. Otherwise the count is incremented and the same pointer returned.
+ * NULL in, NULL out.
+ */
+prefix_t *
+Ref_Prefix (prefix_t * prefix)
+{
+    if (prefix == NULL)
+        return (NULL);
+    if (prefix->ref_count == 0) {
+        /* make a copy in case of a static prefix */
+        return (New_Prefix2 (prefix->family, &prefix->add, prefix->bitlen, NULL));
+    }
+    prefix->ref_count++;
+/* fprintf(stderr, "[A %s, %d]\n", prefix_toa (prefix), prefix->ref_count); */
+    return (prefix);
+}
+
+/* Deref_Prefix
+ * drop a reference; the prefix is freed when the count reaches zero
+ * NULL is ignored
+ */
+void
+Deref_Prefix (prefix_t * prefix)
+{
+    if (prefix == NULL)
+        return;
+    /* for secure programming, raise an assert. no static prefix can call this */
+    assert (prefix->ref_count > 0);
+
+    prefix->ref_count--;
+    assert (prefix->ref_count >= 0);
+    if (prefix->ref_count <= 0) {
+        Delete (prefix);
+        return;
+    }
+}
+
+/* } */
+
+/* #define PATRICIA_DEBUG 1 */
+
+static int num_active_patricia = 0;
+
+/* these routines support continuous mask only */
+
+/* New_Patricia
+ * allocate an empty tree for keys of at most maxbits bits
+ * (maxbits must not exceed PATRICIA_MAXBITS). The calloc result is not
+ * checked.
+ */
+patricia_tree_t *
+New_Patricia (int maxbits)
+{
+    patricia_tree_t *patricia = (patricia_tree_t *)calloc(1, sizeof *patricia);
+
+    patricia->maxbits = maxbits;
+    patricia->head = NULL;
+    patricia->num_active_node = 0;
+    assert (maxbits <= PATRICIA_MAXBITS); /* XXX */
+    num_active_patricia++;
+    return (patricia);
+}
+
+
+/*
+ * if func is supplied, it will be called as func(node->data)
+ * before deleting the node
+ *
+ * frees every node of the tree (and drops its prefix reference) but keeps
+ * the tree structure itself, which is left empty
+ */
+
+void
+Clear_Patricia (patricia_tree_t *patricia, void_fn_t func)
+{
+    assert (patricia);
+    if (patricia->head) {
+
+        /* explicit stack of right subtrees still to be visited */
+        patricia_node_t *Xstack[PATRICIA_MAXBITS+1];
+        patricia_node_t **Xsp = Xstack;
+        patricia_node_t *Xrn = patricia->head;
+
+        while (Xrn) {
+            /* remember the children before the node is freed */
+            patricia_node_t *l = Xrn->l;
+            patricia_node_t *r = Xrn->r;
+
+            if (Xrn->prefix) {
+                Deref_Prefix (Xrn->prefix);
+                if (Xrn->data && func)
+                    func (Xrn->data);
+            }
+            else {
+                assert (Xrn->data == NULL);
+            }
+            Delete (Xrn);
+            patricia->num_active_node--;
+
+            if (l) {
+                if (r) {
+                    *Xsp++ = r;
+                }
+                Xrn = l;
+            } else if (r) {
+                Xrn = r;
+            } else if (Xsp != Xstack) {
+                Xrn = *(--Xsp);
+            } else {
+                Xrn = NULL;
+            }
+        }
+        /* every node is gone: do not leave a dangling root behind, the tree
+         * structure itself survives this call */
+        patricia->head = NULL;
+    }
+    assert (patricia->num_active_node == 0);
+    /* Delete (patricia); */
+}
+
+
+/* Destroy_Patricia
+ * Clear_Patricia followed by freeing the tree structure itself
+ */
+void
+Destroy_Patricia (patricia_tree_t *patricia, void_fn_t func)
+{
+    Clear_Patricia (patricia, func);
+    Delete (patricia);
+    num_active_patricia--;
+}
+
+
+/*
+ * if func is supplied, it will be called as func(node->prefix, node->data)
+ */
+
+void
+patricia_process (patricia_tree_t *patricia, void_fn_t func)
+{
+    patricia_node_t *node;
+    assert (func);
+
+    PATRICIA_WALK (patricia->head, node) {
+        func (node->prefix, node->data);
+    } PATRICIA_WALK_END;
+}
+
+/* patricia_walk_inorder
+ * recursive in-order walk (left subtree, node, right subtree) starting at
+ * node; func(node->prefix, node->data) is called for every node that carries
+ * a prefix
+ * returns the number of nodes func was called for; 0 for a NULL node, e.g.
+ * the head of an empty tree
+ */
+size_t
+patricia_walk_inorder(patricia_node_t *node, void_fn_t func)
+{
+    size_t n = 0;
+    assert(func);
+
+    /* an empty tree has head == NULL; nothing to visit */
+    if (node == NULL)
+        return 0;
+
+    if (node->l) {
+        n += patricia_walk_inorder(node->l, func);
+    }
+
+    if (node->prefix) {
+        func(node->prefix, node->data);
+        n++;
+    }
+
+    if (node->r) {
+        n += patricia_walk_inorder(node->r, func);
+    }
+
+    return n;
+}
+
+
+/* patricia_search_exact
+ * find the node whose prefix has exactly the address bits and the length of
+ * the given prefix
+ * returns the node, or NULL when the tree holds no such prefix
+ */
+patricia_node_t *
+patricia_search_exact (patricia_tree_t *patricia, prefix_t *prefix)
+{
+    patricia_node_t *node;
+    u_char *addr;
+    u_int bitlen;
+
+    assert (patricia);
+    assert (prefix);
+    assert (prefix->bitlen <= patricia->maxbits);
+
+    if (patricia->head == NULL)
+        return (NULL);
+
+    node = patricia->head;
+    addr = prefix_touchar (prefix);
+    bitlen = prefix->bitlen;
+
+    /* descend: at each node the address bit at position node->bit selects
+     * the right (bit set) or left child */
+    while (node->bit < bitlen) {
+
+        if (BIT_TEST (addr[node->bit >> 3], 0x80 >> (node->bit & 0x07))) {
+#ifdef PATRICIA_DEBUG
+            if (node->prefix)
+                fprintf (stderr, "patricia_search_exact: take right %s/%d\n",
+                         prefix_toa (node->prefix), node->prefix->bitlen);
+            else
+                fprintf (stderr, "patricia_search_exact: take right at %u\n",
+                         node->bit);
+#endif /* PATRICIA_DEBUG */
+            node = node->r;
+        }
+        else {
+#ifdef PATRICIA_DEBUG
+            if (node->prefix)
+                fprintf (stderr, "patricia_search_exact: take left %s/%d\n",
+                         prefix_toa (node->prefix), node->prefix->bitlen);
+            else
+                fprintf (stderr, "patricia_search_exact: take left at %u\n",
+                         node->bit);
+#endif /* PATRICIA_DEBUG */
+            node = node->l;
+        }
+
+        if (node == NULL)
+            return (NULL);
+    }
+
+#ifdef PATRICIA_DEBUG
+    if (node->prefix)
+        fprintf (stderr, "patricia_search_exact: stop at %s/%d\n",
+                 prefix_toa (node->prefix), node->prefix->bitlen);
+    else
+        fprintf (stderr, "patricia_search_exact: stop at %u\n", node->bit);
+#endif /* PATRICIA_DEBUG */
+    if (node->bit > bitlen || node->prefix == NULL)
+        return (NULL);
+    assert (node->bit == bitlen);
+    assert (node->bit == node->prefix->bitlen);
+    if (comp_with_mask (prefix_tochar (node->prefix), prefix_tochar (prefix),
+                        bitlen)) {
+#ifdef PATRICIA_DEBUG
+        fprintf (stderr, "patricia_search_exact: found %s/%d\n",
+                 prefix_toa (node->prefix), node->prefix->bitlen);
+#endif /* PATRICIA_DEBUG */
+        return (node);
+    }
+    return (NULL);
+}
+
+
+/* if inclusive != 0, "best" may be the given prefix itself */
+/* patricia_search_best2
+ * longest-prefix match: the nodes carrying a prefix that are passed on the
+ * way down are collected on a stack, then checked from the most specific to
+ * the least specific one; the first whose prefix covers the given address
+ * wins
+ * returns that node, or NULL when no stored prefix matches
+ */
+patricia_node_t *
+patricia_search_best2 (patricia_tree_t *patricia, prefix_t *prefix, int inclusive)
+{
+    patricia_node_t *node;
+    patricia_node_t *stack[PATRICIA_MAXBITS + 1];
+    u_char *addr;
+    u_int bitlen;
+    int cnt = 0;
+
+    assert (patricia);
+    assert (prefix);
+    assert (prefix->bitlen <= patricia->maxbits);
+
+    if (patricia->head == NULL)
+        return (NULL);
+
+    node = patricia->head;
+    addr = prefix_touchar (prefix);
+    bitlen = prefix->bitlen;
+
+    while (node->bit < bitlen) {
+
+        if (node->prefix) {
+#ifdef PATRICIA_DEBUG
+            fprintf (stderr, "patricia_search_best: push %s/%d\n",
+                     prefix_toa (node->prefix), node->prefix->bitlen);
+#endif /* PATRICIA_DEBUG */
+            stack[cnt++] = node;
+        }
+
+        if (BIT_TEST (addr[node->bit >> 3], 0x80 >> (node->bit & 0x07))) {
+#ifdef PATRICIA_DEBUG
+            if (node->prefix)
+                fprintf (stderr, "patricia_search_best: take right %s/%d\n",
+                         prefix_toa (node->prefix), node->prefix->bitlen);
+            else
+                fprintf (stderr, "patricia_search_best: take right at %u\n",
+                         node->bit);
+#endif /* PATRICIA_DEBUG */
+            node = node->r;
+        }
+        else {
+#ifdef PATRICIA_DEBUG
+            if (node->prefix)
+                fprintf (stderr, "patricia_search_best: take left %s/%d\n",
+                         prefix_toa (node->prefix), node->prefix->bitlen);
+            else
+                fprintf (stderr, "patricia_search_best: take left at %u\n",
+                         node->bit);
+#endif /* PATRICIA_DEBUG */
+            node = node->l;
+        }
+
+        if (node == NULL)
+            break;
+    }
+
+    if (inclusive && node && node->prefix)
+        stack[cnt++] = node;
+
+#ifdef PATRICIA_DEBUG
+    if (node == NULL)
+        fprintf (stderr, "patricia_search_best: stop at null\n");
+    else if (node->prefix)
+        fprintf (stderr, "patricia_search_best: stop at %s/%d\n",
+                 prefix_toa (node->prefix), node->prefix->bitlen);
+    else
+        fprintf (stderr, "patricia_search_best: stop at %u\n", node->bit);
+#endif /* PATRICIA_DEBUG */
+
+    if (cnt <= 0)
+        return (NULL);
+
+    while (--cnt >= 0) {
+        node = stack[cnt];
+#ifdef PATRICIA_DEBUG
+        fprintf (stderr, "patricia_search_best: pop %s/%d\n",
+                 prefix_toa (node->prefix), node->prefix->bitlen);
+#endif /* PATRICIA_DEBUG */
+        if (comp_with_mask (prefix_tochar (node->prefix),
+                            prefix_tochar (prefix),
+                            node->prefix->bitlen) && node->prefix->bitlen <= bitlen) {
+#ifdef PATRICIA_DEBUG
+            fprintf (stderr, "patricia_search_best: found %s/%d\n",
+                     prefix_toa (node->prefix), node->prefix->bitlen);
+#endif /* PATRICIA_DEBUG */
+            return (node);
+        }
+    }
+    return (NULL);
+}
+
+
+/* patricia_search_best
+ * longest-prefix match that may return the given prefix itself
+ */
+patricia_node_t *
+patricia_search_best (patricia_tree_t *patricia, prefix_t *prefix)
+{
+    return (patricia_search_best2 (patricia, prefix, 1));
+}
+
+
+/* patricia_lookup
+ * find the node for the given prefix, inserting it when it is not in the
+ * tree yet
+ *
+ * the prefix is attached through Ref_Prefix(), so a prefix with ref_count 0
+ * (caller-owned storage) is copied and may be discarded by the caller
+ * afterwards. A new node starts with data == NULL.
+ * returns the existing or newly created node; calloc results are not
+ * checked.
+ */
+patricia_node_t *
+patricia_lookup (patricia_tree_t *patricia, prefix_t *prefix)
+{
+    patricia_node_t *node, *new_node, *parent, *glue;
+    u_char *addr, *test_addr;
+    u_int bitlen, check_bit, differ_bit;
+    int i, j, r;
+
+    assert (patricia);
+    assert (prefix);
+    assert (prefix->bitlen <= patricia->maxbits);
+
+    /* empty tree: the new node becomes the head */
+    if (patricia->head == NULL) {
+        node = (patricia_node_t *)calloc(1, sizeof *node);
+        node->bit = prefix->bitlen;
+        node->prefix = Ref_Prefix (prefix);
+        node->parent = NULL;
+        node->l = node->r = NULL;
+        node->data = NULL;
+        patricia->head = node;
+#ifdef PATRICIA_DEBUG
+        fprintf (stderr, "patricia_lookup: new_node #0 %s/%d (head)\n",
+                 prefix_toa (prefix), prefix->bitlen);
+#endif /* PATRICIA_DEBUG */
+        patricia->num_active_node++;
+        return (node);
+    }
+
+    addr = prefix_touchar (prefix);
+    bitlen = prefix->bitlen;
+    node = patricia->head;
+
+    /* descend until a node with a prefix at or below the wanted length is
+     * reached, or the branch to follow is missing */
+    while (node->bit < bitlen || node->prefix == NULL) {
+
+        if (node->bit < patricia->maxbits &&
+            BIT_TEST (addr[node->bit >> 3], 0x80 >> (node->bit & 0x07))) {
+            if (node->r == NULL)
+                break;
+#ifdef PATRICIA_DEBUG
+            if (node->prefix)
+                fprintf (stderr, "patricia_lookup: take right %s/%d\n",
+                         prefix_toa (node->prefix), node->prefix->bitlen);
+            else
+                fprintf (stderr, "patricia_lookup: take right at %u\n", node->bit);
+#endif /* PATRICIA_DEBUG */
+            node = node->r;
+        }
+        else {
+            if (node->l == NULL)
+                break;
+#ifdef PATRICIA_DEBUG
+            if (node->prefix)
+                fprintf (stderr, "patricia_lookup: take left %s/%d\n",
+                         prefix_toa (node->prefix), node->prefix->bitlen);
+            else
+                fprintf (stderr, "patricia_lookup: take left at %u\n", node->bit);
+#endif /* PATRICIA_DEBUG */
+            node = node->l;
+        }
+
+        assert (node);
+    }
+
+    assert (node->prefix);
+#ifdef PATRICIA_DEBUG
+    fprintf (stderr, "patricia_lookup: stop at %s/%d\n",
+             prefix_toa (node->prefix), node->prefix->bitlen);
+#endif /* PATRICIA_DEBUG */
+
+    test_addr = prefix_touchar (node->prefix);
+    /* find the first bit different */
+    check_bit = (node->bit < bitlen)? node->bit: bitlen;
+    differ_bit = 0;
+    for (i = 0; i*8 < check_bit; i++) {
+        if ((r = (addr[i] ^ test_addr[i])) == 0) {
+            differ_bit = (i + 1) * 8;
+            continue;
+        }
+        /* I know the better way, but for now */
+        for (j = 0; j < 8; j++) {
+            if (BIT_TEST (r, (0x80 >> j)))
+                break;
+        }
+        /* must be found */
+        assert (j < 8);
+        differ_bit = i * 8 + j;
+        break;
+    }
+    if (differ_bit > check_bit)
+        differ_bit = check_bit;
+#ifdef PATRICIA_DEBUG
+    fprintf (stderr, "patricia_lookup: differ_bit %d\n", differ_bit);
+#endif /* PATRICIA_DEBUG */
+
+    /* climb back up to the highest node that still tests a bit at or after
+     * the first differing bit */
+    parent = node->parent;
+    while (parent && parent->bit >= differ_bit) {
+        node = parent;
+        parent = node->parent;
+#ifdef PATRICIA_DEBUG
+        if (node->prefix)
+            fprintf (stderr, "patricia_lookup: up to %s/%d\n",
+                     prefix_toa (node->prefix), node->prefix->bitlen);
+        else
+            fprintf (stderr, "patricia_lookup: up to %u\n", node->bit);
+#endif /* PATRICIA_DEBUG */
+    }
+
+    /* the prefix belongs exactly at this node: either it is already there,
+     * or the node is a glue node that now receives the prefix */
+    if (differ_bit == bitlen && node->bit == bitlen) {
+        if (node->prefix) {
+#ifdef PATRICIA_DEBUG
+            fprintf (stderr, "patricia_lookup: found %s/%d\n",
+                     prefix_toa (node->prefix), node->prefix->bitlen);
+#endif /* PATRICIA_DEBUG */
+            return (node);
+        }
+        node->prefix = Ref_Prefix (prefix);
+#ifdef PATRICIA_DEBUG
+        fprintf (stderr, "patricia_lookup: new node #1 %s/%d (glue mod)\n",
+                 prefix_toa (prefix), prefix->bitlen);
+#endif /* PATRICIA_DEBUG */
+        assert (node->data == NULL);
+        return (node);
+    }
+
+    new_node = (patricia_node_t *) calloc(1, sizeof *new_node);
+    new_node->bit = prefix->bitlen;
+    new_node->prefix = Ref_Prefix (prefix);
+    new_node->parent = NULL;
+    new_node->l = new_node->r = NULL;
+    new_node->data = NULL;
+    patricia->num_active_node++;
+
+    /* case: the new node hangs below the node that was found */
+    if (node->bit == differ_bit) {
+        new_node->parent = node;
+        if (node->bit < patricia->maxbits &&
+            BIT_TEST (addr[node->bit >> 3], 0x80 >> (node->bit & 0x07))) {
+            assert (node->r == NULL);
+            node->r = new_node;
+        }
+        else {
+            assert (node->l == NULL);
+            node->l = new_node;
+        }
+#ifdef PATRICIA_DEBUG
+        fprintf (stderr, "patricia_lookup: new_node #2 %s/%d (child)\n",
+                 prefix_toa (prefix), prefix->bitlen);
+#endif /* PATRICIA_DEBUG */
+        return (new_node);
+    }
+
+    /* case: the new prefix covers the found node, so the new node is
+     * inserted between that node and its former parent */
+    if (bitlen == differ_bit) {
+        if (bitlen < patricia->maxbits &&
+            BIT_TEST (test_addr[bitlen >> 3], 0x80 >> (bitlen & 0x07))) {
+            new_node->r = node;
+        }
+        else {
+            new_node->l = node;
+        }
+        new_node->parent = node->parent;
+        if (node->parent == NULL) {
+            assert (patricia->head == node);
+            patricia->head = new_node;
+        }
+        else if (node->parent->r == node) {
+            node->parent->r = new_node;
+        }
+        else {
+            node->parent->l = new_node;
+        }
+        node->parent = new_node;
+#ifdef PATRICIA_DEBUG
+        fprintf (stderr, "patricia_lookup: new_node #3 %s/%d (parent)\n",
+                 prefix_toa (prefix), prefix->bitlen);
+#endif /* PATRICIA_DEBUG */
+    }
+    /* case: the two prefixes diverge at differ_bit; a glue node without a
+     * prefix is created there and gets both nodes as children */
+    else {
+        glue = (patricia_node_t *) calloc(1, sizeof *glue);
+        glue->bit = differ_bit;
+        glue->prefix = NULL;
+        glue->parent = node->parent;
+        glue->data = NULL;
+        patricia->num_active_node++;
+        if (differ_bit < patricia->maxbits &&
+            BIT_TEST (addr[differ_bit >> 3], 0x80 >> (differ_bit & 0x07))) {
+            glue->r = new_node;
+            glue->l = node;
+        }
+        else {
+            glue->r = node;
+            glue->l = new_node;
+        }
+        new_node->parent = glue;
+
+        if (node->parent == NULL) {
+            assert (patricia->head == node);
+            patricia->head = glue;
+        }
+        else if (node->parent->r == node) {
+            node->parent->r = glue;
+        }
+        else {
+            node->parent->l = glue;
+        }
+        node->parent = glue;
+#ifdef PATRICIA_DEBUG
+        fprintf (stderr, "patricia_lookup: new_node #4 %s/%d (glue+node)\n",
+                 prefix_toa (prefix), prefix->bitlen);
+#endif /* PATRICIA_DEBUG */
+    }
+    return (new_node);
+}
+
+
+/* patricia_remove
+ * remove a node from the tree
+ *
+ * - node with two children: it stays in place as a glue node, only its
+ *   prefix reference and data pointer are dropped
+ * - leaf: the node is freed; a parent that carries no prefix is removed as
+ *   well and the sibling takes its place
+ * - node with one child: the node is freed and the child is linked to the
+ *   node's parent
+ * node->data itself is never freed here.
+ */
+void
+patricia_remove (patricia_tree_t *patricia, patricia_node_t *node)
+{
+    patricia_node_t *parent, *child;
+
+    assert (patricia);
+    assert (node);
+
+    if (node->r && node->l) {
+#ifdef PATRICIA_DEBUG
+        fprintf (stderr, "patricia_remove: #0 %s/%d (r & l)\n",
+                 prefix_toa (node->prefix), node->prefix->bitlen);
+#endif /* PATRICIA_DEBUG */
+
+        /* this might be a placeholder node -- have to check and make sure
+         * there is a prefix associated with it ! */
+        if (node->prefix != NULL)
+            Deref_Prefix (node->prefix);
+        node->prefix = NULL;
+        /* Also I needed to clear data pointer -- masaki */
+        node->data = NULL;
+        return;
+    }
+
+    if (node->r == NULL && node->l == NULL) {
+#ifdef PATRICIA_DEBUG
+        fprintf (stderr, "patricia_remove: #1 %s/%d (!r & !l)\n",
+                 prefix_toa (node->prefix), node->prefix->bitlen);
+#endif /* PATRICIA_DEBUG */
+        parent = node->parent;
+        Deref_Prefix (node->prefix);
+        Delete (node);
+        patricia->num_active_node--;
+
+        if (parent == NULL) {
+            assert (patricia->head == node);
+            patricia->head = NULL;
+            return;
+        }
+
+        if (parent->r == node) {
+            parent->r = NULL;
+            child = parent->l;
+        }
+        else {
+            assert (parent->l == node);
+            parent->l = NULL;
+            child = parent->r;
+        }
+
+        if (parent->prefix)
+            return;
+
+        /* we need to remove parent too */
+
+        if (parent->parent == NULL) {
+            assert (patricia->head == parent);
+            patricia->head = child;
+        }
+        else if (parent->parent->r == parent) {
+            parent->parent->r = child;
+        }
+        else {
+            assert (parent->parent->l == parent);
+            parent->parent->l = child;
+        }
+        child->parent = parent->parent;
+        Delete (parent);
+        patricia->num_active_node--;
+        return;
+    }
+
+#ifdef PATRICIA_DEBUG
+    fprintf (stderr, "patricia_remove: #2 %s/%d (r ^ l)\n",
+             prefix_toa (node->prefix), node->prefix->bitlen);
+#endif /* PATRICIA_DEBUG */
+    if (node->r) {
+        child = node->r;
+    }
+    else {
+        assert (node->l);
+        child = node->l;
+    }
+    parent = node->parent;
+    child->parent = parent;
+
+    Deref_Prefix (node->prefix);
+    Delete (node);
+    patricia->num_active_node--;
+
+    if (parent == NULL) {
+        assert (patricia->head == node);
+        patricia->head = child;
+        return;
+    }
+
+    if (parent->r == node) {
+        parent->r = child;
+    }
+    else {
+        assert (parent->l == node);
+        parent->l = child;
+    }
+}
+
diff --git a/src/qdpi.cpp b/src/qdpi.cpp
new file mode 100644
index 0000000..d38f0f2
--- /dev/null
+++ b/src/qdpi.cpp
@@ -0,0 +1,96
@@ +/* +* +* Copyright (C) Max +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +* +*/ + +#include "qdpi.h" +#include + +#include "main.h" + + +static void *malloc_wrapper(unsigned long size) +{ + extFilter::current_ndpi_memory += size; + if(extFilter::current_ndpi_memory > extFilter::max_ndpi_memory) + extFilter::max_ndpi_memory = extFilter::current_ndpi_memory; + return calloc(1,size); +} + +static void free_wrapper(void *freeable) +{ + free(freeable); +} + +void debug_printf(u_int32_t protocol, void *id_struct, ndpi_log_level_t log_level, const char *format, ...) { + va_list va_ap; + struct tm result; + + char buf[8192], out_buf[8192]; + char theDate[32]; + const char *extra_msg = ""; + time_t theTime = time(NULL); + + va_start (va_ap, format); + + /* + if(log_level == NDPI_LOG_ERROR) + extra_msg = "ERROR: "; + else if(log_level == NDPI_LOG_TRACE) + extra_msg = "TRACE: "; + else. 
+ extra_msg = "DEBUG: "; + */ + + memset(buf, 0, sizeof(buf)); + strftime(theDate, 32, "%d/%b/%Y %H:%M:%S", localtime_r(&theTime, &result) ); + vsnprintf(buf, sizeof(buf)-1, format, va_ap); + + snprintf(out_buf, sizeof(out_buf), "%s %s%s", theDate, extra_msg, buf); + printf("%s", out_buf); + Poco::Util::Application& app = Poco::Util::Application::instance(); + std::string msg(&out_buf[0]); + app.logger().information("nDPI message: %s",msg); + + fflush(stdout); + + va_end(va_ap); +} + +struct ndpi_detection_module_struct* init_ndpi() +{ +/* set_ndpi_malloc(malloc_wrapper); + set_ndpi_free(free_wrapper);*/ + struct ndpi_detection_module_struct* my_ndpi_struct = ndpi_init_detection_module(); + + if (my_ndpi_struct == NULL) { + return NULL; + } + +// my_ndpi_struct->http_dont_dissect_response=1; + + NDPI_PROTOCOL_BITMASK all; + +/* NDPI_BITMASK_ADD(all,NDPI_PROTOCOL_HTTP); + NDPI_BITMASK_ADD(all,NDPI_PROTOCOL_SSL); +*/ + // enable all protocols + NDPI_BITMASK_SET_ALL(all); + ndpi_set_protocol_detection_bitmask2(my_ndpi_struct, &all); + + return my_ndpi_struct; +} diff --git a/src/sender.cpp b/src/sender.cpp new file mode 100644 index 0000000..ce6fa51 --- /dev/null +++ b/src/sender.cpp @@ -0,0 +1,264 @@ +/* +* +* Copyright (C) Max +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+* +*/ + +#include "sender.h" +#include +#include +#include + +struct pseudo_header +{ + u_int32_t source_address; + u_int32_t dest_address; + u_int8_t placeholder; + u_int8_t protocol; + u_int16_t tcp_length; +}; + +struct ipv6_pseudo_hdr +{ + struct in6_addr source_address; + struct in6_addr dest_address; + u_int32_t tcp_length; + u_int32_t zero: 24, + nexthdr: 8; +}; + +CSender::CSender(struct params &prm) : _logger(Poco::Logger::get("CSender")), _parameters(prm) +{ + this->s = ::socket( PF_INET, SOCK_RAW, IPPROTO_RAW ); + if( s == -1 ) { + _logger.error("Failed to create IPv4 socket!"); + return; + } + this->s6 = ::socket( PF_INET6, SOCK_RAW, IPPROTO_RAW ); + if( s6 == -1 ) { + _logger.error("Failed to create IPv6 socket!"); + return; + } + + int one = 1; + const int *val = &one; + if( ::setsockopt(this->s, IPPROTO_IP, IP_HDRINCL, val, sizeof(one)) < 0 ) + { + _logger.error("Error setting IP_HDRINCL for IPv4 socket"); + return; + } + + this->rHeader = "HTTP/1.1 "+_parameters.code+"\r\nLocation: " + _parameters.redirect_url + "\r\nConnection: close\r\n"; + _logger.debug("Default header is %s", rHeader); +} + +CSender::~CSender() +{ + ::close(s); + ::close(s6); +} + +void CSender::sendPacket(Poco::Net::IPAddress &ip_from, Poco::Net::IPAddress &ip_to, int port_from, int port_to, uint32_t acknum, uint32_t seqnum, std::string &dt, int f_reset, int f_psh) +{ + char datagram[4096], *data, *pseudogram=NULL; + + // zero out the packet buffer + memset(datagram, 0, sizeof(datagram)); + + // IP header + struct iphdr *iph = (struct iphdr *) datagram; + struct ip6_hdr *iph6 = (struct ip6_hdr *) datagram; + + // TCP header + struct tcphdr *tcph = (struct tcphdr *) (datagram + (ip_from.family() == Poco::Net::IPAddress::IPv4 ? 
sizeof(struct iphdr) : sizeof(struct ip6_hdr))); + + struct sockaddr_in sin; + struct sockaddr_in6 sin6; + int payloadlen=dt.size(); + + // Data part + data = (char *)tcph + sizeof(struct tcphdr); + memcpy(data,dt.c_str(),payloadlen); + + _logger.debug("Trying to send packet to %s port %d", ip_to.toString(), port_to); + + if(ip_from.family() == Poco::Net::IPAddress::IPv4) + { + sin.sin_family = AF_INET; + sin.sin_port = htons(port_to); + sin.sin_addr.s_addr=((in_addr *)ip_to.addr())->s_addr; + // Fill the IPv4 header + iph->ihl = 5; + iph->version = 4; + iph->tos=0; + iph->tot_len = sizeof(struct iphdr) + sizeof(struct tcphdr) + payloadlen; + iph->id = htons(random()); + iph->frag_off = 0; + iph->ttl = _parameters.ttl; + iph->protocol = IPPROTO_TCP; + iph->check = 0; + iph->saddr = ((in_addr *)ip_from.addr())->s_addr; + iph->daddr = sin.sin_addr.s_addr; + // IP checksum + iph->check = 0; // done by kernel //this->csum((unsigned short *) datagram, iph->tot_len); + } else { + sin6.sin6_family = AF_INET6; + sin6.sin6_port = 0; // not filled in ipv6 + memcpy(&sin6.sin6_addr,ip_to.addr(),sizeof(sin6.sin6_addr)); + // IPv6 version (4 bits), Traffic class (8 bits), Flow label (20 bits) + iph6->ip6_flow = htonl ((6 << 28) | (0 << 20) | 0); + // Payload length (16 bits): TCP header + TCP data + iph6->ip6_plen = htons (sizeof(struct tcphdr) + payloadlen); + // Next header (8 bits): 6 for TCP + iph6->ip6_nxt = IPPROTO_TCP; + // Hop limit (8 bits): default to maximum value + iph6->ip6_hops = 250; + memcpy(&iph6->ip6_src,ip_from.addr(),sizeof(in6_addr)); + memcpy(&iph6->ip6_dst,ip_to.addr(),sizeof(in6_addr)); + } + + // TCP Header + tcph->source = htons(port_from); + tcph->dest = htons(port_to); + tcph->seq = acknum; + tcph->doff = 5; + tcph->syn = 0; + tcph->rst = f_reset; + tcph->psh = f_psh; + if(f_reset) + { + tcph->ack = 0; + tcph->ack_seq = 0; + tcph->fin = 0; + } else { + tcph->ack_seq = seqnum; + tcph->ack = 1; + tcph->fin = 1; + } + tcph->urg = 0; + tcph->window = 
htons(5840); + tcph->check = 0; + tcph->urg_ptr = 0; + + + + if(ip_from.family() == Poco::Net::IPAddress::IPv4) + { + struct pseudo_header psh; + psh.source_address = ((in_addr *)ip_from.addr())->s_addr; + psh.dest_address = sin.sin_addr.s_addr; + psh.placeholder = 0; + psh.protocol = IPPROTO_TCP; + psh.tcp_length = htons(sizeof(struct tcphdr) + dt.size() ); + + int psize = sizeof(struct pseudo_header) + sizeof(struct tcphdr) + dt.size(); + pseudogram = (char*)calloc(1,psize); + + memcpy( pseudogram, (char*) &psh, sizeof(struct pseudo_header)); + memcpy( pseudogram + sizeof(struct pseudo_header), tcph, sizeof(struct tcphdr) + dt.size()); + + tcph->check = csum( (unsigned short*) pseudogram, psize); + + // Send the packet + if( ::sendto( this->s, datagram, iph->tot_len, 0, (struct sockaddr *)&sin, sizeof(sin)) < 0 ) + { + _logger.error("sendto() failed to %s:%d errno: %d",ip_to.toString(), port_to, errno); + } + } else { + struct ipv6_pseudo_hdr psh; + // filling pseudoheader... + memcpy(&psh.source_address,&iph6->ip6_src,sizeof(iph6->ip6_src)); + memcpy(&psh.dest_address,&iph6->ip6_dst,sizeof(iph6->ip6_dst)); + psh.tcp_length = htonl(sizeof(tcphdr) + payloadlen); + psh.zero = 0; + psh.nexthdr = iph6->ip6_nxt; + int psize = sizeof(ipv6_pseudo_hdr) + sizeof(struct tcphdr) + payloadlen; + + pseudogram = (char*)calloc(1,psize); + memcpy( pseudogram, (char*) &psh, sizeof(struct ipv6_pseudo_hdr)); + memcpy( pseudogram + sizeof(struct ipv6_pseudo_hdr), tcph, sizeof(struct tcphdr) + dt.size()); + + tcph->check = csum( (unsigned short*) pseudogram, psize); + + // Send the packet + if( ::sendto( this->s6, datagram, (sizeof(struct ip6_hdr) + sizeof(struct tcphdr) + payloadlen), 0, (struct sockaddr *)&sin6, sizeof(sin6)) < 0 ) + { + _logger.error("sendto() failed to [%s]:%d errno: %d",ip_to.toString(), port_to, errno); + } + } + if(pseudogram) + free(pseudogram); + + return; +} + +//void CSender::sendPacket(char *ip_from, char *ip_to, int port_from, int port_to, uint32_t 
acknum, uint32_t seqnum) +void CSender::Redirect(int user_port, int dst_port, Poco::Net::IPAddress &user_ip, Poco::Net::IPAddress &dst_ip, uint32_t acknum, uint32_t seqnum, int f_psh, std::string &additional_param ) +{ + // формируем дополнительные параметры + std::string tstr=rHeader; + if(!additional_param.empty()) + { + tstr = "HTTP/1.1 "+_parameters.code+"\r\nLocation: " + _parameters.redirect_url + additional_param + "\r\nConnection: close\r\n"; + } else { + tstr=rHeader; + } + this->sendPacket(dst_ip, user_ip, dst_port, user_port, acknum, seqnum, tstr, 0, 0); + // And reset session with client +// this->sendPacket( dst_ip, user_ip, dst_port, user_port, acknum, seqnum, redirectHeader, 1, 0); + + // And reset session with server + if(_parameters.send_rst_to_server) + { + std::string empty_str; + this->sendPacket(user_ip, dst_ip, user_port, dst_port, seqnum, acknum, empty_str, 1, f_psh); + } + return; +} + +void CSender::SendRST(int user_port, int dst_port, Poco::Net::IPAddress &user_ip, Poco::Net::IPAddress &dst_ip, uint32_t acknum, uint32_t seqnum, int f_psh) +{ + std::string empty_str; + // send rst to the client + this->sendPacket(dst_ip, user_ip, dst_port, user_port, acknum, seqnum, empty_str, 1, 0); + // send rst to the server + if(_parameters.send_rst_to_server) + this->sendPacket(user_ip, dst_ip, user_port, dst_port, seqnum, acknum, empty_str, 1, 0); +} + +unsigned short CSender::csum( unsigned short *ptr, int nbytes ) +{ + register long sum; + unsigned short oddbyte; + register short answer; + + sum = 0; + while( nbytes > 1 ) { + sum+=*ptr++; + nbytes-=2; + } + if( nbytes==1 ) { + oddbyte=0; + *((u_char*)&oddbyte)=*(u_char*)ptr; + sum+=oddbyte; + } + + sum = (sum>>16)+(sum & 0xffff); + sum = sum+(sum>>16); + answer=(short)~sum; + + return( answer ); +} diff --git a/src/sendertask.cpp b/src/sendertask.cpp new file mode 100644 index 0000000..4658b55 --- /dev/null +++ b/src/sendertask.cpp @@ -0,0 +1,63 @@ +/* +* +* Copyright (C) Max +* +* This program is 
free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +* +*/ + +#include "sendertask.h" + +#include "sender.h" + +Poco::FastMutex SenderTask::_mutex; +Poco::NotificationQueue SenderTask::queue; + +SenderTask::SenderTask(struct CSender::params &prm): + Task("SenderTask"), + sender(new CSender(prm)), + _logger(Poco::Logger::get("SenderTask")) +{ + +} + + +SenderTask::~SenderTask() +{ + delete sender; +} + +void SenderTask::runTask() +{ + _logger.debug("Starting SenderTask..."); + + while(!isCancelled()) + { + Poco::Notification::Ptr pNf(queue.waitDequeueNotification()); + if (pNf) + { + RedirectNotification::Ptr pRedirectNf = pNf.cast(); + if (pRedirectNf) + { + if(pRedirectNf->is_rst()) + sender->SendRST(pRedirectNf->user_port(), pRedirectNf->dst_port(),pRedirectNf->user_ip(),pRedirectNf->dst_ip(), pRedirectNf->acknum(), pRedirectNf->seqnum(), pRedirectNf->f_psh()); + else + sender->Redirect(pRedirectNf->user_port(), pRedirectNf->dst_port(),pRedirectNf->user_ip(),pRedirectNf->dst_ip(), pRedirectNf->acknum(), pRedirectNf->seqnum(), pRedirectNf->f_psh(), pRedirectNf->additional_param()); + } + } + } + + _logger.debug("Stopping SenderTask..."); +} + diff --git a/src/statistictask.cpp b/src/statistictask.cpp new file mode 100644 index 0000000..428ef71 --- /dev/null +++ b/src/statistictask.cpp @@ -0,0 +1,160 @@ +/* +* +* Copyright (C) Max +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the 
GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +* +*/ + +#define __STDC_FORMAT_MACROS +#include +#include +#include "main.h" +#include "statistictask.h" +#include "stats.h" +#include "worker.h" + +static struct timeval begin_time; + +static std::map map_last_pkts; + +StatisticTask::StatisticTask(int sec, std::vector &workerThreadVector): + Task("StatisticTask"), + _sec(sec), + workerThreadVec(workerThreadVector) +{ +} + +static std::string formatBytes(u_int32_t howMuch) +{ + char unit = 'B'; + char buf[32]; + int buf_len=sizeof(buf); + + if(howMuch < 1024) + { + snprintf(buf, buf_len, "%lu %c", (unsigned long)howMuch, unit); + } else if(howMuch < 1048576) + { + snprintf(buf, buf_len, "%.2f K%c", (float)(howMuch)/1024, unit); + } else { + float tmpGB = ((float)howMuch)/1048576; + if(tmpGB < 1024) + { + snprintf(buf, buf_len, "%.2f M%c", tmpGB, unit); + } else { + tmpGB /= 1024; + snprintf(buf, buf_len, "%.2f G%c", tmpGB, unit); + } + } + return std::string(buf); +} + +static std::string formatPackets(float numPkts) +{ + char buf[32]; + int buf_len=sizeof(buf); + if(numPkts < 1000) + { + snprintf(buf, buf_len, "%.2f", numPkts); + } else if(numPkts < 1000000) + { + snprintf(buf, buf_len, "%.2f K", numPkts/1000); + } else { + numPkts /= 1000000; + snprintf(buf, buf_len, "%.2f M", numPkts); + } + return std::string(buf); +} + +void StatisticTask::OutStatistic() +{ + Poco::Util::Application& app = Poco::Util::Application::instance(); + + for(std::vector::iterator it=workerThreadVec.begin(); it != 
workerThreadVec.end(); it++) + { + app.logger().information("Thread on core %u statistics:", (*it)->getCoreId()); + const ThreadStats &stats=(static_cast(*it))->getStats(); + unsigned int avg_pkt_size=0; + struct timeval end; + gettimeofday(&end, NULL); + uint64_t last_pkts=0; + std::map::iterator it1=map_last_pkts.find((int)(*it)->getCoreId()); + if(it1 != map_last_pkts.end()) + last_pkts = it1->second; + uint64_t tot_usec = end.tv_sec*1000000 + end.tv_usec - (begin_time.tv_sec*1000000 + begin_time.tv_usec); + float t = (float)((stats.ip_packets-last_pkts)*1000000)/(float)tot_usec; + map_last_pkts[(int)(*it)->getCoreId()]=stats.ip_packets; + gettimeofday(&begin_time, NULL); + if(stats.ip_packets && stats.total_bytes) + avg_pkt_size = (unsigned int)(stats.total_bytes/stats.ip_packets); + app.logger().information("Total seen packets: %" PRIu64 ", Total seen bytes: %" PRIu64 ", Average packet size: %" PRIu32 " bytes, Traffic throughput: %s pps", stats.ip_packets, stats.total_bytes, avg_pkt_size, formatPackets(t)); + app.logger().information("Total matched by ip/port: %" PRIu64 ", Total matched by ssl: %" PRIu64 ", Total matched by ssl/ip: %" PRIu64 ", Total matched by domain: %" PRIu64 ", Total matched by url: %" PRIu64, stats.matched_ip_port, stats.matched_ssl, stats.matched_ssl_ip, stats.matched_domains, stats.matched_urls); + app.logger().information("Total redirected domains %" PRIu64 ", Total redirected urls: %" PRIu64 ", Total rst sended: %" PRIu64, stats.redirected_domains,stats.redirected_urls,stats.sended_rst); + } +/* app.logger().information("nDPI memory (once): %s",formatBytes(sizeof(ndpi_detection_module_struct))); + app.logger().information("nDPI memory per flow: %s",formatBytes(nfqFilter::ndpi_size_flow_struct)); + app.logger().information("nDPI current memory usage: %s",formatBytes(nfqFilter::current_ndpi_memory)); + app.logger().information("nDPI maximum memory usage: %s",formatBytes(nfqFilter::max_ndpi_memory)); + + Poco::TaskManager 
*pOwner=getOwner(); + if(pOwner) + { + Poco::TaskManager::TaskList tl=pOwner->taskList(); + for(Poco::TaskManager::TaskList::iterator it=tl.begin(); it != tl.end(); it++) + { + std::string threadName=(*it)->name(); + std::size_t found=threadName.find("nfqThread"); + if(found != std::string::npos) + { + // статистика задачи... + struct threadStats stats; + Poco::AutoPtr p=it->cast(); + p->getStats(stats); + unsigned int avg_pkt_size=0; + struct timeval end; + gettimeofday(&end, NULL); + uint64_t tot_usec = end.tv_sec*1000000 + end.tv_usec - (begin_time.tv_sec*1000000 + begin_time.tv_usec); + float t = (float)(stats.ip_packets*1000000)/(float)tot_usec; + if(stats.ip_packets && stats.total_bytes) + avg_pkt_size = (unsigned int)(stats.total_bytes/stats.ip_packets); + + app.logger().information("Total seen packets: %" PRIu64 ", Total seen bytes: %" PRIu64 ", Average packet size: %" PRIu32 " bytes, Traffic throughput: %s pps", stats.ip_packets, stats.total_bytes, avg_pkt_size, formatPackets(t)); + app.logger().information("Total matched by ip/port: %" PRIu64 ", Total matched by ssl: %" PRIu64 ", Total matched by ssl/ip: %" PRIu64, stats.matched_ip_port, stats.matched_ssl, stats.matched_ssl_ip); + app.logger().information("Total redirected domains %" PRIu64 ", Total redirected urls: %" PRIu64 ", Total marked ssl: %" PRIu64 ", Total marked hosts: %" PRIu64 ", Total rst sended: %" PRIu64, stats.redirected_domains,stats.redirected_urls,stats.marked_ssl,stats.marked_hosts,stats.sended_rst); + } + app.logger().debug("State of task %s is %d", (*it)->name(), (int)(*it)->state()); + } + }*/ + +} + +void StatisticTask::runTask() +{ + Poco::Util::Application& app = Poco::Util::Application::instance(); + app.logger().debug("Starting statistic task..."); + gettimeofday(&begin_time, NULL); + int sleep_sec=_sec; + if(!sleep_sec) + sleep_sec=1; + sleep_sec *= 1000; + while (!isCancelled()) + { + sleep(sleep_sec); + if(_sec) + OutStatistic(); + } + app.logger().debug("Stopping statistic 
task..."); +} + diff --git a/src/worker.cpp b/src/worker.cpp new file mode 100644 index 0000000..419f62a --- /dev/null +++ b/src/worker.cpp @@ -0,0 +1,401 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "worker.h" +#include "main.h" +#include "ndpiwrapper.h" +#include "sendertask.h" + +static pcpp::PcapFileWriterDevice* pcapWriter = NULL; + +bool WorkerThread::analyzePacket(pcpp::Packet &parsedPacket) +{ + Poco::Stopwatch sw; + + m_ThreadStats.total_packets++; + + int ip_version=0; + if(parsedPacket.isPacketOfType(pcpp::IPv4)) + ip_version=4; + else if (parsedPacket.isPacketOfType(pcpp::IPv6)) + ip_version=6; + + if(!ip_version) + { + //_logger.error("Unsupported IP protocol for packet:\n %s", parsedPacket.printToString()); + return false; + } + + m_ThreadStats.ip_packets++; + + pcpp::TcpLayer* tcpLayer = parsedPacket.getLayerOfType(); + if(!tcpLayer) + { +// _logger.debug("Analyzing only TCP protocol, got packet:\n %s", parsedPacket.printToString()); + return false; + } + + + if((tcpLayer->getDataLen()-tcpLayer->getHeaderLen()) == 0) + { +// _logger.debug("Skip packet without data:\n %s", parsedPacket.printToString()); + return false; + } + + m_ThreadStats.analyzed_packets++; + m_ThreadStats.total_bytes += parsedPacket.getRawPacket()->getRawDataLen(); + + int tcp_src_port=ntohs(tcpLayer->getTcpHeader()->portSrc); + int tcp_dst_port=ntohs(tcpLayer->getTcpHeader()->portDst); + + std::unique_ptr src_ip; + std::unique_ptr dst_ip; + if(ip_version == 4) + { + src_ip.reset(new Poco::Net::IPAddress((parsedPacket.getLayerOfType()->getSrcIpAddress().toInAddr()),sizeof(in_addr))); + dst_ip.reset(new Poco::Net::IPAddress((parsedPacket.getLayerOfType()->getDstIpAddress().toInAddr()),sizeof(in_addr))); + } else { + src_ip.reset(new Poco::Net::IPAddress((parsedPacket.getLayerOfType()->getSrcIpAddress().toIn6Addr()),sizeof(in6_addr))); + dst_ip.reset(new 
Poco::Net::IPAddress((parsedPacket.getLayerOfType()->getDstIpAddress().toIn6Addr()),sizeof(in6_addr))); + } + + if(m_WorkerConfig.ipportMap && m_WorkerConfig.ipportMapLock.tryLock()) + { + IPPortMap::iterator it_ip=m_WorkerConfig.ipportMap->find(*dst_ip.get()); + if(it_ip != m_WorkerConfig.ipportMap->end()) + { + unsigned short port=tcp_dst_port; + if (it_ip->second.size() == 0 || it_ip->second.find(port) != it_ip->second.end()) + { + m_ThreadStats.matched_ip_port++; + _logger.debug("Found record in ip:port list for the client %s:%d and server %s:%d",src_ip->toString(),dst_ip->toString(),tcp_src_port,tcp_dst_port); + return true; + } + } + m_WorkerConfig.ipportMapLock.unlock(); + } + + ndpi_protocol protocol; + sw.reset(); + sw.start(); + nDPIWrapper nw; + struct ndpi_flow_struct *flow=nw.get_flow(); + uint32_t current_tickt = 0; + protocol = ndpi_detection_process_packet(extFilter::my_ndpi_struct, flow, ip_version == 4 ? (parsedPacket.getLayerOfType()->getData()) : (parsedPacket.getLayerOfType()->getData()), (parsedPacket.getLayerOfType()->getDataLen()), current_tickt, nw.get_src(), nw.get_dst()); +/* + if(tcp_dst_port == 80) + { + std::stringstream sstream; + sstream << std::hex; + for(int i=0; i < (parsedPacket.getLayerOfType()->getDataLen()); i++) + { + sstream << "0x" << std::setw(2) << std::setfill('0') << (int) *(parsedPacket.getLayerOfType()->getData()+i) << ","; + } + _logger.debug("data: %s",sstream.str()); + }*/ + + + if(protocol.protocol == NDPI_PROTOCOL_UNKNOWN) + { +// _logger.debug("Guessing protocol..."); + protocol = ndpi_guess_undetected_protocol(extFilter::my_ndpi_struct, + IPPROTO_TCP, // TCP + 0,//ip + tcp_src_port, // sport + 0, + tcp_dst_port); // dport + } + + sw.stop(); +/* _logger.debug("nDPI protocol detection occupied %ld us",sw.elapsed()); + _logger.debug("Protocol is %hu/%hu src port: %d dst port: %d",protocol.master_protocol,protocol.protocol,tcp_src_port,tcp_dst_port); +*/ + + if(protocol.master_protocol == NDPI_PROTOCOL_SSL || 
protocol.protocol == NDPI_PROTOCOL_SSL || protocol.protocol == NDPI_PROTOCOL_TOR) + { + if(m_WorkerConfig.atmSSLDomains && flow->l4.tcp.ssl_seen_client_cert == 1) + { + std::string ssl_client; + if(flow->protos.ssl.client_certificate[0] != '\0') + { + ssl_client=flow->protos.ssl.client_certificate; +// _logger.debug("SSL client is: %s",ssl_client); + } + if(!ssl_client.empty()) + { + // если не можем выставить lock, то нет смысла продолжать... + if(!m_WorkerConfig.atmSSLDomainsLock.tryLock()) + return false; + sw.reset(); + sw.start(); + if(m_WorkerConfig.lower_host) + std::transform(ssl_client.begin(), ssl_client.end(), ssl_client.begin(), ::tolower); + AhoCorasickPlus::Match match; + std::size_t host_len=ssl_client.length(); + bool found=false; + { + m_WorkerConfig.atmSSLDomains->search(ssl_client,false); + while(m_WorkerConfig.atmSSLDomains->findNext(match) && !found) + { + if(match.pattern.length != host_len) + { + DomainsMatchType::Iterator it=m_WorkerConfig.SSLdomainsMatchType.find(match.id); + bool exact_match=false; + if(it != m_WorkerConfig.SSLdomainsMatchType.end()) + exact_match = it->second; + if(exact_match) + continue; + if(ssl_client[host_len-match.pattern.length-1] != '.') + continue; + } + found=true; + } + } + m_WorkerConfig.atmSSLDomainsLock.unlock(); + sw.stop(); +// _logger.debug("SSL Host seek occupied %ld us, host: %s",sw.elapsed(),ssl_client); + if(found) + { + m_ThreadStats.matched_ssl++; + _logger.debug("SSL host %s present in SSL domain (file line %u) list from ip %s:%d to ip %s:%d", ssl_client, match.id, (parsedPacket.getLayerOfType()->getSrcIpAddress().toString()),tcp_src_port,(parsedPacket.getLayerOfType()->getDstIpAddress().toString()),tcp_dst_port); + if (pcapWriter) + pcapWriter->writePacket(*(parsedPacket.getRawPacket())); + + std::string empty_str; + SenderTask::queue.enqueueNotification(new RedirectNotification(tcp_src_port, tcp_dst_port,src_ip.get(), dst_ip.get(), tcpLayer->getTcpHeader()->ackNumber, 
tcpLayer->getTcpHeader()->sequenceNumber, (tcpLayer->getTcpHeader()->pshFlag ? 1 : 0 ),empty_str,true)); + m_ThreadStats.sended_rst++; + return true; + } else { + return false; + } + } else { + if(m_WorkerConfig.block_undetected_ssl) + { + if(m_WorkerConfig.sslIPs->try_search_exact_ip(*dst_ip.get())) + { + m_ThreadStats.matched_ssl_ip++; + _logger.debug("Blocking/Marking SSL client hello packet from %s:%d to %s:%d", src_ip->toString(),tcp_src_port,dst_ip->toString(),tcp_dst_port); + m_ThreadStats.sended_rst++; + std::string empty_str; + SenderTask::queue.enqueueNotification(new RedirectNotification(tcp_src_port, tcp_dst_port,src_ip.get(), dst_ip.get(), tcpLayer->getTcpHeader()->ackNumber, tcpLayer->getTcpHeader()->sequenceNumber, (tcpLayer->getTcpHeader()->pshFlag ? 1 : 0 ),empty_str,true)); + return true; + } + return false; + } +// _logger.debug("No ssl client certificate found! Accept packet from %s:%d to %s:%d.",src_ip->toString(),tcp_src_port,dst_ip->toString(),tcp_dst_port); + return false; + } + } + return false; + } + + + if(protocol.master_protocol != NDPI_PROTOCOL_HTTP && protocol.protocol != NDPI_PROTOCOL_HTTP && protocol.protocol != NDPI_PROTOCOL_DIRECT_DOWNLOAD_LINK) + { + return false; + } + + std::string host((char *)&flow->host_server_name[0]); + if((flow->http.method == HTTP_METHOD_GET || flow->http.method == HTTP_METHOD_POST || flow->http.method == HTTP_METHOD_HEAD) && !host.empty()) + { + int dot_del=0; +// _logger.debug("Analyzing host %s", host); + if(m_WorkerConfig.atmDomains && !host.empty()) + { + if(m_WorkerConfig.atmDomainsLock.tryLock()) + { + if(host[host.length()-1] == '.') + { + dot_del=host.length()-1; + host.erase(dot_del,1); + } + if(m_WorkerConfig.lower_host) + std::transform(host.begin(), host.end(), host.begin(), ::tolower); + sw.reset(); + sw.start(); + + AhoCorasickPlus::Match match; + bool found=false; + { + m_WorkerConfig.atmDomains->search(host,false); + std::size_t host_len=host.length(); + 
while(m_WorkerConfig.atmDomains->findNext(match) && !found) + { + if(match.pattern.length != host_len) + { + DomainsMatchType::Iterator it=m_WorkerConfig.domainsMatchType.find(match.id); + bool exact_match=false; + if(it != m_WorkerConfig.domainsMatchType.end()) + exact_match = it->second; + if(exact_match) + continue; + if(host[host_len-match.pattern.length-1] != '.') + continue; + } + found=true; + } + } + m_WorkerConfig.atmDomainsLock.unlock(); + sw.stop(); + //_logger.debug("Host %s seek occupied %ld us", host, sw.elapsed()); + if(found) + { + m_ThreadStats.matched_domains++; + _logger.debug("Host %s present in domain (file line %u) list from ip %s to ip %s", host, match.id, (parsedPacket.getLayerOfType()->getSrcIpAddress().toString()), (parsedPacket.getLayerOfType()->getDstIpAddress().toString())); + if (pcapWriter) + pcapWriter->writePacket(*(parsedPacket.getRawPacket())); + + if(m_WorkerConfig.http_redirect) + { + std::string add_param; + switch (m_WorkerConfig.add_p_type) + { + case A_TYPE_ID: add_param="id="+std::to_string(match.id); + break; + case A_TYPE_URL: add_param="url="+host; + break; + default: break; + } + SenderTask::queue.enqueueNotification(new RedirectNotification(tcp_src_port, tcp_dst_port, src_ip.get(), dst_ip.get(), tcpLayer->getTcpHeader()->ackNumber, tcpLayer->getTcpHeader()->sequenceNumber, (tcpLayer->getTcpHeader()->pshFlag ? 1 : 0 ), add_param)); + m_ThreadStats.redirected_domains++; + } else { + std::string empty_str; + SenderTask::queue.enqueueNotification(new RedirectNotification(tcp_src_port, tcp_dst_port,src_ip.get(), dst_ip.get(), tcpLayer->getTcpHeader()->ackNumber, tcpLayer->getTcpHeader()->sequenceNumber, (tcpLayer->getTcpHeader()->pshFlag ? 1 : 0 ),empty_str,true)); + m_ThreadStats.sended_rst++; + } + return true; + } + } + } + + std::string uri_o(flow->http.url ? 
flow->http.url : ""); + if(m_WorkerConfig.atm && !uri_o.empty()) + { +// _logger.debug("test url %s", uri_o); + if(m_WorkerConfig.atmLock.tryLock()) + { + std::string uri; + if(dot_del) + uri_o.erase(dot_del+7,1); + try + { + Poco::URI uri_p(uri_o); + uri_p.normalize(); + uri.assign(uri_p.toString()); +/* if(_config.url_decode) + { +#ifdef __USE_POCO_URI_DECODE + Poco::URI::decode(uri_p.toString(),uri); +#else + uri=url_decode(uri); +#endif + }*/ + } catch (Poco::SyntaxException &ex) + { + _logger.debug("An SyntaxException occured: '%s' on URI: '%s'", ex.displayText(), uri_o); + uri.assign(uri_o); + } + AhoCorasickPlus::Match match; + bool found=false; + m_WorkerConfig.atm->search(uri,false); + while(m_WorkerConfig.atm->findNext(match) && !found) + { + if(m_WorkerConfig.match_url_exactly && uri.length() != match.pattern.length) + continue; + found=true; + } + m_WorkerConfig.atmLock.unlock(); + if(found) + { + m_ThreadStats.matched_urls++; + _logger.debug("URL %s present in url (file pos %u) list from ip %s to ip %s",uri,match.id,(parsedPacket.getLayerOfType()->getSrcIpAddress().toString()),(parsedPacket.getLayerOfType()->getDstIpAddress().toString())); + if (pcapWriter) + pcapWriter->writePacket(*(parsedPacket.getRawPacket())); + if(m_WorkerConfig.http_redirect) + { + std::string add_param; + switch (m_WorkerConfig.add_p_type) + { + case A_TYPE_ID: add_param="id="+std::to_string(match.id); + break; + case A_TYPE_URL: add_param="url="+host; + break; + default: break; + } + SenderTask::queue.enqueueNotification(new RedirectNotification(tcp_src_port, tcp_dst_port, src_ip.get(), dst_ip.get(), tcpLayer->getTcpHeader()->ackNumber, tcpLayer->getTcpHeader()->sequenceNumber, (tcpLayer->getTcpHeader()->pshFlag ? 
1 : 0 ), add_param)); + m_ThreadStats.redirected_urls++; + } else { + std::string empty_str; + SenderTask::queue.enqueueNotification(new RedirectNotification(tcp_src_port, tcp_dst_port,src_ip.get(), dst_ip.get(), tcpLayer->getTcpHeader()->ackNumber, tcpLayer->getTcpHeader()->sequenceNumber, (tcpLayer->getTcpHeader()->pshFlag ? 1 : 0 ),empty_str,true)); + m_ThreadStats.sended_rst++; + } + return true; + } + } + } + } + return false; +} + + +bool WorkerThread::run(uint32_t coreId) +{ + m_CoreId = coreId; + m_Stop = false; + + // if no DPDK devices were assigned to this worker/core don't enter the main loop and exit + if (m_WorkerConfig.InDataCfg.size() == 0) + { + return true; + } + + if (!m_WorkerConfig.PathToWritePackets.empty()) + { + pcapWriter = new pcpp::PcapFileWriterDevice(m_WorkerConfig.PathToWritePackets.c_str()); + if (!pcapWriter->open()) + { + _logger.error("Couldn't open pcap writer device"); + } + } + + // main loop, runs until be told to stop + while (!m_Stop) + { + // go over all DPDK devices configured for this worker/core + for (InputDataConfig::iterator iter = m_WorkerConfig.InDataCfg.begin(); iter != m_WorkerConfig.InDataCfg.end(); iter++) + { + // for each DPDK device go over all RX queues configured for this worker/core + for (std::vector::iterator iter2 = iter->second.begin(); iter2 != iter->second.end(); iter2++) + { + pcpp::DpdkDevice* dev = iter->first; + pcpp::MBufRawPacket* packetArr = NULL; + int packetArrLen = 0; + // receive packets from network on the specified DPDK device and RX queue + if (!dev->receivePackets(&packetArr, packetArrLen, *iter2)) + { + _logger.error("Couldn't receive packet from DpdkDevice #%d, RX queue #%d", dev->getDeviceId(), *iter2); + } + + for (int i = 0; i < packetArrLen; i++) + { + pcpp::Packet parsedPacket(&packetArr[i]); + analyzePacket(parsedPacket); + } + delete [] packetArr; + } + } + } + if (pcapWriter != NULL) + delete pcapWriter; + _logger.debug("Worker thread on core %u terminated", coreId); + return 
true; +}