Class Texy
In: lib/texy.rb
lib/texy/modifier.rb
lib/texy/dom.rb
lib/texy/url.rb
lib/texy/html.rb
lib/texy/html_well_form.rb
lib/texy/constants.rb
lib/texy/parser.rb
lib/texy/modules/long_words.rb
lib/texy/modules/image_desc.rb
lib/texy/modules/phrase.rb
lib/texy/modules/image.rb
lib/texy/modules/definition_list.rb
lib/texy/modules/generic_block.rb
lib/texy/modules/table.rb
lib/texy/modules/block.rb
lib/texy/modules/quote.rb
lib/texy/modules/quick_correct.rb
lib/texy/modules/link.rb
lib/texy/modules/heading.rb
lib/texy/modules/horiz_line.rb
lib/texy/modules/formatter.rb
lib/texy/modules/html.rb
lib/texy/modules/script.rb
lib/texy/modules/smilies.rb
lib/texy/modules/base.rb
lib/texy/modules/list.rb
Parent: Object

This file is part of TexieR - universal text to html converter.

Author

rane <rane@metatribe.org>

Copyright

Original version:

  Copyright (c) 2004-2006 David Grudl

Ruby port:

  Copyright (c) 2006 rane

Texier is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License version 2 as published by the Free Software Foundation.

Texier is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

Version

 0.1 ($Revision: 25 $ $Date: 2006-10-24 21:52:06 +0200 (Ut, 24 okt 2006) $)

Methods

Classes and Modules

Module Texy::DomEasyAccess
Module Texy::Modules
Class Texy::BlockElement
Class Texy::BlockParser
Class Texy::BlockQuoteElement
Class Texy::CodeBlockElement
Class Texy::Dom
Class Texy::DomElement
Class Texy::DomLine
Class Texy::GenericBlockElement
Class Texy::HeadingElement
Class Texy::Html
Class Texy::HtmlBlockElement
Class Texy::HtmlElement
Class Texy::HtmlParser
Class Texy::HtmlTagElement
Class Texy::ImageDescElement
Class Texy::ImageElement
Class Texy::ImageReference
Class Texy::InlineTagElement
Class Texy::LineParser
Class Texy::LinkElement
Class Texy::LinkRefElement
Class Texy::LinkReference
Class Texy::ListElement
Class Texy::ListItemElement
Class Texy::Modifier
Class Texy::Parser
Class Texy::QuoteElement
Class Texy::SourceBlockElement
Class Texy::TextualElement
Class Texy::Url

Constants

HASH = "\x15-\x1F"   hashing meta-characters
HASH_SPACES = "\x15-\x18"
HASH_NC = "\x19\x1B-\x1F"
HASH_WC = "\x1A-\x1F"
PATTERN_LINK_REF = /\[[^\[\]\*\n#{HASH}]+?\]/   links
PATTERN_LINK_IMAGE = /\[\*[^\n#{HASH}]+\*\]/
PATTERN_LINK_URL = /(?:\[[^\]\n]+?\]|(?!\[)[^\s#{HASH}]*[^:\);,\.!\?\s#{HASH}])/
PATTERN_LINK = /(?::(#{PATTERN_LINK_URL}))/
PATTERN_LINK_N = /(?::(#{PATTERN_LINK_URL}|:))/
PATTERN_EMAIL = /[a-z0-9.+_-]+@[a-z0-9.+_-]{2,}\.[a-z]{2,}/
PATTERN_MODIFIER = /(?:\ *(?:\ \.|^\.)(\([^\n\)]+\)|\[[^\n\]]+\]|\{[^\n\}]+\})(\([^\n\)]+\)|\[[^\n\]]+\]|\{[^\n\}]+\})??(\([^\n\)]+\)|\[[^\n\]]+\]|\{[^\n\}]+\})??)/   modifier .(title)[class]{style}
PATTERN_MODIFIER_H = /(?:\ *(?:\ \.|^\.)(\([^\n\)]+\)|\[[^\n\]]+\]|\{[^\n\}]+\}|(?:<>|>|=|<))(\([^\n\)]+\)|\[[^\n\]]+\]|\{[^\n\}]+\}|(?:<>|>|=|<))??(\([^\n\)]+\)|\[[^\n\]]+\]|\{[^\n\}]+\}|(?:<>|>|=|<))??(\([^\n\)]+\)|\[[^\n\]]+\]|\{[^\n\}]+\}|(?:<>|>|=|<))??)/   modifier .(title)[class]{style}<>
PATTERN_MODIFIER_HV = /(?:\ *(?:\ \.|^\.)(\([^\n\)]+\)|\[[^\n\]]+\]|\{[^\n\}]+\}|(?:<>|>|=|<)|(?:\^|\-|\_))(\([^\n\)]+\)|\[[^\n\]]+\]|\{[^\n\}]+\}|(?:<>|>|=|<)|(?:\^|\-|\_))??(\([^\n\)]+\)|\[[^\n\]]+\]|\{[^\n\}]+\}|(?:<>|>|=|<)|(?:\^|\-|\_))??(\([^\n\)]+\)|\[[^\n\]]+\]|\{[^\n\}]+\}|(?:<>|>|=|<)|(?:\^|\-|\_))??(\([^\n\)]+\)|\[[^\n\]]+\]|\{[^\n\}]+\}|(?:<>|>|=|<)|(?:\^|\-|\_))??)/   modifier .(title)[class]{style}<>^
PATTERN_IMAGE = /\[\*([^\n#{HASH}]+?)#{PATTERN_MODIFIER}?\ *(\*|>|<)\]/   images [* urls .(title)[class]{style} >]

Attributes

allowed_classes  [RW]  Allowed classes
allowed_styles  [RW]  Allowed inline CSS styles
allowed_tags  [RW]  Allowed HTML tags
block_module  [R]  Default modules
block_patterns  [R]  Registered regexps and associated handlers for block parsing.

Format:

  {:handler => proc,
   :pattern => regular expression}
definition_list_module  [R]  Default modules
dom  [R]  DOM structure for parsed text
formatter_module  [R]  Default modules
generic_block_module  [R]  Default modules
heading_module  [R]  Default modules
horiz_line_module  [R]  Default modules
html_module  [R]  Default modules
image_desc_module  [R]  Default modules
image_module  [R]  Default modules
line_patterns  [R]  Registered regexps and associated handlers for inline parsing.

Format:

  {:handler => proc,
   :pattern => regular expression}
link_module  [R]  Default modules
list_module  [R]  Default modules
merge_lines  [RW]  Merge lines mode
modules  [R]  List of all used modules
obfuscate_email  [W]  Do obfuscate e-mail addresses?
phrase_module  [R]  Default modules
quick_correct_module  [R]  Default modules
quote_module  [R]  Default modules
reference_handler  [RW] 
smilies_module  [R]  Default modules
summary  [RW]  Parsing summary
tab_width  [RW]  TAB width (for converting tabs to spaces)
table_module  [R]  Default modules

Public Class methods

Translate all white spaces (\t \n \r space) to meta-spaces \x15-\x18 which are ignored by some formatting functions

[Source]

# File lib/texy.rb, line 288
    # Swap every real whitespace character for its meta-space stand-in:
    # space => \x15, tab => \x16, CR => \x17, LF => \x18.
    # Returns a new string; the argument itself is not modified.
    def self.freeze_spaces(string)
        whitespace  = " \t\r\n"
        meta_spaces = "\x15\x16\x17\x18"
        string.tr(whitespace, meta_spaces)
    end

[Source]

# File lib/texy.rb, line 302
    # Tell whether the second character of +hash+ is the \x1F control char.
    # Returns true or false.
    def self.hash_opening?(hash)
        marker = hash[1].chr
        marker == "\x1F"
    end

[Source]

# File lib/texy.rb, line 104
    # Set up a converter with permissive defaults (all classes, all styles and
    # every valid HTML tag allowed; e-mail obfuscation and line merging on),
    # empty pattern/reference registries, and the default set of modules.
    def initialize
        self.tab_width = 8
        self.allowed_classes = :all
        self.allowed_styles = :all
        # full support for HTML tags
        self.allowed_tags = Texy::Html::VALID
        self.obfuscate_email = true
        self.summary = { :images => [], :links => [], :preload => [] }
        self.merge_lines = true

        # registries start out empty
        @line_patterns, @block_patterns = [], []
        @references = {}

        # load all modules
        load_modules
    end

Revert meta-spaces back to normal spaces

[Source]

# File lib/texy.rb, line 293
    # Turn the meta-spaces back into real whitespace:
    # \x15 => space, \x16 => tab, \x17 => CR, \x18 => LF.
    # Returns a new string; the argument itself is not modified.
    def self.unfreeze_spaces(string)
        meta_spaces = "\x15\x16\x17\x18"
        whitespace  = " \t\r\n"
        string.tr(meta_spaces, whitespace)
    end

Remove special control chars used by Texy!

[Source]

# File lib/texy.rb, line 298
    # Strip every character in the \x15-\x1F range from +text+.
    # Returns a new string; +text+ itself is not modified.
    def self.wash(text)
        control_run = /[\x15-\x1F]+/
        text.gsub(control_run, '')
    end

Public Instance methods

Add new named reference

[Source]

# File lib/texy.rb, line 309
    # Store +obj+ under a case-insensitive name.
    #
    # name:: reference name; it is lower-cased before being used as the key
    #        (watch out for UTF-8 names - downcase may not fold them)
    # obj::  the object to store
    #
    # Returns +obj+.
    def add_reference(name, obj)
        # Non-destructive downcase: the bang variant would rewrite the caller's
        # string in place and raise on a frozen string.
        @references[name.downcase] = obj
    end

[Source]

# File lib/texy.rb, line 89
    # Predicate-style reader for the write-only obfuscate_email attribute.
    # Returns the value last assigned to it (initialize sets it to true).
    def obfuscate_email?
        @obfuscate_email
    end

Convert Texy! document into internal DOM structure.

Before converting it normalizes text and calls all pre-processing modules.

[Source]

# File lib/texy.rb, line 140
    # Build the DOM for a Texy! document.
    #
    # Calls init, creates a new Texy::Dom bound to this converter, stores it
    # in @dom and lets it parse +source+.
    # Returns whatever Texy::Dom#parse returns.
    def parse(source)
        init

        document = Texy::Dom.new(self)
        @dom = document
        document.parse(source)
    end

Convert Texy! document into (X)HTML code.

[Source]

# File lib/texy.rb, line 127
    # Convert +source+ and return the DOM's HTML rendering.
    #
    # source::      the text to convert
    # single_line:: when truthy the text goes through parse_line,
    #               otherwise through parse
    def process(source, single_line = false)
        single_line ? parse_line(source) : parse(source)
        dom.to_html
    end

Retrieve a named reference. If it does not exist, try calling the user-supplied reference handler to create one.

[Source]

# File lib/texy.rb, line 315
    # Look up a named reference, case-insensitively.
    #
    # Returns the stored object when one exists under the lower-cased name;
    # otherwise the result of reference_handler.call(name, self) when a handler
    # is set (it receives the name as given, not lower-cased); otherwise false.
    def reference(name)
        key = name.downcase # watch out for UTF8 !
        stored = @references[key]
        return stored if stored

        handler = reference_handler
        handler ? handler.call(name, self) : false
    end

[Source]

# File lib/texy.rb, line 234
    # Append a {:handler, :pattern} entry to the block-pattern registry.
    # Returns the registry array.
    def register_block_pattern(handler, pattern)
        # raise ArgumentError, 'Not a block pattern: ' + pattern.source unless /(.)\^.*\$\\1[a-z]*/i =~ pattern

        entry = { :handler => handler, :pattern => pattern }
        @block_patterns << entry
    end

[Source]

# File lib/texy.rb, line 220
    # Append a {:handler, :pattern} entry to the line-pattern registry.
    # Returns the registry array.
    def register_line_pattern(handler, pattern)
        entry = { :handler => handler, :pattern => pattern }
        @line_patterns << entry
    end

[Source]

# File lib/texy.rb, line 154
    # Add +mod+ to the list of used modules, creating the list on first use.
    # Returns the module list.
    def register_module(mod)
        (@modules ||= []) << mod
    end

Switch Texy and default modules to safe mode

Suitable for comments and other uses where an attacker may supply the input text.

[Source]

# File lib/texy.rb, line 265
    # Lock the converter and its default modules down for untrusted input.
    def safe_mode
        # no class or ID are allowed
        self.allowed_classes = false
        # style modifiers are disabled
        self.allowed_styles = false

        # restrict the HTML module to its safe mode
        html_module.safe_mode
        # disable images
        image_module.allowed = false
        # force rel="nofollow"
        link_module.force_no_follow = true
    end

Switch Texy and default modules to (default) trust mode

[Source]

# File lib/texy.rb, line 276
    # Switch the converter and its default modules back to trust mode:
    # classes, IDs and inline styles are allowed, the HTML module is put in
    # its trust mode, images are enabled and links no longer get a forced
    # rel="nofollow".
    def trust_mode
        self.allowed_classes = :all # classes and id are allowed
        self.allowed_styles = :all # inline styles are allowed
        html_module.trust_mode # full support for HTML tags
        image_module.allowed = true # enable images
        # Must be false here: true is what safe_mode sets, and leaving it on
        # would keep forcing nofollow after returning to trust mode.
        link_module.force_no_follow = false # disable automatic rel="nofollow"
    end

Protected Instance methods

Initialization

It is called by parse at the start of every conversion, i.e. after the constructor and before the text is processed.

[Source]

# File lib/texy.rb, line 248
    # Reset the per-run state and initialise every registered module.
    #
    # Clears the cache and both pattern registries, then calls +init+ on each
    # entry of +modules+ in order.
    #
    # Raises RuntimeError when no module is installed - including the case
    # where register_module was never called and the module list is still nil.
    # Returns the array of the modules' +init+ results.
    def init
        @cache = []
        @line_patterns = []
        @block_patterns = []

        # The module list is created lazily by register_module, so it may be
        # nil here; treat that the same as an empty list.
        installed = modules
        raise RuntimeError, 'Texy: No modules installed' if installed.nil? || installed.empty?

        # init modules
        installed.map(&:init)
    end

Create array of all used modules.

This array can be changed by overriding this method (by subclasses) or directly in main code.

[Source]

# File lib/texy.rb, line 184
    # Instantiate the default modules, handing this converter to each one and
    # keeping every instance in its own instance variable.
    #
    # NOTE(review): nothing here adds to the module list directly, so each
    # module presumably registers itself through register_module from its
    # constructor - confirm against Modules::Base. If so, the statement order
    # below is also the order in which init later initialises the modules.
    def load_modules
        # Line parsing - order does not matter
        @script_module = Modules::Script.new(self)
        @html_module = Modules::Html.new(self)
        @image_module = Modules::Image.new(self)
        @link_module = Modules::Link.new(self)
        @phrase_module = Modules::Phrase.new(self)
        @smilies_module = Modules::Smilies.new(self)

        # Block parsing - order does not matter
        @block_module = Modules::Block.new(self)
        @heading_module = Modules::Heading.new(self)
        @horiz_line_module = Modules::HorizLine.new(self)
        @quote_module = Modules::Quote.new(self)
        @list_module = Modules::List.new(self)
        @definition_list_module = Modules::DefinitionList.new(self)
        @table_module = Modules::Table.new(self)
        @image_desc_module = Modules::ImageDesc.new(self)
        @generic_block_module = Modules::GenericBlock.new(self)

        # post process
        @quick_correct_module = Modules::QuickCorrect.new(self)
        # the long-words module is currently disabled
#        @long_words_module = Modules::LongWords.new(self)
        @formatter_module = Modules::Formatter.new(self)
    end

[Validate]