ilscipio/scipio-erp

View on GitHub
applications/product/config/SeoConfig.xml

Summary

Maintainability
Test Coverage
<?xml version="1.0" encoding="UTF-8" ?>
<seo-config xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><!-- omitted for now: xsi:noNamespaceSchemaLocation="../dtd/SeoConfig.xsd" -->
    
    <!-- 
        SCIPIO: ALTERNATIVE URL AND SEO URL COMMON CONFIGURATION 
    
        This file controls the format of the non-SEO and SEO alternative URLs (ALTERNATIVE_URL)
        generated by the generateWebsiteAlternativeUrls/generateAllAlternativeUrls services
        and any ECA/SECA that might trigger them, as well as the WebSite SEO screen 
        (https://localhost:8443/catalog/control/WebSiteSeo?webSiteId=ScipioWebStore).
        
        For websites which have SEO URLs enabled, it also controls their format and handling.
        To enable advanced SEO URLs for a website, it must:
            1) Have its mount-point in the allowed-context-paths list below
            2) Define SeoCatalogUrlFilter in its web.xml, with seoUrlEnabled filter init-param set to true
               (see component://shop/webapp/shop/WEB-INF/web.xml for example and additional notes)
           
        WARN: Enabling any *-implicit-* settings or settings that omit IDs (*-append-id disable)
            can potentially cause conflicts if two categories or products share the same name.
            The SEO filter can handle some of these cases, where the category trail is enough to disambiguate,
            but otherwise it is up to the store to avoid having duplicate names while these settings are in use.
            Generally, it is most important not to have duplicate category names, as much as possible.
            If issues arise, debug by toggling these settings and querying the Product[Category]ContentAndElectronicText entities (ALTERNATIVE_URL).
            The *-max-length settings can also affect this.
    
        WARN: 2017-11: Some combinations of settings (other than the defaults in trunk) 
            may be untested at this time, notably the *-implicit-* settings.
    -->
    
    <!-- Regular expression gate. As written, the pattern matches any string containing at least one "/".
        NOTE(review): the exact consumer of this setting is not visible in this file; presumably
        incoming URLs must match it before further regexp handling is attempted. Confirm against the config reader. -->
    <regexpifmatch>^.*/.*$</regexpifmatch>

    <seo-url>
        <!-- Global state toggle flag for SEO alternative URLs (if disable, uses basic CatalogUrlFilter/CatalogUrlServlet functionality) -->
        <state>enable</state>
        <!-- If state is enabled, only context paths listed here will be allowed to use the new seo transformers.
            If set to a single "*", there is no restriction by context path. -->
        <!-- 2018-11-05: There is little reason to filter here anymore; with *, any webapp having
            SeoCatalogUrlFilter in web.xml should automatically register itself as seo-enabled.
        <allowed-context-paths>/:/shop</allowed-context-paths>-->
        <allowed-context-paths>*</allowed-context-paths>

        <!-- Suffix to add to all SEO URLs (empty means no suffix). e.g.: 
        <seo-url-suffix>.html</seo-url-suffix>-->
        <seo-url-suffix></seo-url-suffix>

        <!-- Suffix behavior when matching incoming URLs:
            disable (default) - The suffix is simply stripped; URLs still work without it.
            enable - The suffix is required; if not provided in the incoming URL:
                     for implicit product/category requests, the URL is ignored by the filter (will give 404 or something else)
                     for explicit (servlet-like) product/category requests, a forward is made to the product/category request but with a null product/category ID
        -->
        <seo-url-suffix-match-required>disable</seo-url-suffix-match-required>

        <!-- Maximum lengths of the category and product names used in generated URLs -->
        <category-name-max-length>24</category-name-max-length>
        <product-name-max-length>40</product-name-max-length>

        <!-- Category trail format for product and category URLs. May be "name" (default), "id" or "disable".
            The trail is the parent category name/trail/path/rollup placed in front of the target.
            For product-url-trail-format, this decides whether generated product URLs include the trail, or product info only:
            1. if the trail is enabled, the product seo url will be (or as configured): /category-name(-category-id)/product-name(-product-id)(.html)
            2. if "disable", the product seo url will be (or as configured): /product-name(-product-id)(.html) -->
        <product-url-trail-format>name</product-url-trail-format>
        <category-url-trail-format>name</category-url-trail-format>

        <!-- DEPRECATED: Use product-url-trail-format and category-url-trail-format instead -->
        <category-name>enable</category-name>

        <!-- Target product URL generation target pattern
            TODO: product-url-target-pattern not fully implemented.
            WARN: Changing this pattern may not necessarily produce browsable URLs; the filter that accepts the request may need to be modified.
            There must be only one separator (-). Only consulted if both name and id are present. -->
        <product-url-target-pattern>${name}-${id}</product-url-target-pattern>

        <!-- If enable, prioritizes product URL in the explicit form /shop/product/ID/...
            WARN: may interfere with full category trail URLs and may cause extra paths on line to be ignored.
            NOTE(review): category-simple-id-lookup is presumably the category counterpart of the same switch; confirm. -->
        <product-simple-id-lookup>disable</product-simple-id-lookup>
        <category-simple-id-lookup>disable</category-simple-id-lookup>

        <!-- Appends category ID for intermediate (non-last) categories in generated product and category links -->
        <category-name-append-id>disable</category-name-append-id>
        <!-- Appends category ID for terminating (last) category in generated category links -->
        <category-name-append-id-last>disable</category-name-append-id-last> 
        <!-- Appends product ID in generated product links -->
        <product-name-append-id>enable</product-name-append-id>
        
        <!-- If enabled, filter tries (best-effort) to match incoming product and category paths
            that are implicit and have no explicit servlet path prefix or legacy alt url suffix.  -->
        <handle-implicit-requests>disable</handle-implicit-requests> 
        <!-- If enabled, implicit requests will only match category name paths or name+id paths, but not IDs alone (which may be too generic) 
             WARN/FIXME: This SHOULD be enabled by default in Scipio, but we currently cannot set enable as default
                for Scipio because it requires all alt URLs to be pre-generated (TODO), because otherwise the fallbacks by ID simply fail. -->
        <implicit-request-name-matches-only>disable</implicit-request-name-matches-only> 
        <!-- Implicit category URLs. WARN: Works, but risk of conflicts. -->
        <generate-implicit-category-url>disable</generate-implicit-category-url>
        <!-- Implicit product URLs. WARN: Risk of conflicts; also, never enable this if product-name-append-id is disabled.  -->
        <generate-implicit-product-url>disable</generate-implicit-product-url>
        <!-- Implicit product URLs that have no category path (special-case control). -->
        <generate-implicit-product-url-nocat>disable</generate-implicit-product-url-nocat>
        
        <!-- Controls whether filter will allow matching products/categories that don't have a rollup/trail to the catalog
            NOTE: Can be set to enable because screens have to re-run this check anyway. -->
        <allow-target-outside-catalog>enable</allow-target-outside-catalog>
        <!-- If enabled, the filter will recognize products/categories even if their path elements are invalid categories; a default path will be selected.
            NOTE: Much more user-friendly to leave this enabled. -->
        <allow-invalid-category-path-elems>enable</allow-invalid-category-path-elems>
        
        <!-- Whether to handle legacy-like (CatalogUrlFilter) alt url suffix in incoming filter (treated as explicit requests) -->
        <handle-alt-url-suffix>enable</handle-alt-url-suffix>
        <!-- Whether to append legacy alt url suffixes for explicit requests, instead of product/category servlet path prefix.
            (these appear pseudo-implicit in how they look, but are technically explicit)
            NOTE: "implicit" generation settings, when enabled, will negate these ones. -->
        <generate-product-alt-url-suffix>disable</generate-product-alt-url-suffix>
        <generate-category-alt-url-suffix>disable</generate-category-alt-url-suffix>
        <!-- Non-localized suffix strings. Legacy suffixes are "-p" and "-c" for product and category, respectively. -->
        <product-alt-url-suffix>-p</product-alt-url-suffix>
        <category-alt-url-suffix>-c</category-alt-url-suffix>
        
        <!-- When generating links, the algorithm to use to pick best category rollup trail/path from the hint/breadcrumb trail.
            Values: nonconsec-anypart (default - permissive), nonconsec-endswith (stricter). -->
        <new-url-trail-resolver>nonconsec-anypart</new-url-trail-resolver>
    </seo-url>

    <url-configs>
        <!-- Default HTTP status codes; 301 is "Moved Permanently".
            NOTE(review): where these two defaults are consumed is not visible in this file; confirm against the config reader. -->
        <default-response-code>301</default-response-code>
        <default-redirect-response-code>301</default-redirect-response-code>
        <!-- 2017-11: Not currently needed or supported - use urlrewrite.xml (TODO: REVIEW: can probably delete this)
        <!- - Example
        <url-config>
            <description>sample: remove /main</description>
            <url-pattern>^/main$</url-pattern>
            <seo>
                <replacement>/</replacement>
            </seo>
            <forward>
                <replacement>/</replacement>
                <responsecode>301</responsecode>
            </forward>
        </url-config> - ->
        -->
    </url-configs>

    <!-- Processors used to build the SEO alternative URL before it is stored in the DB as ALTERNATIVE_URL. 
        NOTE: Contrary to stock ofbiz, the entire processing is done prior to storage in DB -
            processing upon lookup must be avoided entirely to allow sane DB queries.
        NOTE(review): processors are presumably applied in the order listed below; confirm against the implementation. -->
    <url-processors type="alt-url-gen">
        <!-- static-method processor: defines a static method (Class#method) that takes a String and returns a String -->
        <processor type="static-method">com.ilscipio.scipio.ce.util.SeoStringUtil#constructSeoName</processor>
        <!-- char-filters processor: references a <char-filters name="..."> element in this file by name -->
        <processor type="char-filters">default</processor>
        <!-- The legacy CatalogUrlFilter post-filter - may be good to leave for compatibility and extra safety -->
        <processor type="static-method">org.ofbiz.common.UrlServletHelper#invalidCharacter</processor>
    </url-processors>

    <!-- Runtime URL handlers (builders, sanitizers) -->
    <url-handlers>
        <!-- Main worker; a custom worker must extend com.ilscipio.scipio.product.seo.SeoCatalogUrlWorker.
            Default implementation: com.ilscipio.scipio.product.seo.SeoCatalogUrlWorker
            To override, uncomment the element below and point factoryClass at the custom factory. -->
        <!--<worker factoryClass="com.ilscipio.scipio.product.seo.SeoCatalogUrlWorker$Factory"/>-->
    </url-handlers>

    <!-- Simple regex character filters, applied with "replaceAll" in the order defined.
        Each <character-pattern> is a regular expression; each match is replaced by <replacement>.
        NOTE: These could be done in URLRewriteFilter, but there may be an advantage to having regexps specific to product/catalog links -->
    <char-filters name="default">
        <!-- U+00FC (u with diaeresis) -->
        <char-filter>
            <character-pattern>\u00fc</character-pattern>
            <replacement>ue</replacement>
        </char-filter>
        <!-- U+00E4 (a with diaeresis) -->
        <char-filter>
            <character-pattern>\u00e4</character-pattern>
            <replacement>ae</replacement>
        </char-filter>
        <!-- U+00F6 (o with diaeresis) -->
        <char-filter>
            <character-pattern>\u00f6</character-pattern>
            <replacement>oe</replacement>
        </char-filter>
        <!-- U+00DF (sharp s) -->
        <char-filter>
            <character-pattern>\u00df</character-pattern>
            <replacement>ss</replacement>
        </char-filter>
        <!-- NOTE(review): read as a regex, this pattern matches one or more literal backslashes, not a plus sign.
            If the intent was to turn "+" into "und", the pattern would need a single backslash before the plus.
            Left unchanged because fixing it alters generated URLs; confirm the intent first. -->
        <char-filter>
            <character-pattern>\\+</character-pattern>
            <replacement>und</replacement>
        </char-filter>
        <!-- U+0026 (ampersand) -->
        <char-filter>
            <character-pattern>\u0026</character-pattern>
            <replacement>und</replacement>
        </char-filter>
        <!-- U+00E8 (e with grave). Written as an escape, like the filters above, so that it
            survives file re-encoding; the previous literal had been corrupted into unrelated characters. -->
        <char-filter>
            <character-pattern>\u00e8</character-pattern>
            <replacement>e</replacement>
        </char-filter>
        <!-- please keep the following 2 filters, don't remove them -->
        <!-- Any character other than an ASCII letter, digit, "+" or "-" becomes a hyphen -->
        <char-filter>
            <character-pattern>[^A-Za-z0-9+-]</character-pattern>
            <replacement>-</replacement>
        </char-filter>
        <!-- Runs of two or more hyphens collapse into a single hyphen -->
        <char-filter>
            <character-pattern>-{2,}</character-pattern>
            <replacement>-</replacement>
        </char-filter>
    </char-filters>

    <!-- URL filters: each pattern is matched against the full URL, and matching entries are filtered out.
        No filters are active by default; the entry below is a commented-out example. -->
    <url-filters>
        <!-- Skip jackets
        <url-filter type="regex">^([^/]*//[^/]*)?(/control)?/shop.*mens-jacket.*$</url-filter>-->
    </url-filters>
</seo-config>