LangChain Integration Guide
Complete guide to integrating Cortex with LangChain for building powerful AI applications with real-time web knowledge.
🚀 Quick Start
Installation
pip install langchain cortex-ai openai
Basic Setup
from langchain.tools import Tool
from langchain.agents import initialize_agent, AgentType
from langchain.llms import OpenAI
from cortex import CortexClient
import os
# Initialize API clients; keys come from the environment so that
# secrets never appear in source control.
_cortex_key = os.getenv("CORTEX_API_KEY")
_openai_key = os.getenv("OPENAI_API_KEY")
cortex_client = CortexClient(api_key=_cortex_key)
llm = OpenAI(temperature=0, openai_api_key=_openai_key)
# Create Cortex tools
def cortex_search(query: str) -> str:
    """Search the web for current information via Cortex.

    Returns a summary plus the list of source URLs, or a failure
    message when the search did not succeed.
    """
    result = cortex_client.search(query, max_results=5)
    if not result.success:
        return "Search failed"
    urls = [s.url for s in result.sources]
    return f"Summary: {result.summary}\n\nSources: {urls}"
def cortex_extract(url: str) -> str:
    """Extract clean text content from a specific URL via Cortex."""
    result = cortex_client.extract(url)
    if not result.success:
        return "Extraction failed"
    # Truncate so the text fits comfortably in the agent's context window.
    return result.text[:2000]
def cortex_validate(claim: str) -> str:
    """Validate a factual claim against web sources via Cortex."""
    outcome = cortex_client.validate(claim)
    return (
        f"Validation: {outcome.validation_result} "
        f"(Confidence: {outcome.confidence_score:.2f})"
    )
# Wrap the Cortex helpers as LangChain tools so the agent can call them.
tools = [
    Tool(
        name="Web Search",
        func=cortex_search,
        description="Search the web for current information about any topic",
    ),
    Tool(
        name="Extract Content",
        func=cortex_extract,
        description="Extract clean text content from a specific URL",
    ),
    Tool(
        name="Validate Claim",
        func=cortex_validate,
        description="Validate whether a factual claim is true or false",
    ),
]

# Build a ReAct-style agent that decides which tool to invoke each step.
agent = initialize_agent(
    tools,
    llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
)

# Ask a question that requires fresh web knowledge.
response = agent.run("What are the latest developments in quantum computing?")
print(response)
🔧 Advanced Integrations
Custom Cortex Tool Class
from langchain.tools import BaseTool
from langchain.callbacks.manager import CallbackManagerForToolRun
from typing import Optional, Dict, Any
from pydantic import BaseModel, Field
class CortexSearchTool(BaseTool):
    """LangChain tool exposing Cortex web search.

    `cortex_client` is a required pydantic field (excluded from
    serialization) and must be supplied at construction time.
    """

    name: str = "cortex_search"
    description: str = "Search the web for current information. Input should be a search query."
    cortex_client: Any = Field(exclude=True)

    def __init__(self, cortex_client, **kwargs):
        # BUGFIX: `cortex_client` is a required pydantic field, so calling
        # super().__init__(**kwargs) without it raises a ValidationError
        # before the assignment the original code attempted could run.
        # Route the client through the pydantic constructor instead.
        super().__init__(cortex_client=cortex_client, **kwargs)

    def _run(
        self,
        query: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Execute the search and return a formatted summary with sources."""
        try:
            result = self.cortex_client.search(
                query=query,
                max_results=8,
                recency="week",
            )
            if result.success:
                # Format response with sources
                formatted_response = f"Summary: {result.summary}\n\n"
                formatted_response += "Sources:\n"
                for i, source in enumerate(result.sources[:5], 1):
                    formatted_response += f"{i}. {source.title} ({source.url})\n"
                    formatted_response += f" Confidence: {source.confidence:.2f}\n"
                return formatted_response
            else:
                return "Search failed - no results found"
        except Exception as e:
            return f"Search error: {str(e)}"
class CortexValidationTool(BaseTool):
    """LangChain tool exposing Cortex claim validation.

    `cortex_client` is a required pydantic field (excluded from
    serialization) and must be supplied at construction time.
    """

    name: str = "cortex_validate"
    description: str = "Validate factual claims against multiple sources. Input should be a claim to verify."
    cortex_client: Any = Field(exclude=True)

    def __init__(self, cortex_client, **kwargs):
        # BUGFIX: required pydantic field — must go through the pydantic
        # constructor, not be assigned after super().__init__(**kwargs)
        # (which would raise ValidationError for the missing field).
        super().__init__(cortex_client=cortex_client, **kwargs)

    def _run(
        self,
        claim: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Validate the claim and return a formatted evidence report."""
        try:
            result = self.cortex_client.validate(
                claim=claim,
                options={
                    "confidence_threshold": 0.7,
                    "require_primary_sources": True,
                },
            )
            response = f"Claim: {claim}\n"
            response += f"Validation Result: {result.validation_result}\n"
            response += f"Confidence Score: {result.confidence_score:.2f}\n"
            response += f"Consensus Level: {result.consensus_level}\n\n"
            if result.evidence.supporting:
                response += "Supporting Evidence:\n"
                for evidence in result.evidence.supporting[:3]:
                    response += f"- {evidence.source} (Authority: {evidence.authority_score:.2f})\n"
            if result.evidence.contradicting:
                response += "\nContradicting Evidence:\n"
                for evidence in result.evidence.contradicting[:3]:
                    response += f"- {evidence.source}\n"
            return response
        except Exception as e:
            return f"Validation error: {str(e)}"
# Usage: construct the custom tools once, then hand them to a ReAct agent.
cortex_client = CortexClient(api_key="your_key")
tools = [
    CortexSearchTool(cortex_client),
    CortexValidationTool(cortex_client),
]
agent = initialize_agent(
    tools,
    llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
)
Multi-Step Research Agent
from langchain.agents import Tool, AgentExecutor, LLMSingleActionAgent
from langchain.prompts import StringPromptTemplate
from langchain.chains import LLMChain
from langchain.schema import AgentAction, AgentFinish
import re
class ResearchPromptTemplate(StringPromptTemplate):
    """Prompt template that injects tool listings and the agent scratchpad."""

    template: str
    tools: list[Tool]

    def format(self, **kwargs) -> str:
        # Replay prior (action, observation) pairs as the scratchpad text.
        steps = kwargs.pop("intermediate_steps")
        scratchpad = ""
        for action, observation in steps:
            scratchpad += action.log
            scratchpad += f"\nObservation: {observation}\nThought: "
        kwargs["agent_scratchpad"] = scratchpad
        kwargs["tools"] = "\n".join(
            f"{tool.name}: {tool.description}" for tool in self.tools
        )
        kwargs["tool_names"] = ", ".join(tool.name for tool in self.tools)
        return self.template.format(**kwargs)
class ResearchOutputParser:
    """Parse raw LLM output into an AgentAction or a terminal AgentFinish."""

    # Matches "Action: <tool>" followed by "Action Input: <input>".
    _ACTION_RE = r"Action\s*\d*\s*:(.*?)\nAction\s*\d*\s*Input\s*\d*\s*:[\s]*(.*)"

    def parse(self, llm_output: str):
        if "Final Answer:" in llm_output:
            answer = llm_output.split("Final Answer:")[-1].strip()
            return AgentFinish(
                return_values={"output": answer},
                log=llm_output,
            )
        match = re.search(self._ACTION_RE, llm_output, re.DOTALL)
        if not match:
            raise ValueError(f"Could not parse LLM output: `{llm_output}`")
        tool_name = match.group(1).strip()
        tool_input = match.group(2).strip(" ").strip('"')
        return AgentAction(tool=tool_name, tool_input=tool_input, log=llm_output)
def create_research_agent(cortex_client, llm):
    """Create a specialized research agent"""

    def comprehensive_search(topic: str) -> str:
        """Perform comprehensive research on a topic"""
        # Query the topic from several angles to broaden coverage.
        angles = [
            f"latest {topic} developments",
            f"{topic} research papers 2024",
            f"{topic} industry trends",
            f"{topic} expert opinions",
        ]
        gathered = []
        for angle in angles:
            outcome = cortex_client.search(angle, max_results=5)
            if outcome.success:
                gathered.extend(outcome.sources)
        # Deduplicate by URL, keeping the highest-confidence copy.
        by_url = {}
        for src in gathered:
            seen = by_url.get(src.url)
            if seen is None or src.confidence > seen.confidence:
                by_url[src.url] = src
        ranked = sorted(by_url.values(), key=lambda s: s.confidence, reverse=True)
        report = f"Comprehensive Research Report: {topic}\n\n"
        report += f"Found {len(ranked)} unique sources\n\n"
        for i, src in enumerate(ranked[:10], 1):
            report += f"{i}. {src.title}\n"
            report += f" URL: {src.url}\n"
            report += f" Confidence: {src.confidence:.2f}\n"
            report += f" Snippet: {src.snippet[:200]}...\n\n"
        return report

    def deep_extract_and_analyze(url: str) -> str:
        """Extract and analyze content from URL"""
        content = cortex_client.extract(url, include_metadata=True)
        if not content.success:
            return "Failed to extract content"
        analysis = f"Content Analysis for: {url}\n\n"
        analysis += f"Title: {content.metadata.title}\n"
        analysis += f"Author: {content.metadata.get('author', 'Unknown')}\n"
        analysis += f"Word Count: {content.metadata.word_count}\n"
        analysis += f"Quality Score: {content.quality_score:.2f}\n\n"
        # Summarize using the first few substantial paragraphs.
        analysis += "Key Points:\n"
        for i, para in enumerate(content.text.split('\n\n')[:5], 1):
            stripped = para.strip()
            if len(stripped) > 50:
                analysis += f"{i}. {stripped[:200]}...\n"
        return analysis

    def fact_check_claims(claims: str) -> str:
        """Fact-check multiple claims"""
        claim_list = [c.strip() for c in claims.split('\n') if c.strip()]
        checked = []
        for claim in claim_list[:5]:  # Limit to 5 claims
            verdict = cortex_client.validate(claim)
            checked.append(
                {
                    "claim": claim,
                    "result": verdict.validation_result,
                    "confidence": verdict.confidence_score,
                }
            )
        report = "Fact-Check Results:\n\n"
        for i, item in enumerate(checked, 1):
            report += f"{i}. Claim: {item['claim']}\n"
            report += f" Result: {item['result']}\n"
            report += f" Confidence: {item['confidence']:.2f}\n\n"
        return report

    tools = [
        Tool(
            name="Comprehensive Search",
            func=comprehensive_search,
            description="Perform comprehensive research on a topic from multiple angles",
        ),
        Tool(
            name="Deep Content Analysis",
            func=deep_extract_and_analyze,
            description="Extract and analyze content from a specific URL in detail",
        ),
        Tool(
            name="Fact Check Claims",
            func=fact_check_claims,
            description="Fact-check multiple claims (one per line)",
        ),
    ]

    prompt_template = """You are a research assistant with access to real-time web information. Your goal is to provide comprehensive, accurate, and well-sourced research.
You have access to the following tools:
{tools}
Use the following format:
Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question
Question: {input}
{agent_scratchpad}"""

    prompt = ResearchPromptTemplate(
        template=prompt_template,
        tools=tools,
        input_variables=["input", "intermediate_steps"],
    )
    llm_chain = LLMChain(llm=llm, prompt=prompt)
    output_parser = ResearchOutputParser()
    agent = LLMSingleActionAgent(
        llm_chain=llm_chain,
        output_parser=output_parser,
        stop=["\nObservation:"],
        allowed_tools=[tool.name for tool in tools],
    )
    return AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True)
# Usage: build the research agent and run a multi-step investigation.
research_agent = create_research_agent(cortex_client, llm)
result = research_agent.run("Analyze the current state of AI safety research and fact-check key claims")
🔄 Document Loaders and Retrievers
Cortex Document Loader
from langchain.docstore.document import Document
from langchain.document_loaders.base import BaseLoader
from typing import List
import time
class CortexWebLoader(BaseLoader):
    """Load documents from web search results"""

    def __init__(self, cortex_client, search_queries: List[str], max_results_per_query: int = 5):
        self.cortex_client = cortex_client
        self.search_queries = search_queries
        self.max_results_per_query = max_results_per_query

    def load(self) -> List[Document]:
        """Load documents from search results"""
        documents: List[Document] = []
        for query in self.search_queries:
            print(f"Searching for: {query}")
            # Find candidate URLs for this query.
            search_result = self.cortex_client.search(
                query=query,
                max_results=self.max_results_per_query,
            )
            if search_result.success:
                # Pull the full text of each hit and wrap it as a Document.
                for source in search_result.sources:
                    content_result = self.cortex_client.extract(source.url)
                    if not content_result.success:
                        continue
                    documents.append(
                        Document(
                            page_content=content_result.text,
                            metadata={
                                "source": source.url,
                                "title": source.title,
                                "search_query": query,
                                "confidence": source.confidence,
                                "published_date": source.published_date,
                                "word_count": content_result.metadata.word_count,
                                "cortex_verified": True,
                            },
                        )
                    )
            # Rate limiting between queries.
            time.sleep(1)
        return documents
# Usage: load fresh web documents for three research queries.
loader = CortexWebLoader(
    cortex_client=cortex_client,
    search_queries=[
        "machine learning best practices 2024",
        "AI ethics guidelines",
        "neural network architectures",
    ],
    max_results_per_query=8,
)
documents = loader.load()
print(f"Loaded {len(documents)} documents")
Real-time Retriever
# Standard library first, then third-party imports.
import hashlib
import time  # BUGFIX: CortexRetriever calls time.time() but this snippet never imported time

from typing import List

from langchain.callbacks.manager import CallbackManagerForRetrieverRun
from langchain.schema import BaseRetriever, Document
class CortexRetriever(BaseRetriever):
    """Real-time retriever that answers queries with fresh Cortex searches.

    Results are cached per query for `cache_ttl` seconds so repeated
    questions do not re-trigger paid API searches.
    """

    def __init__(self, cortex_client, top_k: int = 5, cache_ttl: int = 3600):
        self.cortex_client = cortex_client
        self.top_k = top_k
        self.cache_ttl = cache_ttl
        # query-hash -> (documents, insertion timestamp)
        self.cache = {}

    def _get_relevant_documents(
        self,
        query: str,
        *,
        run_manager: CallbackManagerForRetrieverRun,
    ) -> List[Document]:
        """Retrieve relevant documents for `query`, using the TTL cache."""
        # Check cache
        cache_key = hashlib.md5(query.encode()).hexdigest()
        if cache_key in self.cache:
            cached_result, timestamp = self.cache[cache_key]
            if time.time() - timestamp < self.cache_ttl:
                return cached_result
            # BUGFIX: evict the stale entry; the original kept expired
            # entries forever, so the cache grew without bound.
            del self.cache[cache_key]
        # Perform real-time search
        search_result = self.cortex_client.search(
            query=query,
            max_results=self.top_k,
            recency="week",
        )
        documents = []
        if search_result.success:
            for source in search_result.sources:
                # Extract full content; fall back to the snippet on failure.
                content_result = self.cortex_client.extract(source.url)
                if content_result.success:
                    page_content = content_result.text
                    quality = content_result.quality_score
                else:
                    page_content = source.snippet
                    quality = 0.5  # Lower quality for snippets
                documents.append(
                    Document(
                        page_content=page_content,
                        metadata={
                            "source": source.url,
                            "title": source.title,
                            "confidence": source.confidence,
                            "published_date": source.published_date,
                            "extraction_quality": quality,
                        },
                    )
                )
        # Cache results
        self.cache[cache_key] = (documents, time.time())
        return documents
# Usage in a RetrievalQA chain: the retriever supplies fresh web context.
from langchain.chains import RetrievalQA

retriever = CortexRetriever(cortex_client, top_k=8)
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)
result = qa_chain({"query": "What are the latest developments in quantum computing?"})
print(result["result"])
print(f"Sources: {len(result['source_documents'])}")
🤖 Specialized Agents
News Analysis Agent
from langchain.agents import Tool
from datetime import datetime, timedelta
def create_news_agent(cortex_client, llm):
    """Create a news analysis agent"""

    def get_breaking_news(topic: str) -> str:
        """Get breaking news on a topic"""
        outcome = cortex_client.search(
            query=f"breaking news {topic}",
            max_results=8,
            recency="day",
            # Restrict results to a handful of reputable outlets.
            domain_filter={
                "include": ["reuters.com", "bbc.com", "cnn.com", "bloomberg.com"]
            },
        )
        if not outcome.success:
            return "No breaking news found"
        news_summary = f"Breaking News: {topic}\n\n"
        for i, source in enumerate(outcome.sources, 1):
            news_summary += f"{i}. {source.title}\n"
            news_summary += f" Source: {source.domain}\n"
            news_summary += f" Published: {source.published_date}\n"
            news_summary += f" Summary: {source.snippet}\n\n"
        return news_summary

    def verify_news_claim(claim: str) -> str:
        """Verify a news claim"""
        validation = cortex_client.validate(
            claim=claim,
            context={"domain": "news", "claim_type": "event"},
        )
        report = "News Verification:\n"
        report += f"Claim: {claim}\n"
        report += f"Verification: {validation.validation_result}\n"
        report += f"Confidence: {validation.confidence_score:.2f}\n"
        if validation.evidence.supporting:
            report += "\nSupporting Sources:\n"
            for evidence in validation.evidence.supporting[:3]:
                report += f"- {evidence.source}\n"
        return report

    def compare_news_coverage(topic: str) -> str:
        """Compare how different news sources cover a topic"""
        outlets = ["reuters.com", "bbc.com", "cnn.com", "foxnews.com"]
        coverage = {}
        for outlet in outlets:
            # A `site:` query keeps the results scoped to one outlet.
            search_result = cortex_client.search(
                query=f"site:{outlet} {topic}",
                max_results=3,
                recency="week",
            )
            if search_result.success and search_result.sources:
                coverage[outlet] = search_result.sources
        comparison = f"News Coverage Comparison: {topic}\n\n"
        for outlet, articles in coverage.items():
            comparison += f"{outlet.upper()}:\n"
            for article in articles:
                comparison += f"- {article.title}\n"
                comparison += f" {article.snippet[:150]}...\n"
            comparison += "\n"
        return comparison

    tools = [
        Tool(
            name="Breaking News",
            func=get_breaking_news,
            description="Get the latest breaking news on a specific topic",
        ),
        Tool(
            name="Verify News",
            func=verify_news_claim,
            description="Verify whether a news claim is accurate",
        ),
        Tool(
            name="Compare Coverage",
            func=compare_news_coverage,
            description="Compare how different news sources cover a topic",
        ),
    ]
    return initialize_agent(
        tools,
        llm,
        agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
        verbose=True,
    )
# Usage: analyze AI-regulation news and verify the key claims found.
news_agent = create_news_agent(cortex_client, llm)
result = news_agent.run("Analyze the latest developments in AI regulation and verify key claims")
Research Assistant Agent
def create_research_assistant(cortex_client, llm):
    """Create a comprehensive research assistant"""

    def find_academic_papers(topic: str) -> str:
        """Find academic papers on a topic"""
        result = cortex_client.search(
            query=f"{topic} research papers",
            max_results=10,
            # Bias results toward established academic publishers.
            domain_filter={
                "include": ["arxiv.org", "nature.com", "science.org", "pubmed.ncbi.nlm.nih.gov"]
            },
        )
        if result.success:
            papers = "Academic Papers Found:\n\n"
            for i, source in enumerate(result.sources, 1):
                papers += f"{i}. {source.title}\n"
                papers += f" URL: {source.url}\n"
                papers += f" Confidence: {source.confidence:.2f}\n"
                papers += f" Abstract: {source.snippet}\n\n"
            return papers
        return "No academic papers found"

    def get_expert_opinions(topic: str) -> str:
        """Find expert opinions on a topic"""
        result = cortex_client.search(
            query=f"{topic} expert opinion analysis",
            max_results=8,
            domain_filter={
                "include": ["mit.edu", "stanford.edu", "harvard.edu", "nature.com"]
            },
        )
        if result.success:
            opinions = "Expert Opinions:\n\n"
            for i, source in enumerate(result.sources, 1):
                # Extract full content for better analysis
                content = cortex_client.extract(source.url)
                if content.success:
                    # Extract key quotes (simplified sentence heuristic)
                    sentences = content.text.split('.')[:10]
                    key_quotes = [s.strip() for s in sentences if len(s.strip()) > 50]
                    opinions += f"{i}. {source.title}\n"
                    opinions += f" Source: {source.url}\n"
                    opinions += " Key Points:\n"
                    for quote in key_quotes[:3]:
                        opinions += f" - {quote}\n"
                    opinions += "\n"
            return opinions
        return "No expert opinions found"

    def create_bibliography(topic: str) -> str:
        """Create a bibliography for a research topic"""
        result = cortex_client.search(
            query=f"{topic} comprehensive research sources",
            max_results=15,
        )
        if result.success:
            bibliography = f"Bibliography: {topic}\n"
            bibliography += "=" * (len(topic) + 13) + "\n\n"
            for i, source in enumerate(result.sources, 1):
                # Format as academic citation
                title = source.title
                url = source.url
                date = source.published_date or "Date unknown"
                # BUGFIX: the publication date was computed but never used;
                # cite it alongside the title as the citation intends.
                bibliography += f"[{i}] {title} ({date}). Retrieved from {url}. "
                bibliography += f"(Accessed: {datetime.now().strftime('%B %d, %Y')})\n\n"
            return bibliography
        return "Could not create bibliography"

    tools = [
        Tool(
            name="Find Academic Papers",
            func=find_academic_papers,
            description="Find academic papers and research studies on a topic",
        ),
        Tool(
            name="Get Expert Opinions",
            func=get_expert_opinions,
            description="Find expert opinions and analysis on a topic",
        ),
        Tool(
            name="Create Bibliography",
            func=create_bibliography,
            description="Create a comprehensive bibliography for a research topic",
        ),
    ]
    return initialize_agent(
        tools,
        llm,
        agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
        verbose=True,
    )
# Usage: survey quantum computing and produce a cited bibliography.
research_assistant = create_research_assistant(cortex_client, llm)
result = research_assistant.run("Research the current state of quantum computing and create a bibliography")
📊 Chain Combinations
Multi-Step Analysis Chain
from langchain.chains import SequentialChain, LLMChain
from langchain.prompts import PromptTemplate
def create_analysis_chain(cortex_client, llm):
    """Create a multi-step analysis chain.

    Pipeline: Cortex research -> LLM analysis -> Cortex fact-check ->
    LLM synthesis. Returns an object exposing `run(topic)`.

    NOTE: the original also built a `research_chain` LLMChain that was
    stored but never invoked (research is performed directly against the
    Cortex API by `research_step`); that dead code has been removed.
    """

    # Step 1: Research — query Cortex directly and collect extracts.
    def research_step(topic):
        result = cortex_client.search(
            query=f"{topic} latest developments research",
            max_results=10,
            recency="month",
        )
        findings = []
        if result.success:
            for source in result.sources:
                content = cortex_client.extract(source.url)
                if content.success:
                    findings.append({
                        "title": source.title,
                        "content": content.text[:1000],  # First 1000 chars
                        "confidence": source.confidence,
                    })
        # Format findings
        formatted_findings = ""
        for finding in findings[:5]:
            formatted_findings += f"- {finding['title']}\n"
            formatted_findings += f" {finding['content'][:200]}...\n\n"
        return formatted_findings

    # Step 2: Analysis prompt for the LLM.
    analysis_prompt = PromptTemplate(
        input_variables=["research_findings"],
        template="""
Based on the following research findings, provide a comprehensive analysis:
Research Findings:
{research_findings}
Please analyze:
1. Key trends and patterns
2. Significant developments
3. Potential implications
4. Areas of uncertainty or debate
Analysis:
""",
    )

    # Step 3: Fact-checking — pull claims out of the analysis and validate.
    def fact_check_step(analysis):
        # Extract claims from analysis (naive sentence split)
        sentences = analysis.split('.')[:10]
        factual_claims = [s.strip() for s in sentences if len(s.strip()) > 30]
        fact_check_results = []
        for claim in factual_claims[:5]:  # Check first 5 claims
            validation = cortex_client.validate(claim)
            fact_check_results.append({
                "claim": claim,
                "result": validation.validation_result,
                "confidence": validation.confidence_score,
            })
        # Format results
        fact_check_summary = "Fact-Check Results:\n"
        for result in fact_check_results:
            status = "✓" if result["result"] in ["VERIFIED", "LIKELY_TRUE"] else "✗"
            fact_check_summary += f"{status} {result['claim'][:100]}... "
            fact_check_summary += f"({result['result']}, {result['confidence']:.2f})\n"
        return fact_check_summary

    # Step 4: Final synthesis prompt.
    synthesis_prompt = PromptTemplate(
        input_variables=["analysis", "fact_check"],
        template="""
Based on the analysis and fact-checking results, provide a final synthesis:
Analysis:
{analysis}
Fact-Check Results:
{fact_check}
Final Synthesis (include confidence levels and caveats):
""",
    )

    # LLM-backed chains for the two prompt-driven steps.
    analysis_chain = LLMChain(llm=llm, prompt=analysis_prompt, output_key="analysis")
    synthesis_chain = LLMChain(llm=llm, prompt=synthesis_prompt, output_key="synthesis")

    class CortexAnalysisChain:
        """Orchestrates research -> analysis -> fact-check -> synthesis."""

        def __init__(self, analysis_chain, synthesis_chain, cortex_client):
            self.analysis_chain = analysis_chain
            self.synthesis_chain = synthesis_chain
            self.cortex_client = cortex_client

        def run(self, topic):
            # Step 1: Research
            research_findings = research_step(topic)
            # Step 2: Analysis
            analysis = self.analysis_chain.run(research_findings=research_findings)
            # Step 3: Fact-checking
            fact_check = fact_check_step(analysis)
            # Step 4: Synthesis
            synthesis = self.synthesis_chain.run(analysis=analysis, fact_check=fact_check)
            return {
                "topic": topic,
                "research_findings": research_findings,
                "analysis": analysis,
                "fact_check": fact_check,
                "synthesis": synthesis,
            }

    return CortexAnalysisChain(analysis_chain, synthesis_chain, cortex_client)
# Usage: run the full research -> analysis -> fact-check -> synthesis flow.
analysis_chain = create_analysis_chain(cortex_client, llm)
result = analysis_chain.run("artificial intelligence in healthcare")
print("=== FINAL SYNTHESIS ===")
print(result["synthesis"])
💡 Best Practices
1. Error Handling and Fallbacks
class RobustCortexTool(BaseTool):
    """Web search tool with retries, exponential backoff and a fallback.

    BUGFIX: the original referenced `e` outside the `except` block, which
    raises NameError in Python 3 (the exception variable is unbound once
    the handler exits). The last exception is now captured explicitly.
    """

    name = "robust_cortex_search"
    description = "Robust web search with fallback mechanisms"

    def __init__(self, cortex_client, fallback_search=None, **kwargs):
        super().__init__(**kwargs)
        self.cortex_client = cortex_client
        self.fallback_search = fallback_search
        self.max_retries = 3

    def _run(self, query: str, run_manager=None) -> str:
        """Search with retries; fall back or report failure when exhausted."""
        last_error = None
        for attempt in range(self.max_retries):
            try:
                result = self.cortex_client.search(query, max_results=5)
                if result.success:
                    return self._format_result(result)
            except Exception as exc:
                last_error = exc
                if attempt < self.max_retries - 1:
                    time.sleep(2 ** attempt)  # Exponential backoff
                    continue
        # All attempts exhausted: prefer the fallback search if configured.
        if self.fallback_search:
            return self.fallback_search(query)
        if last_error is not None:
            return f"Search failed after {self.max_retries} attempts: {str(last_error)}"
        return "Search failed - no results available"

    def _format_result(self, result):
        """Render a search result as summary plus a numbered source list."""
        formatted = f"Search Results Summary:\n{result.summary}\n\nSources:\n"
        for i, source in enumerate(result.sources, 1):
            formatted += f"{i}. {source.title} ({source.url})\n"
        return formatted
2. Cost Optimization
class CachedCortexTool(BaseTool):
    """Web search tool with an in-memory TTL cache to cut API costs.

    BUGFIX: the original called `self._format_result(...)` but never
    defined that method, so every successful cache miss raised
    AttributeError. The formatter is now defined on the class.
    """

    name = "cached_cortex_search"
    description = "Web search with intelligent caching"

    def __init__(self, cortex_client, cache_ttl=3600, **kwargs):
        super().__init__(**kwargs)
        self.cortex_client = cortex_client
        self.cache = {}  # query-hash -> (formatted result, timestamp)
        self.cache_ttl = cache_ttl

    def _run(self, query: str, run_manager=None) -> str:
        """Return a cached result when fresh, otherwise search and cache."""
        # Check cache first (case-insensitive key).
        cache_key = hashlib.md5(query.lower().encode()).hexdigest()
        if cache_key in self.cache:
            cached_result, timestamp = self.cache[cache_key]
            if time.time() - timestamp < self.cache_ttl:
                return f"[CACHED] {cached_result}"
        # Perform search
        result = self.cortex_client.search(query, max_results=5)
        if result.success:
            formatted_result = self._format_result(result)
            # Cache the result
            self.cache[cache_key] = (formatted_result, time.time())
            return formatted_result
        return "Search failed"

    def _format_result(self, result):
        """Render a search result as summary plus a numbered source list."""
        formatted = f"Search Results Summary:\n{result.summary}\n\nSources:\n"
        for i, source in enumerate(result.sources, 1):
            formatted += f"{i}. {source.title} ({source.url})\n"
        return formatted
3. Response Quality Control
def create_quality_controlled_agent(cortex_client, llm):
    """Create an agent with quality control measures"""

    def quality_search(query: str) -> str:
        """Search with quality filtering"""
        outcome = cortex_client.search(query, max_results=10)
        if not outcome.success:
            return "Search failed"
        # Keep only sources above the confidence threshold.
        high_quality_sources = [s for s in outcome.sources if s.confidence > 0.7]
        if not high_quality_sources:
            return "No high-quality sources found for this query"
        # Format with quality indicators
        formatted = "High-Quality Search Results (Confidence > 0.7):\n\n"
        formatted += f"Summary: {outcome.summary}\n\n"
        formatted += "Sources:\n"
        for i, src in enumerate(high_quality_sources, 1):
            formatted += f"{i}. {src.title}\n"
            formatted += f" Confidence: {src.confidence:.2f}\n"
            formatted += f" URL: {src.url}\n\n"
        return formatted

    def validate_before_response(claim: str) -> str:
        """Validate claims before including in response"""
        validation = cortex_client.validate(claim)
        confidence_threshold = 0.6
        # Label the claim according to the confidence threshold.
        if validation.confidence_score < confidence_threshold:
            label = "UNVERIFIED"
        else:
            label = "VERIFIED"
        return f"{label}: {claim} (Confidence: {validation.confidence_score:.2f})"

    tools = [
        Tool(
            name="Quality Search",
            func=quality_search,
            description="Search for high-quality, verified information",
        ),
        Tool(
            name="Validate Claim",
            func=validate_before_response,
            description="Validate a claim before including it in the response",
        ),
    ]
    return initialize_agent(
        tools,
        llm,
        agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
        verbose=True,
    )
Next: OpenAI Integration — direct integration with OpenAI APIs.