
    aіi/                     V   d Z ddlZddlmZ ddlmZ ddlmZ ddlmZm	Z	 ddl
mZmZ  ej                  e      Z G d d	ee      Z G d
 dee      Z G d dee      Ze G d d             Ze G d d             Ze G d d             Z G d d      Z G d d      Z G d d      Zy)z;
Database models and CRUD operations for content pipeline.
    N)	dataclass)datetime)Enum)AnyOptional)execute_query
get_cursorc                        e Zd ZdZdZdZdZdZy)TopicStatuspendingin_progress	publishedskippedfailedN)__name__
__module____qualname__PENDINGIN_PROGRESS	PUBLISHEDSKIPPEDFAILED     1/var/www/html/content-pipeline/database/models.pyr   r      s    GKIGFr   r   c                       e Zd ZdZdZdZy)TopicPriorityABCN)r   r   r   r   r   r    r   r   r   r   r      s    AAAr   r   c                   ,    e Zd ZdZdZdZdZdZdZdZ	dZ
y	)
ContentClusterkostenrechttechnikbranchen	vergleichhowtotrendsproblemeN)r   r   r   KOSTENRECHTTECHNIKBRANCHEN	VERGLEICHHOWTOTRENDSPROBLEMEr   r   r   r"   r"      s*    FEGHIEFHr   r"   c                       e Zd ZU eed<   eed<   eed<   eed<   eed<   eed<   eed<   ee   ed<   eed	<   eed
<   ee   ed<   ee   ed<   eed<   eed<   ede	ee
f   dd fd       Zy)Topicidtitleslugcontent_clusterprioritypriority_scoretarget_keywordssecondary_keywordssearch_intentstatusskip_reason
article_id
created_at
updated_atrowreturnc                      | di |S Nr   r   clsrC   s     r   from_rowzTopic.from_row:       zSzr   Nr   r   r   int__annotations__strr   r   classmethoddictr   rI   r   r   r   r4   r4   )   s    GJ
IM %K#4S> g  r   r4   c                   
   e Zd ZU eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed	<   ee   ed
<   eed<   eed<   ee   ed<   ee   ed<   ee   ed<   ee   ed<   e	ed<   ee	   ed<   e
deeef   dd fd       Zy)Articler5   topic_idr6   r7   content_markdowncontent_htmlmeta_description
word_countfact_check_passedfact_check_logfact_check_attemptsgeneration_cost_usdthumbnail_path
wp_post_idwp_media_id	author_idrA   published_atrC   rD   c                      | di |S rF   r   rG   s     r   rI   zArticle.from_rowT   rJ   r   N)r   r   r   rL   rM   rN   boolr   floatr   rO   rP   r   rI   r   r   r   rR   rR   ?   s    GMJ
IOSM!SM!#}8$$4S> i  r   rR   c                       e Zd ZU eed<   eed<   eed<   eed<   eed<   eed<   ee   ed<   ee   ed<   eed	<   eed
<   ede	ee
f   dd fd       Zy)PublishedContentr5   sourcer6   r7   summarymain_keywordsr8   r]   r`   rA   rC   rD   c                      | di |S rF   r   rG   s     r   rI   zPublishedContent.from_rowf   rJ   r   NrK   r   r   r   re   re   Y   sj    GKJ
ILc]"4S> .@  r   re   c                       e Zd ZdZdZededee   fd       Z	ede
dee   fd       Zedee   fd       Zedee   fd	       Ze	 	 ddededee
   dee   def
d       Zedee
ef   fd       Zede
de
de
de
dede
dee
   de
defd       Zy
)TopicRepositoryz2CRUD operations for parketry_content_topics table.parketry_content_topicsrS   rD   c                 t    d| j                    d}t        ||f      }|rt        j                  |d         S d S NSELECT * FROM z WHERE id = %sr   TABLEr   r4   rI   )rH   rS   queryrowss       r   	get_by_idzTopicRepository.get_by_idp   s<     >:UXK0*.u~~d1g&8D8r   r7   c                 t    d| j                    d}t        ||f      }|rt        j                  |d         S d S )Nro   z WHERE slug = %sr   rp   )rH   r7   rr   rs   s       r   get_by_slugzTopicRepository.get_by_slugv   s=     +;<UTG,*.u~~d1g&8D8r   c                 p    d| j                    d}t        |      }|rt        j                  |d         S dS )z#Get highest priority pending topic.
            SELECT * FROM z{
            WHERE status = 'pending'
            ORDER BY priority_score DESC, created_at ASC
            LIMIT 1
        r   Nrp   rH   rr   rs   s      r   get_next_pendingz TopicRepository.get_next_pending|   sB    99+ &	 U#*.u~~d1g&8D8r   c                     d| j                    d}t        |      xs g }|D cg c]  }t        j                  |       c}S c c}w )Nrx   zg
            WHERE status = 'pending'
            ORDER BY priority_score DESC, created_at ASC
        rp   rH   rr   rs   rC   s       r   get_all_pendingzTopicRepository.get_all_pending   sJ    99+ &	
 U#)r/34s#444   ANr>   r?   r@   c                     t               5 }d| j                   d}|j                  ||j                  |||f       |j                  dkD  cd d d        S # 1 sw Y   y xY w)N
                UPDATE z
                SET status = %s, skip_reason = %s, article_id = %s, updated_at = NOW()
                WHERE id = %s
            r   )r	   rq   executevaluerowcount)rH   rS   r>   r?   r@   cursorrr   s          r   update_statuszTopicRepository.update_status   sc     \ 	'V		{ #E
 NN56<<j("ST??Q&	' 	' 	's   ?AAc                 x    d| j                    d}t        |      xs g }|D ci c]  }|d   |d    c}S c c}w )Nz?
            SELECT status, COUNT(*) as count
            FROM z%
            GROUP BY status
        r>   count)rq   r   r|   s       r   count_by_statuszTopicRepository.count_by_status   sN    )) 	
 U#)r7;<Hs7|+<<<s   7r6   r8   r9   r:   r;   r<   r=   c	                     t               5 }	d| j                   d}
|	j                  |
||||||||f       |	j                  cd d d        S # 1 sw Y   y xY w)N
                INSERT INTO z
                (title, slug, content_cluster, priority, priority_score,
                 target_keywords, secondary_keywords, search_intent, status)
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, 'pending')
            r	   rq   r   	lastrowid)rH   r6   r7   r8   r9   r:   r;   r<   r=   r   rr   s              r   insertzTopicRepository.insert   ss     \ 	$V YYK (E NN#"#&!	 ##)	$ 	$ 	$   6AA)NN)r   r   r   __doc__rq   rO   rL   r   r4   rt   rN   rv   rz   listr}   r   rb   r   rP   r   r   r   r   r   rk   rk   k   sy   <%E9 9% 9 9
 9s 9x 9 9
 	9% 	9 	9 5U 5 5 
 &*$('' ' c]	'
 SM' 
' '  =S#X = = $$ $ 	$
 $ $ $ %SM$ $ 
$ $r   rk   c                       e Zd ZdZdZededee   fd       Z	edee   fd       Z
ededed	ed
edededededee   dededefd       Zedededee   dedef
d       Zedededefd       Zy)ArticleRepositoryz,CRUD operations for parketry_articles table.parketry_articlesr@   rD   c                 t    d| j                    d}t        ||f      }|rt        j                  |d         S d S rn   rq   r   rR   rI   )rH   r@   rr   rs   s       r   rt   zArticleRepository.get_by_id   s>     >:UZM2,0wQ(:d:r   c                 p    d| j                    d}t        |      }|rt        j                  |d         S d S )Nrx   zm
            WHERE wp_post_id IS NOT NULL
            ORDER BY published_at DESC
            LIMIT 1
        r   r   ry   s      r   get_last_publishedz$ArticleRepository.get_last_published   sD    99+ &	 U#,0wQ(:d:r   rS   r6   r7   rT   rU   rV   rW   rX   rY   rZ   r[   c                     t               5 }d| j                   d}|j                  ||||||||||	|
|f       |j                  cd d d        S # 1 sw Y   y xY w)Nr   a&  
                (topic_id, title, slug, content_markdown, content_html,
                 meta_description, word_count, fact_check_passed, fact_check_log,
                 fact_check_attempts, generation_cost_usd)
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
            r   )rH   rS   r6   r7   rT   rU   rV   rW   rX   rY   rZ   r[   r   rr   s                 r   r   zArticleRepository.insert   s|     \ 	$V YYK (E NN$ $%"''  ##1	$ 	$ 	$s   9AAr]   r^   r_   c                     t               5 }d| j                   d}|j                  |||||f       |j                  dkD  cd d d        S # 1 sw Y   y xY w)Nr   z
                SET wp_post_id = %s, wp_media_id = %s, author_id = %s,
                    published_at = NOW()
                WHERE id = %s
            r   r	   rq   r   r   )rH   r@   r]   r^   r_   r   rr   s          r   update_wordpress_idsz&ArticleRepository.update_wordpress_ids  s_     \ 	'V		{ #E NN5:{Iz"RS??Q&	' 	' 	's   5A

Ar\   c                     t               5 }d| j                   d}|j                  |||f       |j                  dkD  cd d d        S # 1 sw Y   y xY w)Nr   zS
                SET thumbnail_path = %s
                WHERE id = %s
            r   r   )rH   r@   r\   r   rr   s        r   update_thumbnailz"ArticleRepository.update_thumbnail!  sY    \ 	'V		{ #E
 NN5>:">???Q&	' 	' 	's   3AAN)r   r   r   r   rq   rO   rL   r   rR   rt   r   rN   rb   rc   r   r   r   r   r   r   r   r      sO   6E;3 ;8G+< ; ;
 ;8G#4 ; ; &$&$ &$ 	&$
 &$ &$ &$ &$  &$ !&$ !&$ #&$ 
&$ &$P '' ' c]	'
 ' 
' '" '# 's 't ' 'r   r   c                      e Zd ZdZdZedee   fd       Zede	dee   fd       Z
e	 dde	d	e	d
e	de	de	dee   dedee	   defd       Zededefd       Ze	 	 	 ddee	   dee   dedee   fd       Zededefd       Zy)PublishedContentRepositoryz5CRUD operations for parketry_published_content table.parketry_published_contentrD   c                     d| j                    d}t        |      xs g }|D cg c]  }t        j                  |       c}S c c}w )Nro   z ORDER BY published_at DESCrq   r   re   rI   r|   s       r   get_allz"PublishedContentRepository.get_all2  sD     +FGU#)r:>?3 ))#.???r~   search_termc                     d| j                    d}t        |||f      xs g }|D cg c]  }t        j                  |       c}S c c}w )z1Search using FULLTEXT index on title and summary.zW
            SELECT *, MATCH(title, summary) AGAINST(%s) as relevance
            FROM za
            WHERE MATCH(title, summary) AGAINST(%s)
            ORDER BY relevance DESC
        r   )rH   r   rr   rs   rC   s        r   search_fulltextz*PublishedContentRepository.search_fulltext8  sT    )) 	 U[+$>?E2:>?3 ))#.???s   ANrf   r6   r7   rg   rh   r]   r`   r8   c	                     t               5 }	d| j                   d}
|	j                  |
||||||||f       |	j                  cd d d        S # 1 sw Y   y xY w)Nr   z
                (source, title, slug, summary, main_keywords, content_cluster, wp_post_id, published_at)
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
            r   )rH   rf   r6   r7   rg   rh   r]   r`   r8   r   rr   s              r   r   z!PublishedContentRepository.insertD  sk     \ 
	$V YYK (E
 NNg}oz[gh ##
	$ 
	$ 
	$r   articlec           
          | j                  d|j                  |j                  |j                  d|j                  |j
                  xs t        j                               S )z7Insert into published_content from a completed article.pipeline )rf   r6   r7   rg   rh   r]   r`   )r   r6   r7   rV   r]   r`   r   now)rH   r   s     r   insert_from_articlez.PublishedContentRepository.insert_from_article\  sT     zz--,,)) --?  
 	
r   clusterexclude_wp_idlimitc           
          d| j                    d}t        |||||f      }|sg S |D cg c]"  }|d   |d   |d   |j                  dd      d$ c}S c c}w )	a  
        Find published articles suitable for internal linking.

        Prioritizes articles in the same content cluster, then by recency.

        Args:
            cluster: Content cluster to prioritize
            exclude_wp_id: WordPress post ID to exclude
            limit: Maximum results to return

        Returns:
            List of dicts with wp_post_id, slug, title, summary
        zW
            SELECT wp_post_id, slug, title, summary, content_cluster
            FROM z
            WHERE slug IS NOT NULL AND slug != ''
            AND (%s IS NULL OR wp_post_id != %s)
            ORDER BY
                CASE WHEN content_cluster = %s THEN 0 ELSE 1 END,
                published_at DESC
            LIMIT %s
        r]   r7   r6   rg   r   )r]   r7   r6   rg   )rq   r   get)rH   r   r   r   rr   rs   rC   s          r   get_link_targetsz+PublishedContentRepository.get_link_targetsi  s    ()) 		 U]M7E$RSI 
  ",/FW779b1	
 	
 
s   'Ac                     t               5 }d| j                   d}|j                  ||f       |j                  dkD  cddd       S # 1 sw Y   yxY w)z6Delete a published content entry by WordPress post ID.zDELETE FROM z WHERE wp_post_id = %sr   Nr   )rH   r]   r   rr   s       r   delete_by_wp_post_idz/PublishedContentRepository.delete_by_wp_post_id  sO     \ 	'V"399+-CDENN5:-0??Q&	' 	' 	's   2AA)N)NN   )r   r   r   r   rq   rO   r   re   r   rN   r   r   rL   r   r   rR   r   rP   r   rb   r   r   r   r   r   r   -  sg   ?(E@-. @ @
 	@# 	@$7G2H 	@ 	@  *.$$ $ 	$
 $ $ SM$ $ "#$ 
$ $. 

' 

c 

 

  "&'+	)
#)
  })
 	)

 
d)
 )
V 'c 'd ' 'r   r   )r   loggingdataclassesr   r   enumr   typingr   r   database.connectionr   r	   	getLoggerr   loggerrN   r   r   r"   r4   rR   re   rk   r   r   r   r   r   <module>r      s     !     9			8	$#t C S$    *   2   "b$ b$JZ' Z'zn' n'r   