<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
  <!-- RSS 2.0 feed. The Atom namespace is declared here on the root and is
       used below for the feed's self-referencing link. -->
  <channel>
    <!-- Channel metadata: describes the site as a whole. -->
    <title>Aryan Sharma — Blog</title>
    <link>https://aryan1165.github.io/</link>
    <!-- rel="self" points at the URL of this feed document, as opposed to
         the site home page given in <link> above. -->
    <atom:link
        rel="self"
        type="application/rss+xml"
        href="https://aryan1165.github.io/feed.xml"/>
    <description>notes on CUDA, inference optimization, and GPU pipelines</description>
    <language>en</language>
    <lastBuildDate>Sat, 11 Apr 2026 00:00:00 GMT</lastBuildDate>

    <!-- Entries: one <item> per post. -->
    <item>
      <title>CUDA Streams and Events: A Real-World Guide</title>
      <link>https://aryan1165.github.io/blogs/cuda-streams-real-world-guide.html</link>
      <!-- The guid is the same URL as <link>; it carries no isPermaLink
           attribute, so RSS 2.0 readers treat it as a permalink. -->
      <guid>https://aryan1165.github.io/blogs/cuda-streams-real-world-guide.html</guid>
      <pubDate>Wed, 01 Jan 2025 00:00:00 GMT</pubDate>
      <description>How I went from 8 to 84 concurrent ASR sessions on an H100 by understanding CUDA streams, the default stream trap, and why events beat locks for GPU synchronization.</description>
    </item>
  </channel>
</rss>
